# Initiliation

In [1]:
!pip install transformers datasets --quiet

In [2]:
from transformers import TrainingArguments
from transformers import Trainer
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer

from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, classification_report, confusion_matrix
from datasets import Dataset
from datasets import load_metric

import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from google.colab import drive

# Data Preparation

In [3]:
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [4]:
%cd 'drive/MyDrive/Masterarbeit/Colab Notebooks/OVERVIEW MBTI/Datasets/URL_Balanced_MASK'

/content/drive/.shortcut-targets-by-id/1aHXlqhpj1STohhfU4gn53D4whaLH__Jz/Masterarbeit/Colab Notebooks/OVERVIEW MBTI/Datasets/URL_Balanced_MASK


In [5]:
dfPJ = pd.read_csv('MBTI_PJ_URL_MASK.csv', sep=",", error_bad_lines=False)
dfPJ

Unnamed: 0,text,label
0,i like that you are kind as [MASK] i find that...,0
1,oh my you are right who really talks like tha...,1
2,yep yep yep especially the last one yep agree ...,0
3,things that are generalizable to the entire po...,1
4,work student hobbies studying gaming reading d...,1
...,...,...
6935,well mostly i dont like avocado but the primar...,1
6936,during an argument rather than trying to valid...,1
6937,cigarettes are like hamsters perfectly harmles...,1
6938,bookshelf porn as a nonamerican please excuse ...,1


# Model Training

In [6]:
modeltype = "princeton-nlp/sup-simcse-bert-base-uncased"

In [7]:
train, test = train_test_split(dfPJ, test_size=0.2, random_state=0, stratify=dfPJ.label)

train = Dataset.from_pandas(train)
test = Dataset.from_pandas(test)

tokenizer = AutoTokenizer.from_pretrained(modeltype)

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_train = train.map(tokenize_function, batched=True)
tokenized_test = test.map(tokenize_function, batched=True)

full_train_dataset = tokenized_train
full_eval_dataset = tokenized_test

model = AutoModelForSequenceClassification.from_pretrained(modeltype, num_labels=2)

training_args = TrainingArguments(
    "SIMCSE_BERT_PJ_MASK", 
    evaluation_strategy="epoch",
    save_strategy = 'no',
    save_steps = 100000,
    save_total_limit = 1,
    metric_for_best_model="eval_f1")

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')
    acc = accuracy_score(labels, preds)
    print(classification_report(labels, preds, labels=[0,1]))
    print(confusion_matrix(labels,preds))
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

  0%|          | 0/6 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Hyperparameter Optimization

In [8]:
! pip install optuna --quiet

In [9]:
def model_init():
    return AutoModelForSequenceClassification.from_pretrained(modeltype, num_labels=2)

In [10]:
trainer = Trainer(
      model_init=model_init,
      args=training_args, 
      train_dataset=full_train_dataset, 
      eval_dataset=full_eval_dataset,
      compute_metrics=compute_metrics 
  )

loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size": 2,
 

In [11]:
import sklearn.metrics as metrics
import optuna
import sys
import logging

def objective (metrics):
  return metrics['eval_f1']

def hyperparameter_space(trial):

    return {
        "learning_rate": trial.suggest_float("learning_rate", 5e-8, 5e-1, log=True),
        "per_device_train_batch_size": trial.suggest_categorical("per_device_train_batch_size", [2, 4, 8, 16]),
        "weight_decay": trial.suggest_float("weight_decay", 5e-12, 5e-1, log=True),
        "num_train_epochs": trial.suggest_float("num_train_epochs",1,8,log=True),
        #"adam_epsilon": trial.suggest_float("adam_epsilon", 1e-10, 1e-6, log=True),
        #"seed" : trial.suggest_float("seed",10,60,log=True)
        }

optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))
study_name = "SIMCSE_BERT_MASK_PJ"  # Unique identifier of the study.
storage_name = "sqlite:///{}.db".format(study_name)

best_run = trainer.hyperparameter_search(hp_space=hyperparameter_space,compute_objective=objective, n_trials=50, direction="maximize",study_name=study_name, storage=storage_name )

study = optuna.create_study()

[32m[I 2021-12-08 20:52:02,471][0m A new study created in RDB with name: SIMCSE_BERT_MASK_PJ[0m


A new study created in RDB with name: SIMCSE_BERT_MASK_PJ


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7323,0.690793,0.602305,0.375899,0.301153,0.5
1,0.7036,0.673406,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 21:00:02,774][0m Trial 0 finished with value: 0.37589928057553956 and parameters: {'learning_rate': 0.0013340513967056542, 'per_device_train_batch_size': 4, 'weight_decay': 8.452287127861227e-08, 'num_train_epochs': 1.1124699291956284}. Best is trial 0 with value: 0.37589928057

Trial 0 finished with value: 0.37589928057553956 and parameters: {'learning_rate': 0.0013340513967056542, 'per_device_train_batch_size': 4, 'weight_decay': 8.452287127861227e-08, 'num_train_epochs': 1.1124699291956284}. Best is trial 0 with value: 0.37589928057553956.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,2.645,4.549226,0.602305,0.375899,0.301153,0.5
2,1.6336,0.756413,0.602305,0.375899,0.301153,0.5
2,1.1,0.673517,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 21:20:11,938][0m Trial 1 finished with value: 0.37589928057553956 and parameters: {'learning_rate': 0.017069145282558405, 'per_device_train_batch_size': 4, 'weight_decay': 8.356806173792081e-11, 'num_train_epochs': 2.9576534376194226}. Best is trial 0 with value: 0.375899280575

Trial 1 finished with value: 0.37589928057553956 and parameters: {'learning_rate': 0.017069145282558405, 'per_device_train_batch_size': 4, 'weight_decay': 8.356806173792081e-11, 'num_train_epochs': 2.9576534376194226}. Best is trial 0 with value: 0.37589928057553956.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.528,0.895216,0.602305,0.375899,0.301153,0.5
1,0.8539,0.674516,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 21:35:34,374][0m Trial 2 finished with value: 0.37589928057553956 and parameters: {'learning_rate': 0.004566844850136601, 'per_device_train_batch_size': 2, 'weight_decay': 2.341363222267331e-05, 'num_train_epochs': 1.8520053633946332}. Best is trial 0 with value: 0.375899280575

Trial 2 finished with value: 0.37589928057553956 and parameters: {'learning_rate': 0.004566844850136601, 'per_device_train_batch_size': 2, 'weight_decay': 2.341363222267331e-05, 'num_train_epochs': 1.8520053633946332}. Best is trial 0 with value: 0.37589928057553956.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,30.024,17.374868,0.602305,0.375899,0.301153,0.5
2,23.5613,7.048118,0.602305,0.375899,0.301153,0.5
3,22.2384,7.012425,0.602305,0.375899,0.301153,0.5
4,14.8857,23.71035,0.602305,0.375899,0.301153,0.5
5,10.777,6.011094,0.602305,0.375899,0.301153,0.5
6,6.2132,8.994378,0.397695,0.284536,0.198847,0.5
6,4.3338,0.672108,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       836
           1       0.40      1.00      0.57       552

    accuracy                           0.40      1388
   macro avg       0.20      0.50      0.28      1388
weighted avg       0.16      0.40      0.23      1388

[[  0 836]
 [  0 552]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 22:18:49,251][0m Trial 3 finished with value: 0.37589928057553956 and parameters: {'learning_rate': 0.20989641248555158, 'per_device_train_batch_size': 4, 'weight_decay': 0.0011357837870954568, 'num_train_epochs': 6.3100394441025225}. Best is trial 0 with value: 0.3758992805755

Trial 3 finished with value: 0.37589928057553956 and parameters: {'learning_rate': 0.20989641248555158, 'per_device_train_batch_size': 4, 'weight_decay': 0.0011357837870954568, 'num_train_epochs': 6.3100394441025225}. Best is trial 0 with value: 0.37589928057553956.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,6.7101,10.585212,0.602305,0.375899,0.301153,0.5
2,4.9156,1.562549,0.602305,0.375899,0.301153,0.5
2,2.5146,1.047325,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 22:38:12,632][0m Trial 4 finished with value: 0.37589928057553956 and parameters: {'learning_rate': 0.04972884184245069, 'per_device_train_batch_size': 4, 'weight_decay': 1.7312865785864872e-05, 'num_train_epochs': 2.8128531339433014}. Best is trial 0 with value: 0.375899280575

Trial 4 finished with value: 0.37589928057553956 and parameters: {'learning_rate': 0.04972884184245069, 'per_device_train_batch_size': 4, 'weight_decay': 1.7312865785864872e-05, 'num_train_epochs': 2.8128531339433014}. Best is trial 0 with value: 0.37589928057553956.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6754,0.653244,0.613833,0.484118,0.588668,0.533571
2,0.641,0.668044,0.626081,0.560834,0.600316,0.571125
3,0.4806,0.746841,0.605187,0.575336,0.580398,0.575012
3,0.4806,0.749112,0.615994,0.57987,0.590061,0.580291


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.62      0.93      0.74       836
           1       0.56      0.14      0.23       552

    accuracy                           0.61      1388
   macro avg       0.59      0.53      0.48      1388
weighted avg       0.60      0.61      0.54      1388

[[774  62]
 [474  78]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.84      0.73       836
           1       0.55      0.30      0.39       552

    accuracy                           0.63      1388
   macro avg       0.60      0.57      0.56      1388
weighted avg       0.61      0.63      0.60      1388

[[702 134]
 [385 167]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.72      0.69       836
           1       0.50      0.43      0.46       552

    accuracy                           0.61      1388
   macro avg       0.58      0.58      0.58      1388
weighted avg       0.60      0.61      0.60      1388

[[604 232]
 [316 236]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.75      0.70       836
           1       0.52      0.41      0.46       552

    accuracy                           0.62      1388
   macro avg       0.59      0.58      0.58      1388
weighted avg       0.60      0.62      0.61      1388

[[631 205]
 [328 224]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 22:57:25,274][0m Trial 5 finished with value: 0.5798704635994764 and parameters: {'learning_rate': 1.5117200966434623e-05, 'per_device_train_batch_size': 8, 'weight_decay': 1.9619192649829614e-11, 'num_train_epochs': 3.0809716620403114}. Best is trial 5 with value: 0.5798704635994764.[0m


Trial 5 finished with value: 0.5798704635994764 and parameters: {'learning_rate': 1.5117200966434623e-05, 'per_device_train_batch_size': 8, 'weight_decay': 1.9619192649829614e-11, 'num_train_epochs': 3.0809716620403114}. Best is trial 5 with value: 0.5798704635994764.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,6.165125,0.397695,0.284536,0.198847,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       836
           1       0.40      1.00      0.57       552

    accuracy                           0.40      1388
   macro avg       0.20      0.50      0.28      1388
weighted avg       0.16      0.40      0.23      1388

[[  0 836]
 [  0 552]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-08 23:03:06,911][0m Trial 6 pruned. [0m


Trial 6 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7233,0.774218,0.602305,0.375899,0.301153,0.5
2,0.7142,0.692011,0.602305,0.375899,0.301153,0.5
3,0.7135,0.720186,0.397695,0.284536,0.198847,0.5
4,0.6855,0.679505,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       836
           1       0.40      1.00      0.57       552

    accuracy                           0.40      1388
   macro avg       0.20      0.50      0.28      1388
weighted avg       0.16      0.40      0.23      1388

[[  0 836]
 [  0 552]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-08 23:30:22,921][0m Trial 7 pruned. [0m


Trial 7 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.1302,1.753,0.602305,0.375899,0.301153,0.5
2,0.7981,0.673162,0.602305,0.375899,0.301153,0.5
2,0.7303,0.672196,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 23:51:55,221][0m Trial 8 finished with value: 0.37589928057553956 and parameters: {'learning_rate': 0.0015499119355592576, 'per_device_train_batch_size': 2, 'weight_decay': 3.888698794536835e-07, 'num_train_epochs': 2.562983752634922}. Best is trial 5 with value: 0.579870463599

Trial 8 finished with value: 0.37589928057553956 and parameters: {'learning_rate': 0.0015499119355592576, 'per_device_train_batch_size': 2, 'weight_decay': 3.888698794536835e-07, 'num_train_epochs': 2.562983752634922}. Best is trial 5 with value: 0.5798704635994764.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.674141,0.602305,0.384236,0.551524,0.501539
1,0.678000,0.672271,0.602305,0.377593,0.551227,0.500308


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      0.99      0.75       836
           1       0.50      0.01      0.02       552

    accuracy                           0.60      1388
   macro avg       0.55      0.50      0.38      1388
weighted avg       0.56      0.60      0.46      1388

[[831   5]
 [547   5]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.50      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.55      0.50      0.38      1388
weighted avg       0.56      0.60      0.45      1388

[[835   1]
 [551   1]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 00:03:10,763][0m Trial 9 finished with value: 0.37759263290950035 and parameters: {'learning_rate': 7.913411787749208e-08, 'per_device_train_batch_size': 16, 'weight_decay': 5.0047564798381386e-08, 'num_train_epochs': 1.9880868824533793}. Best is trial 5 with value: 0.5798704635994764.[0m


Trial 9 finished with value: 0.37759263290950035 and parameters: {'learning_rate': 7.913411787749208e-08, 'per_device_train_batch_size': 16, 'weight_decay': 5.0047564798381386e-08, 'num_train_epochs': 1.9880868824533793}. Best is trial 5 with value: 0.5798704635994764.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6723,0.649433,0.62464,0.518781,0.607353,0.551773
2,0.6467,0.645933,0.636167,0.596134,0.613119,0.59673
3,0.541,0.69672,0.605187,0.583024,0.584507,0.582397
4,0.4566,0.890612,0.618156,0.560163,0.589009,0.567623
5,0.377,1.110419,0.584294,0.580784,0.586546,0.590285
6,0.2165,1.402476,0.582853,0.565809,0.565658,0.566011
7,0.1774,1.719125,0.590778,0.563997,0.566647,0.563666
7,0.1613,1.803037,0.585735,0.566961,0.567074,0.566864


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.91      0.74       836
           1       0.58      0.20      0.29       552

    accuracy                           0.62      1388
   macro avg       0.61      0.55      0.52      1388
weighted avg       0.61      0.62      0.56      1388

[[759  77]
 [444 108]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.79      0.72       836
           1       0.56      0.40      0.47       552

    accuracy                           0.64      1388
   macro avg       0.61      0.60      0.60      1388
weighted avg       0.62      0.64      0.62      1388

[[660 176]
 [329 223]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.69      0.68       836
           1       0.50      0.47      0.49       552

    accuracy                           0.61      1388
   macro avg       0.58      0.58      0.58      1388
weighted avg       0.60      0.61      0.60      1388

[[580 256]
 [292 260]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.81      0.72       836
           1       0.53      0.32      0.40       552

    accuracy                           0.62      1388
   macro avg       0.59      0.57      0.56      1388
weighted avg       0.60      0.62      0.59      1388

[[681 155]
 [375 177]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.69      0.56      0.62       836
           1       0.48      0.62      0.54       552

    accuracy                           0.58      1388
   macro avg       0.59      0.59      0.58      1388
weighted avg       0.61      0.58      0.59      1388

[[469 367]
 [210 342]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.65      0.65       836
           1       0.48      0.48      0.48       552

    accuracy                           0.58      1388
   macro avg       0.57      0.57      0.57      1388
weighted avg       0.58      0.58      0.58      1388

[[542 294]
 [285 267]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.70      0.67       836
           1       0.48      0.43      0.46       552

    accuracy                           0.59      1388
   macro avg       0.57      0.56      0.56      1388
weighted avg       0.58      0.59      0.59      1388

[[582 254]
 [314 238]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.66      0.66       836
           1       0.48      0.47      0.48       552

    accuracy                           0.59      1388
   macro avg       0.57      0.57      0.57      1388
weighted avg       0.59      0.59      0.59      1388

[[551 285]
 [290 262]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 00:51:16,358][0m Trial 10 finished with value: 0.566961454089287 and parameters: {'learning_rate': 7.6145215736127385e-06, 'per_device_train_batch_size': 8, 'weight_decay': 6.346208349406173e-12, 'num_train_epochs': 7.885870878043322}. Best is trial 5 with value: 0.5798704635994764.[0m


Trial 10 finished with value: 0.566961454089287 and parameters: {'learning_rate': 7.6145215736127385e-06, 'per_device_train_batch_size': 8, 'weight_decay': 6.346208349406173e-12, 'num_train_epochs': 7.885870878043322}. Best is trial 5 with value: 0.5798704635994764.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6726,0.649537,0.626081,0.522279,0.609748,0.553893
2,0.6459,0.646938,0.632565,0.591929,0.608705,0.592816
3,0.5308,0.71225,0.605187,0.586905,0.587146,0.586705
4,0.4365,0.931418,0.615994,0.555239,0.585833,0.563982
5,0.3589,1.191255,0.575648,0.572612,0.579437,0.5828
6,0.1983,1.520805,0.580692,0.563689,0.563531,0.563909
7,0.1576,1.824548,0.585014,0.559831,0.561625,0.559497
7,0.1458,1.868078,0.584294,0.564064,0.564448,0.563822


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.91      0.74       836
           1       0.59      0.20      0.30       552

    accuracy                           0.63      1388
   macro avg       0.61      0.55      0.52      1388
weighted avg       0.61      0.63      0.57      1388

[[758  78]
 [441 111]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.79      0.72       836
           1       0.55      0.40      0.46       552

    accuracy                           0.63      1388
   macro avg       0.61      0.59      0.59      1388
weighted avg       0.62      0.63      0.62      1388

[[658 178]
 [332 220]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.68      0.67       836
           1       0.50      0.50      0.50       552

    accuracy                           0.61      1388
   macro avg       0.59      0.59      0.59      1388
weighted avg       0.60      0.61      0.60      1388

[[566 270]
 [278 274]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.82      0.72       836
           1       0.53      0.31      0.39       552

    accuracy                           0.62      1388
   macro avg       0.59      0.56      0.56      1388
weighted avg       0.60      0.62      0.59      1388

[[684 152]
 [381 171]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.68      0.55      0.61       836
           1       0.47      0.62      0.54       552

    accuracy                           0.58      1388
   macro avg       0.58      0.58      0.57      1388
weighted avg       0.60      0.58      0.58      1388

[[458 378]
 [211 341]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.65      0.65       836
           1       0.47      0.48      0.48       552

    accuracy                           0.58      1388
   macro avg       0.56      0.56      0.56      1388
weighted avg       0.58      0.58      0.58      1388

[[540 296]
 [286 266]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.68      0.67       836
           1       0.48      0.43      0.45       552

    accuracy                           0.59      1388
   macro avg       0.56      0.56      0.56      1388
weighted avg       0.58      0.59      0.58      1388

[[572 264]
 [312 240]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.66      0.66       836
           1       0.48      0.46      0.47       552

    accuracy                           0.58      1388
   macro avg       0.56      0.56      0.56      1388
weighted avg       0.58      0.58      0.58      1388

[[555 281]
 [296 256]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 01:37:39,989][0m Trial 11 finished with value: 0.5640644195906361 and parameters: {'learning_rate': 8.184301639312569e-06, 'per_device_train_batch_size': 8, 'weight_decay': 7.93038238948175e-12, 'num_train_epochs': 7.57282598245151}. Best is trial 5 with value: 0.5798704635994764.[0m


Trial 11 finished with value: 0.5640644195906361 and parameters: {'learning_rate': 8.184301639312569e-06, 'per_device_train_batch_size': 8, 'weight_decay': 7.93038238948175e-12, 'num_train_epochs': 7.57282598245151}. Best is trial 5 with value: 0.5798704635994764.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6755,0.653399,0.618156,0.507465,0.594122,0.543929
2,0.6384,0.663193,0.626081,0.578349,0.60013,0.581587
3,0.4493,0.851109,0.584294,0.570471,0.570189,0.571515
4,0.3064,1.201626,0.600144,0.552945,0.568092,0.556979
4,0.2401,1.314587,0.600144,0.57814,0.579395,0.577595


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.91      0.74       836
           1       0.56      0.18      0.27       552

    accuracy                           0.62      1388
   macro avg       0.59      0.54      0.51      1388
weighted avg       0.60      0.62      0.56      1388

[[758  78]
 [452 100]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.80      0.72       836
           1       0.54      0.36      0.44       552

    accuracy                           0.63      1388
   macro avg       0.60      0.58      0.58      1388
weighted avg       0.61      0.63      0.61      1388

[[668 168]
 [351 201]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.63      0.65       836
           1       0.48      0.51      0.49       552

    accuracy                           0.58      1388
   macro avg       0.57      0.57      0.57      1388
weighted avg       0.59      0.58      0.59      1388

[[530 306]
 [271 281]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.77      0.70       836
           1       0.50      0.35      0.41       552

    accuracy                           0.60      1388
   macro avg       0.57      0.56      0.55      1388
weighted avg       0.58      0.60      0.58      1388

[[642 194]
 [361 191]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.69      0.67       836
           1       0.50      0.47      0.48       552

    accuracy                           0.60      1388
   macro avg       0.58      0.58      0.58      1388
weighted avg       0.60      0.60      0.60      1388

[[575 261]
 [294 258]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 02:06:41,414][0m Trial 12 finished with value: 0.5781397603029482 and parameters: {'learning_rate': 1.4904029343406248e-05, 'per_device_train_batch_size': 8, 'weight_decay': 0.11219817819715945, 'num_train_epochs': 4.731493161108194}. Best is trial 5 with value: 0.5798704635994764.[0m


Trial 12 finished with value: 0.5781397603029482 and parameters: {'learning_rate': 1.4904029343406248e-05, 'per_device_train_batch_size': 8, 'weight_decay': 0.11219817819715945, 'num_train_epochs': 4.731493161108194}. Best is trial 5 with value: 0.5798704635994764.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6761,0.652978,0.623199,0.516094,0.604555,0.549962
2,0.6347,0.664574,0.614553,0.568602,0.585972,0.57171
3,0.4378,0.863598,0.57781,0.562457,0.562213,0.563055
4,0.2938,1.208234,0.585735,0.555679,0.559362,0.555787
4,0.2284,1.254282,0.585014,0.554727,0.558458,0.554881


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.91      0.74       836
           1       0.58      0.19      0.29       552

    accuracy                           0.62      1388
   macro avg       0.60      0.55      0.52      1388
weighted avg       0.61      0.62      0.56      1388

[[759  77]
 [446 106]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.78      0.71       836
           1       0.52      0.36      0.43       552

    accuracy                           0.61      1388
   macro avg       0.59      0.57      0.57      1388
weighted avg       0.60      0.61      0.60      1388

[[653 183]
 [352 200]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.64      0.64       836
           1       0.47      0.49      0.48       552

    accuracy                           0.58      1388
   macro avg       0.56      0.56      0.56      1388
weighted avg       0.58      0.58      0.58      1388

[[531 305]
 [281 271]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.70      0.67       836
           1       0.48      0.41      0.44       552

    accuracy                           0.59      1388
   macro avg       0.56      0.56      0.56      1388
weighted avg       0.58      0.59      0.58      1388

[[587 249]
 [326 226]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.70      0.67       836
           1       0.47      0.41      0.44       552

    accuracy                           0.59      1388
   macro avg       0.56      0.55      0.55      1388
weighted avg       0.58      0.59      0.58      1388

[[587 249]
 [327 225]]


[32m[I 2021-12-09 02:33:43,097][0m Trial 13 pruned. [0m


Trial 13 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6735,0.66764,0.602305,0.375899,0.301153,0.5
2,0.6663,0.666101,0.602305,0.375899,0.301153,0.5
3,0.665,0.664978,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 02:52:03,195][0m Trial 14 pruned. [0m


Trial 14 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6897,0.680506,0.602305,0.375899,0.301153,0.5
2,0.6954,0.686804,0.602305,0.375899,0.301153,0.5
3,0.6829,0.674695,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 03:10:23,204][0m Trial 15 pruned. [0m


Trial 15 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6716,0.664125,0.602305,0.375899,0.301153,0.5
2,0.6616,0.65538,0.607349,0.432131,0.581912,0.515264
3,0.6484,0.650316,0.616715,0.489931,0.595815,0.537194
4,0.647,0.65047,0.612392,0.483189,0.584439,0.532375


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.61      0.97      0.75       836
           1       0.55      0.07      0.12       552

    accuracy                           0.61      1388
   macro avg       0.58      0.52      0.43      1388
weighted avg       0.59      0.61      0.50      1388

[[807  29]
 [516  36]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.62      0.93      0.74       836
           1       0.57      0.15      0.24       552

    accuracy                           0.62      1388
   macro avg       0.60      0.54      0.49      1388
weighted avg       0.60      0.62      0.54      1388

[[774  62]
 [470  82]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.62      0.92      0.74       836
           1       0.55      0.14      0.22       552

    accuracy                           0.61      1388
   macro avg       0.58      0.53      0.48      1388
weighted avg       0.59      0.61      0.54      1388

[[772  64]
 [474  78]]


[32m[I 2021-12-09 03:34:49,004][0m Trial 16 pruned. [0m


Trial 16 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6832,0.672385,0.602305,0.375899,0.301153,0.5
2,0.6835,0.677805,0.602305,0.375899,0.301153,0.5
3,0.6764,0.672168,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 03:53:09,080][0m Trial 17 pruned. [0m


Trial 17 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.671,0.658677,0.600865,0.377024,0.425939,0.499112
2,0.6563,0.650949,0.623199,0.49418,0.616154,0.542577
3,0.6301,0.645559,0.634726,0.562794,0.614976,0.57584
4,0.6219,0.663285,0.623199,0.508283,0.607921,0.547192
5,0.605,0.648305,0.631124,0.574587,0.606863,0.58085


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.25      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.43      0.50      0.38      1388
weighted avg       0.46      0.60      0.45      1388

[[833   3]
 [551   1]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.62      0.94      0.75       836
           1       0.61      0.15      0.24       552

    accuracy                           0.62      1388
   macro avg       0.62      0.54      0.49      1388
weighted avg       0.62      0.62      0.55      1388

[[783  53]
 [470  82]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.86      0.74       836
           1       0.58      0.29      0.39       552

    accuracy                           0.63      1388
   macro avg       0.61      0.58      0.56      1388
weighted avg       0.62      0.63      0.60      1388

[[722 114]
 [393 159]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.92      0.75       836
           1       0.59      0.18      0.27       552

    accuracy                           0.62      1388
   macro avg       0.61      0.55      0.51      1388
weighted avg       0.61      0.62      0.56      1388

[[768  68]
 [455  97]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.83      0.73       836
           1       0.56      0.34      0.42       552

    accuracy                           0.63      1388
   macro avg       0.61      0.58      0.57      1388
weighted avg       0.62      0.63      0.61      1388

[[691 145]
 [367 185]]


[32m[I 2021-12-09 04:23:41,241][0m Trial 18 pruned. [0m


Trial 18 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6852,0.672075,0.602305,0.375899,0.301153,0.5
2,0.6859,0.672365,0.602305,0.375899,0.301153,0.5
2,0.6723,0.67354,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 04:44:10,737][0m Trial 19 pruned. [0m


Trial 19 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.668609,0.602305,0.375899,0.301153,0.5
1,No log,0.6685,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 04:52:19,873][0m Trial 20 finished with value: 0.37589928057553956 and parameters: {'learning_rate': 3.9408929066113793e-07, 'per_device_train_batch_size': 16, 'weight_decay': 0.00022423994688871394, 'num_train_epochs': 1.3812340524477031}. Best is trial 5 with value: 0.5798704

Trial 20 finished with value: 0.37589928057553956 and parameters: {'learning_rate': 3.9408929066113793e-07, 'per_device_train_batch_size': 16, 'weight_decay': 0.00022423994688871394, 'num_train_epochs': 1.3812340524477031}. Best is trial 5 with value: 0.5798704635994764.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6745,0.652708,0.611671,0.4876,0.581363,0.533315
2,0.644,0.657258,0.622478,0.584825,0.597159,0.585366
3,0.4754,0.840099,0.568444,0.561145,0.563169,0.565742
4,0.3313,1.284786,0.607349,0.557708,0.576251,0.562344
5,0.2465,2.116002,0.564121,0.55516,0.556367,0.558461
6,0.1096,2.684119,0.575648,0.558047,0.55794,0.558183
7,0.0625,2.945153,0.585014,0.557518,0.560138,0.557343
7,0.0602,3.048484,0.57781,0.555038,0.555887,0.554747


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.62      0.92      0.74       836
           1       0.54      0.15      0.24       552

    accuracy                           0.61      1388
   macro avg       0.58      0.53      0.49      1388
weighted avg       0.59      0.61      0.54      1388

[[766  70]
 [469  83]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.77      0.71       836
           1       0.53      0.40      0.46       552

    accuracy                           0.62      1388
   macro avg       0.60      0.59      0.58      1388
weighted avg       0.61      0.62      0.61      1388

[[641 195]
 [329 223]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.58      0.62       836
           1       0.46      0.55      0.50       552

    accuracy                           0.57      1388
   macro avg       0.56      0.57      0.56      1388
weighted avg       0.58      0.57      0.57      1388

[[484 352]
 [247 305]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.78      0.71       836
           1       0.51      0.34      0.41       552

    accuracy                           0.61      1388
   macro avg       0.58      0.56      0.56      1388
weighted avg       0.59      0.61      0.59      1388

[[654 182]
 [363 189]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.59      0.62       836
           1       0.46      0.53      0.49       552

    accuracy                           0.56      1388
   macro avg       0.56      0.56      0.56      1388
weighted avg       0.58      0.56      0.57      1388

[[490 346]
 [259 293]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.64      0.65       836
           1       0.47      0.47      0.47       552

    accuracy                           0.58      1388
   macro avg       0.56      0.56      0.56      1388
weighted avg       0.58      0.58      0.58      1388

[[538 298]
 [291 261]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.69      0.67       836
           1       0.48      0.42      0.45       552

    accuracy                           0.59      1388
   macro avg       0.56      0.56      0.56      1388
weighted avg       0.58      0.59      0.58      1388

[[579 257]
 [319 233]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.67      0.66       836
           1       0.47      0.44      0.45       552

    accuracy                           0.58      1388
   macro avg       0.56      0.55      0.56      1388
weighted avg       0.57      0.58      0.58      1388

[[558 278]
 [308 244]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 05:40:42,388][0m Trial 21 finished with value: 0.5550376706558392 and parameters: {'learning_rate': 1.2453777658079151e-05, 'per_device_train_batch_size': 8, 'weight_decay': 6.706387411546666e-12, 'num_train_epochs': 7.915666613133056}. Best is trial 5 with value: 0.5798704635994764.[0m


Trial 21 finished with value: 0.5550376706558392 and parameters: {'learning_rate': 1.2453777658079151e-05, 'per_device_train_batch_size': 8, 'weight_decay': 6.706387411546666e-12, 'num_train_epochs': 7.915666613133056}. Best is trial 5 with value: 0.5798704635994764.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.671,0.65973,0.600865,0.377024,0.425939,0.499112
2,0.657,0.650571,0.626801,0.49754,0.62769,0.545875
3,0.633,0.645979,0.631124,0.552433,0.610799,0.569157


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.25      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.43      0.50      0.38      1388
weighted avg       0.46      0.60      0.45      1388

[[833   3]
 [551   1]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.94      0.75       836
           1       0.63      0.15      0.24       552

    accuracy                           0.63      1388
   macro avg       0.63      0.55      0.50      1388
weighted avg       0.63      0.63      0.55      1388

[[787  49]
 [469  83]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.87      0.74       836
           1       0.58      0.27      0.36       552

    accuracy                           0.63      1388
   macro avg       0.61      0.57      0.55      1388
weighted avg       0.62      0.63      0.59      1388

[[729 107]
 [405 147]]


[32m[I 2021-12-09 05:59:04,274][0m Trial 22 pruned. [0m


Trial 22 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7003,0.673256,0.602305,0.375899,0.301153,0.5
2,0.6819,0.674741,0.602305,0.375899,0.301153,0.5
3,0.6766,0.672216,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 06:17:26,406][0m Trial 23 pruned. [0m


Trial 23 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6764,0.652489,0.628963,0.518419,0.620034,0.554131
2,0.6324,0.667652,0.622478,0.579445,0.59617,0.581366
3,0.4345,0.842653,0.585735,0.562633,0.563792,0.562249
3,0.4345,0.946858,0.574928,0.546415,0.548866,0.546508


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.92      0.75       836
           1       0.61      0.19      0.29       552

    accuracy                           0.63      1388
   macro avg       0.62      0.55      0.52      1388
weighted avg       0.62      0.63      0.57      1388

[[769  67]
 [448 104]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.78      0.71       836
           1       0.54      0.38      0.44       552

    accuracy                           0.62      1388
   macro avg       0.60      0.58      0.58      1388
weighted avg       0.61      0.62      0.61      1388

[[654 182]
 [342 210]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.68      0.66       836
           1       0.48      0.45      0.46       552

    accuracy                           0.59      1388
   macro avg       0.56      0.56      0.56      1388
weighted avg       0.58      0.59      0.58      1388

[[566 270]
 [305 247]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.69      0.66       836
           1       0.46      0.41      0.43       552

    accuracy                           0.57      1388
   macro avg       0.55      0.55      0.55      1388
weighted avg       0.57      0.57      0.57      1388

[[573 263]
 [327 225]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 06:39:05,006][0m Trial 24 finished with value: 0.546415278270117 and parameters: {'learning_rate': 1.8258818958606158e-05, 'per_device_train_batch_size': 8, 'weight_decay': 5.727514302564412e-09, 'num_train_epochs': 3.505760926319672}. Best is trial 5 with value: 0.5798704635994764.[0m


Trial 24 finished with value: 0.546415278270117 and parameters: {'learning_rate': 1.8258818958606158e-05, 'per_device_train_batch_size': 8, 'weight_decay': 5.727514302564412e-09, 'num_train_epochs': 3.505760926319672}. Best is trial 5 with value: 0.5798704635994764.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6831,0.672665,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 06:45:10,797][0m Trial 25 pruned. [0m


Trial 25 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6729,0.667109,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 06:51:16,614][0m Trial 26 pruned. [0m


Trial 26 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6726,0.649548,0.62536,0.521769,0.608049,0.553295
2,0.6455,0.647502,0.635447,0.593473,0.612153,0.594593
3,0.5303,0.709327,0.601585,0.57966,0.580934,0.579099
4,0.4393,0.911712,0.618156,0.561216,0.589088,0.568238
5,0.365,1.099939,0.583573,0.577531,0.580131,0.583533
6,0.2211,1.274562,0.590778,0.568705,0.569704,0.568281
6,0.1853,1.318389,0.590058,0.567197,0.568416,0.56676


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.91      0.74       836
           1       0.58      0.20      0.30       552

    accuracy                           0.63      1388
   macro avg       0.61      0.55      0.52      1388
weighted avg       0.61      0.63      0.57      1388

[[757  79]
 [441 111]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.79      0.72       836
           1       0.56      0.39      0.46       552

    accuracy                           0.64      1388
   macro avg       0.61      0.59      0.59      1388
weighted avg       0.62      0.64      0.62      1388

[[664 172]
 [334 218]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.69      0.68       836
           1       0.50      0.47      0.48       552

    accuracy                           0.60      1388
   macro avg       0.58      0.58      0.58      1388
weighted avg       0.60      0.60      0.60      1388

[[576 260]
 [293 259]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.81      0.72       836
           1       0.53      0.32      0.40       552

    accuracy                           0.62      1388
   macro avg       0.59      0.57      0.56      1388
weighted avg       0.60      0.62      0.59      1388

[[679 157]
 [373 179]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.68      0.58      0.63       836
           1       0.48      0.58      0.53       552

    accuracy                           0.58      1388
   macro avg       0.58      0.58      0.58      1388
weighted avg       0.60      0.58      0.59      1388

[[488 348]
 [230 322]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.68      0.67       836
           1       0.48      0.46      0.47       552

    accuracy                           0.59      1388
   macro avg       0.57      0.57      0.57      1388
weighted avg       0.59      0.59      0.59      1388

[[567 269]
 [299 253]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.68      0.67       836
           1       0.48      0.45      0.47       552

    accuracy                           0.59      1388
   macro avg       0.57      0.57      0.57      1388
weighted avg       0.59      0.59      0.59      1388

[[569 267]
 [302 250]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 07:30:54,633][0m Trial 27 finished with value: 0.5671967570938572 and parameters: {'learning_rate': 8.39866673528553e-06, 'per_device_train_batch_size': 8, 'weight_decay': 5.546042625951289e-10, 'num_train_epochs': 6.5242433406361515}. Best is trial 5 with value: 0.5798704635994764.[0m


Trial 27 finished with value: 0.5671967570938572 and parameters: {'learning_rate': 8.39866673528553e-06, 'per_device_train_batch_size': 8, 'weight_decay': 5.546042625951289e-10, 'num_train_epochs': 6.5242433406361515}. Best is trial 5 with value: 0.5798704635994764.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6729,0.669926,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 07:38:58,416][0m Trial 28 pruned. [0m


Trial 28 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.673416,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 07:44:36,836][0m Trial 29 pruned. [0m


Trial 29 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6718,0.664952,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 07:50:41,595][0m Trial 30 pruned. [0m


Trial 30 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6721,0.649562,0.629683,0.522334,0.6199,0.55596
2,0.6475,0.645027,0.633285,0.59211,0.609541,0.593106
3,0.5524,0.681954,0.60879,0.582063,0.585758,0.581387
4,0.483,0.83875,0.616715,0.554694,0.586751,0.563965
5,0.408,1.008078,0.575648,0.57339,0.581979,0.585262
6,0.2681,1.130766,0.599424,0.574172,0.576644,0.573612
7,0.2315,1.229268,0.597262,0.573596,0.575345,0.573049
7,0.2315,1.229253,0.59438,0.572649,0.573644,0.572195


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.92      0.75       836
           1       0.61      0.20      0.30       552

    accuracy                           0.63      1388
   macro avg       0.62      0.56      0.52      1388
weighted avg       0.62      0.63      0.57      1388

[[766  70]
 [444 108]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.79      0.72       836
           1       0.55      0.40      0.46       552

    accuracy                           0.63      1388
   macro avg       0.61      0.59      0.59      1388
weighted avg       0.62      0.63      0.62      1388

[[660 176]
 [333 219]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.72      0.69       836
           1       0.51      0.45      0.48       552

    accuracy                           0.61      1388
   macro avg       0.59      0.58      0.58      1388
weighted avg       0.60      0.61      0.60      1388

[[598 238]
 [305 247]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.82      0.72       836
           1       0.53      0.31      0.39       552

    accuracy                           0.62      1388
   macro avg       0.59      0.56      0.55      1388
weighted avg       0.60      0.62      0.59      1388

[[687 149]
 [383 169]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.69      0.54      0.60       836
           1       0.47      0.63      0.54       552

    accuracy                           0.58      1388
   macro avg       0.58      0.59      0.57      1388
weighted avg       0.60      0.58      0.58      1388

[[450 386]
 [203 349]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.70      0.68       836
           1       0.50      0.45      0.47       552

    accuracy                           0.60      1388
   macro avg       0.58      0.57      0.57      1388
weighted avg       0.59      0.60      0.60      1388

[[585 251]
 [305 247]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.69      0.67       836
           1       0.49      0.45      0.47       552

    accuracy                           0.60      1388
   macro avg       0.58      0.57      0.57      1388
weighted avg       0.59      0.60      0.59      1388

[[578 258]
 [301 251]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.68      0.67       836
           1       0.49      0.46      0.48       552

    accuracy                           0.59      1388
   macro avg       0.57      0.57      0.57      1388
weighted avg       0.59      0.59      0.59      1388

[[569 267]
 [296 256]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 08:33:49,154][0m Trial 31 finished with value: 0.572648647170611 and parameters: {'learning_rate': 7.208217369611615e-06, 'per_device_train_batch_size': 8, 'weight_decay': 4.9328024437924864e-11, 'num_train_epochs': 7.076886886161601}. Best is trial 5 with value: 0.5798704635994764.[0m


Trial 31 finished with value: 0.572648647170611 and parameters: {'learning_rate': 7.208217369611615e-06, 'per_device_train_batch_size': 8, 'weight_decay': 4.9328024437924864e-11, 'num_train_epochs': 7.076886886161601}. Best is trial 5 with value: 0.5798704635994764.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6711,0.651189,0.615994,0.467481,0.605066,0.530134
2,0.6509,0.645902,0.634006,0.553292,0.616519,0.570934
3,0.5936,0.650207,0.630403,0.582306,0.605647,0.585483
4,0.561,0.715568,0.629683,0.550024,0.608561,0.567345
5,0.5171,0.739537,0.602305,0.592057,0.591969,0.594775
6,0.4372,0.763133,0.613833,0.588561,0.591835,0.587728
6,0.4203,0.772421,0.612392,0.586392,0.58995,0.585609


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.62      0.95      0.75       836
           1       0.59      0.11      0.19       552

    accuracy                           0.62      1388
   macro avg       0.61      0.53      0.47      1388
weighted avg       0.61      0.62      0.53      1388

[[794  42]
 [491  61]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.88      0.74       836
           1       0.59      0.26      0.36       552

    accuracy                           0.63      1388
   macro avg       0.62      0.57      0.55      1388
weighted avg       0.62      0.63      0.59      1388

[[735 101]
 [407 145]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.81      0.72       836
           1       0.55      0.37      0.44       552

    accuracy                           0.63      1388
   macro avg       0.61      0.59      0.58      1388
weighted avg       0.62      0.63      0.61      1388

[[673 163]
 [350 202]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.87      0.74       836
           1       0.58      0.26      0.36       552

    accuracy                           0.63      1388
   macro avg       0.61      0.57      0.55      1388
weighted avg       0.62      0.63      0.59      1388

[[729 107]
 [407 145]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.68      0.63      0.66       836
           1       0.50      0.56      0.53       552

    accuracy                           0.60      1388
   macro avg       0.59      0.59      0.59      1388
weighted avg       0.61      0.60      0.61      1388

[[528 308]
 [244 308]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.72      0.69       836
           1       0.52      0.46      0.49       552

    accuracy                           0.61      1388
   macro avg       0.59      0.59      0.59      1388
weighted avg       0.61      0.61      0.61      1388

[[598 238]
 [298 254]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.72      0.69       836
           1       0.51      0.45      0.48       552

    accuracy                           0.61      1388
   macro avg       0.59      0.59      0.59      1388
weighted avg       0.61      0.61      0.61      1388

[[599 237]
 [301 251]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 09:14:42,704][0m Trial 32 finished with value: 0.5863922367954626 and parameters: {'learning_rate': 4.83852752895182e-06, 'per_device_train_batch_size': 8, 'weight_decay': 6.339600996727805e-11, 'num_train_epochs': 6.760783415989672}. Best is trial 32 with value: 0.5863922367954626.[0m


Trial 32 finished with value: 0.5863922367954626 and parameters: {'learning_rate': 4.83852752895182e-06, 'per_device_train_batch_size': 8, 'weight_decay': 6.339600996727805e-11, 'num_train_epochs': 6.760783415989672}. Best is trial 32 with value: 0.5863922367954626.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6817,0.664857,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 09:20:45,751][0m Trial 33 pruned. [0m


Trial 33 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6744,0.659487,0.608069,0.408672,0.620422,0.510631


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.61      0.99      0.75       836
           1       0.63      0.03      0.07       552

    accuracy                           0.61      1388
   macro avg       0.62      0.51      0.41      1388
weighted avg       0.62      0.61      0.48      1388

[[825  11]
 [533  19]]


[32m[I 2021-12-09 09:27:27,051][0m Trial 34 pruned. [0m


Trial 34 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7094,0.672466,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 09:33:30,581][0m Trial 35 pruned. [0m


Trial 35 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6715,0.668221,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 09:41:34,558][0m Trial 36 pruned. [0m


Trial 36 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.2243,0.892779,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 09:48:17,867][0m Trial 37 pruned. [0m


Trial 37 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6721,0.665595,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 09:54:21,412][0m Trial 38 pruned. [0m


Trial 38 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.673943,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 09:59:59,661][0m Trial 39 pruned. [0m


Trial 39 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6739,0.660106,0.606628,0.403478,0.610923,0.508512


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.61      0.99      0.75       836
           1       0.62      0.03      0.06       552

    accuracy                           0.61      1388
   macro avg       0.61      0.51      0.40      1388
weighted avg       0.61      0.61      0.47      1388

[[826  10]
 [536  16]]


[32m[I 2021-12-09 10:06:41,058][0m Trial 40 pruned. [0m


Trial 40 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6815,0.668988,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 10:12:45,695][0m Trial 41 pruned. [0m


Trial 41 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6722,0.649446,0.626801,0.517766,0.61389,0.552645
2,0.6467,0.645513,0.633285,0.586924,0.609359,0.589414
3,0.5471,0.685102,0.615274,0.581928,0.590114,0.581847
4,0.4745,0.841976,0.616715,0.556347,0.586861,0.564888
5,0.4021,0.959318,0.589337,0.580451,0.581098,0.584009
6,0.2764,1.046144,0.595821,0.573579,0.574779,0.573084
6,0.2764,1.052936,0.596542,0.573593,0.575056,0.573066


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.92      0.75       836
           1       0.60      0.19      0.29       552

    accuracy                           0.63      1388
   macro avg       0.61      0.55      0.52      1388
weighted avg       0.62      0.63      0.56      1388

[[765  71]
 [447 105]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.80      0.73       836
           1       0.56      0.38      0.45       552

    accuracy                           0.63      1388
   macro avg       0.61      0.59      0.59      1388
weighted avg       0.62      0.63      0.62      1388

[[672 164]
 [345 207]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.75      0.70       836
           1       0.52      0.42      0.46       552

    accuracy                           0.62      1388
   macro avg       0.59      0.58      0.58      1388
weighted avg       0.60      0.62      0.61      1388

[[623 213]
 [321 231]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.82      0.72       836
           1       0.53      0.31      0.39       552

    accuracy                           0.62      1388
   macro avg       0.59      0.56      0.56      1388
weighted avg       0.60      0.62      0.59      1388

[[684 152]
 [380 172]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.68      0.61      0.64       836
           1       0.49      0.56      0.52       552

    accuracy                           0.59      1388
   macro avg       0.58      0.58      0.58      1388
weighted avg       0.60      0.59      0.59      1388

[[510 326]
 [244 308]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.68      0.67       836
           1       0.49      0.46      0.48       552

    accuracy                           0.60      1388
   macro avg       0.57      0.57      0.57      1388
weighted avg       0.59      0.60      0.59      1388

[[572 264]
 [297 255]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.69      0.67       836
           1       0.49      0.46      0.47       552

    accuracy                           0.60      1388
   macro avg       0.58      0.57      0.57      1388
weighted avg       0.59      0.60      0.59      1388

[[575 261]
 [299 253]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 10:51:03,375][0m Trial 42 finished with value: 0.573593144838331 and parameters: {'learning_rate': 7.5716420852469945e-06, 'per_device_train_batch_size': 8, 'weight_decay': 5.87359355669018e-10, 'num_train_epochs': 6.277076948084418}. Best is trial 32 with value: 0.5863922367954626.[0m


Trial 42 finished with value: 0.573593144838331 and parameters: {'learning_rate': 7.5716420852469945e-06, 'per_device_train_batch_size': 8, 'weight_decay': 5.87359355669018e-10, 'num_train_epochs': 6.277076948084418}. Best is trial 32 with value: 0.5863922367954626.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6716,0.649869,0.622478,0.502325,0.608664,0.544748
2,0.6489,0.64302,0.637608,0.583321,0.615627,0.588387
3,0.5716,0.662928,0.621037,0.580805,0.59493,0.582016
4,0.5208,0.776753,0.621758,0.554002,0.594016,0.56569
5,0.4571,0.883528,0.582133,0.579587,0.587354,0.590952
6,0.3373,0.941953,0.605187,0.57805,0.581712,0.577474
7,0.3044,0.997363,0.598703,0.573247,0.575768,0.572706
7,0.3044,0.999111,0.599424,0.575115,0.577201,0.574535


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.92      0.75       836
           1       0.59      0.16      0.26       552

    accuracy                           0.62      1388
   macro avg       0.61      0.54      0.50      1388
weighted avg       0.61      0.62      0.55      1388

[[773  63]
 [461  91]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.83      0.73       836
           1       0.57      0.35      0.43       552

    accuracy                           0.64      1388
   macro avg       0.62      0.59      0.58      1388
weighted avg       0.62      0.64      0.61      1388

[[693 143]
 [360 192]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.77      0.71       836
           1       0.53      0.39      0.45       552

    accuracy                           0.62      1388
   macro avg       0.59      0.58      0.58      1388
weighted avg       0.61      0.62      0.61      1388

[[646 190]
 [336 216]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.84      0.73       836
           1       0.55      0.29      0.38       552

    accuracy                           0.62      1388
   macro avg       0.59      0.57      0.55      1388
weighted avg       0.60      0.62      0.59      1388

[[702 134]
 [391 161]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.69      0.55      0.61       836
           1       0.48      0.63      0.55       552

    accuracy                           0.58      1388
   macro avg       0.59      0.59      0.58      1388
weighted avg       0.61      0.58      0.59      1388

[[458 378]
 [202 350]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.71      0.69       836
           1       0.50      0.44      0.47       552

    accuracy                           0.61      1388
   macro avg       0.58      0.58      0.58      1388
weighted avg       0.60      0.61      0.60      1388

[[596 240]
 [308 244]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.70      0.68       836
           1       0.49      0.45      0.47       552

    accuracy                           0.60      1388
   macro avg       0.58      0.57      0.57      1388
weighted avg       0.59      0.60      0.59      1388

[[585 251]
 [306 246]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.70      0.68       836
           1       0.50      0.45      0.47       552

    accuracy                           0.60      1388
   macro avg       0.58      0.57      0.58      1388
weighted avg       0.59      0.60      0.60      1388

[[582 254]
 [302 250]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 11:34:45,587][0m Trial 43 finished with value: 0.57511451726568 and parameters: {'learning_rate': 6.0562533910158e-06, 'per_device_train_batch_size': 8, 'weight_decay': 1.8806461402556896e-11, 'num_train_epochs': 7.177656786890243}. Best is trial 32 with value: 0.5863922367954626.[0m


Trial 43 finished with value: 0.57511451726568 and parameters: {'learning_rate': 6.0562533910158e-06, 'per_device_train_batch_size': 8, 'weight_decay': 1.8806461402556896e-11, 'num_train_epochs': 7.177656786890243}. Best is trial 32 with value: 0.5863922367954626.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6801,0.664104,0.605908,0.412053,0.585609,0.50976


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.61      0.98      0.75       836
           1       0.56      0.04      0.07       552

    accuracy                           0.61      1388
   macro avg       0.59      0.51      0.41      1388
weighted avg       0.59      0.61      0.48      1388

[[819  17]
 [530  22]]


[32m[I 2021-12-09 11:40:50,027][0m Trial 44 pruned. [0m


Trial 44 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6711,0.651178,0.615994,0.465218,0.606746,0.529519


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.62      0.95      0.75       836
           1       0.60      0.11      0.18       552

    accuracy                           0.62      1388
   macro avg       0.61      0.53      0.47      1388
weighted avg       0.61      0.62      0.52      1388

[[796  40]
 [493  59]]


[32m[I 2021-12-09 11:46:54,133][0m Trial 45 pruned. [0m


Trial 45 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6715,0.664123,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 11:52:58,906][0m Trial 46 pruned. [0m


Trial 46 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6712,0.66752,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 12:01:02,850][0m Trial 47 pruned. [0m


Trial 47 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.677,0.655802,0.60879,0.456342,0.579193,0.522307


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.61      0.94      0.74       836
           1       0.54      0.10      0.17       552

    accuracy                           0.61      1388
   macro avg       0.58      0.52      0.46      1388
weighted avg       0.59      0.61      0.52      1388

[[790  46]
 [497  55]]


[32m[I 2021-12-09 12:07:07,490][0m Trial 48 pruned. [0m


Trial 48 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6726,0.666523,0.602305,0.375899,0.301153,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      1.00      0.75       836
           1       0.00      0.00      0.00       552

    accuracy                           0.60      1388
   macro avg       0.30      0.50      0.38      1388
weighted avg       0.36      0.60      0.45      1388

[[836   0]
 [552   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 12:13:11,744][0m Trial 49 pruned. [0m


Trial 49 pruned. 


[32m[I 2021-12-09 12:13:11,796][0m A new study created in memory with name: no-name-d3887b49-6163-469a-bc20-69f321e8efe4[0m


A new study created in memory with name: no-name-d3887b49-6163-469a-bc20-69f321e8efe4


In [12]:
storage_name

'sqlite:///SIMCSE_BERT_MASK_PJ.db'

In [13]:
study_name

'SIMCSE_BERT_MASK_PJ'

In [14]:
study = optuna.create_study(study_name=study_name, storage=storage_name, load_if_exists=True, direction="maximize")
df = study.trials_dataframe(attrs=("number", "value", "params", "state"))

[32m[I 2021-12-09 12:13:11,913][0m Using an existing study with name 'SIMCSE_BERT_MASK_PJ' instead of creating a new one.[0m


Using an existing study with name 'SIMCSE_BERT_MASK_PJ' instead of creating a new one.


In [15]:
df

Unnamed: 0,number,value,params_learning_rate,params_num_train_epochs,params_per_device_train_batch_size,params_weight_decay,state
0,0,0.375899,0.001334051,1.11247,4,8.452287e-08,COMPLETE
1,1,0.375899,0.01706915,2.957653,4,8.356806e-11,COMPLETE
2,2,0.375899,0.004566845,1.852005,2,2.341363e-05,COMPLETE
3,3,0.375899,0.2098964,6.310039,4,0.001135784,COMPLETE
4,4,0.375899,0.04972884,2.812853,4,1.731287e-05,COMPLETE
5,5,0.57987,1.51172e-05,3.080972,8,1.961919e-11,COMPLETE
6,6,0.284536,0.08840512,1.665303,16,9.903374e-09,PRUNED
7,7,0.375899,0.0008815628,4.349901,4,6.833844e-10,PRUNED
8,8,0.375899,0.001549912,2.562984,2,3.888699e-07,COMPLETE
9,9,0.377593,7.913412e-08,1.988087,16,5.004756e-08,COMPLETE


In [16]:
fig = optuna.visualization.plot_param_importances(study)
fig.show()

In [17]:
best_run

BestRun(run_id='32', objective=0.5863922367954626, hyperparameters={'learning_rate': 4.83852752895182e-06, 'num_train_epochs': 6.760783415989672, 'per_device_train_batch_size': 8, 'weight_decay': 6.339600996727805e-11})

In [18]:
optuna.visualization.plot_intermediate_values(study)

In [19]:
optuna.visualization.plot_parallel_coordinate(study)

In [20]:
optuna.visualization.plot_optimization_history(study)

In [21]:
optuna.visualization.plot_contour(study)

In [22]:
optuna.visualization.plot_slice(study)

In [23]:
optuna.visualization.plot_edf(study)