# Initiliation

In [1]:
!pip install transformers datasets --quiet

[K     |████████████████████████████████| 3.1 MB 11.3 MB/s 
[K     |████████████████████████████████| 298 kB 6.6 MB/s 
[K     |████████████████████████████████| 61 kB 279 kB/s 
[K     |████████████████████████████████| 3.3 MB 42.3 MB/s 
[K     |████████████████████████████████| 596 kB 46.2 MB/s 
[K     |████████████████████████████████| 895 kB 46.3 MB/s 
[K     |████████████████████████████████| 243 kB 51.2 MB/s 
[K     |████████████████████████████████| 132 kB 25.9 MB/s 
[K     |████████████████████████████████| 1.1 MB 48.4 MB/s 
[K     |████████████████████████████████| 192 kB 42.7 MB/s 
[K     |████████████████████████████████| 271 kB 49.1 MB/s 
[K     |████████████████████████████████| 160 kB 44.8 MB/s 
[?25h

In [2]:
from transformers import TrainingArguments
from transformers import Trainer
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer

from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, classification_report, confusion_matrix
from datasets import Dataset
from datasets import load_metric

import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from google.colab import drive

# Data Preparation

In [3]:
drive.mount('/content/drive/')

Mounted at /content/drive/


In [4]:
%cd 'drive/MyDrive/Masterarbeit/Colab Notebooks/OVERVIEW MBTI/Datasets/URL_Balanced_MASK'

/content/drive/.shortcut-targets-by-id/1aHXlqhpj1STohhfU4gn53D4whaLH__Jz/Masterarbeit/Colab Notebooks/OVERVIEW MBTI/Datasets/URL_Balanced_MASK


In [5]:
dfTF = pd.read_csv('MBTI_TF_URL_MASK.csv', sep=",", error_bad_lines=False)
dfTF

Unnamed: 0,text,label
0,i like that you are kind as [MASK] i find that...,1
1,oh my you are right who really talks like tha...,1
2,yep yep yep especially the last one yep agree ...,1
3,things that are generalizable to the entire po...,1
4,work student hobbies studying gaming reading d...,0
...,...,...
6935,well mostly i dont like avocado but the primar...,1
6936,during an argument rather than trying to valid...,1
6937,cigarettes are like hamsters perfectly harmles...,1
6938,bookshelf porn as a nonamerican please excuse ...,0


# Model Training

In [6]:
modeltype = "princeton-nlp/sup-simcse-bert-base-uncased"

In [7]:
train, test = train_test_split(dfTF, test_size=0.2, random_state=0, stratify=dfTF.label)

train = Dataset.from_pandas(train)
test = Dataset.from_pandas(test)

tokenizer = AutoTokenizer.from_pretrained(modeltype)

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_train = train.map(tokenize_function, batched=True)
tokenized_test = test.map(tokenize_function, batched=True)

full_train_dataset = tokenized_train
full_eval_dataset = tokenized_test

model = AutoModelForSequenceClassification.from_pretrained(modeltype, num_labels=2)

training_args = TrainingArguments(
    "SIMCSE_BERT_TF_MASK", 
    evaluation_strategy="epoch",
    save_strategy = 'no',
    save_steps = 100000,
    save_total_limit = 1,
    metric_for_best_model="eval_f1")

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')
    acc = accuracy_score(labels, preds)
    print(classification_report(labels, preds, labels=[0,1]))
    print(confusion_matrix(labels,preds))
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

Downloading:   0%|          | 0.00/252 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/689 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

  0%|          | 0/6 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

Downloading:   0%|          | 0.00/418M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Hyperparameter Optimization

In [8]:
! pip install optuna --quiet

[K     |████████████████████████████████| 308 kB 10.8 MB/s 
[K     |████████████████████████████████| 209 kB 48.3 MB/s 
[K     |████████████████████████████████| 80 kB 8.3 MB/s 
[K     |████████████████████████████████| 75 kB 4.4 MB/s 
[K     |████████████████████████████████| 49 kB 5.5 MB/s 
[K     |████████████████████████████████| 149 kB 49.4 MB/s 
[K     |████████████████████████████████| 112 kB 47.5 MB/s 
[?25h  Building wheel for pyperclip (setup.py) ... [?25l[?25hdone


In [9]:
def model_init():
    return AutoModelForSequenceClassification.from_pretrained(modeltype, num_labels=2)

In [10]:
trainer = Trainer(
      model_init=model_init,
      args=training_args, 
      train_dataset=full_train_dataset, 
      eval_dataset=full_eval_dataset,
      compute_metrics=compute_metrics 
  )

loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size": 2,
 

In [11]:
import sklearn.metrics as metrics
import optuna
import sys
import logging

def objective (metrics):
  return metrics['eval_f1']

def hyperparameter_space(trial):

    return {
        "learning_rate": trial.suggest_float("learning_rate", 5e-8, 5e-1, log=True),
        "per_device_train_batch_size": trial.suggest_categorical("per_device_train_batch_size", [2, 4, 8, 16]),
        "weight_decay": trial.suggest_float("weight_decay", 5e-12, 5e-1, log=True),
        "num_train_epochs": trial.suggest_float("num_train_epochs",1,8,log=True),
        #"adam_epsilon": trial.suggest_float("adam_epsilon", 1e-10, 1e-6, log=True),
        #"seed" : trial.suggest_float("seed",10,60,log=True)
        }

optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))
study_name = "SIMCSE_BERT_MASK_TF"  # Unique identifier of the study.
storage_name = "sqlite:///{}.db".format(study_name)

best_run = trainer.hyperparameter_search(hp_space=hyperparameter_space,compute_objective=objective, n_trials=50, direction="maximize",study_name=study_name, storage=storage_name )

study = optuna.create_study()

[32m[I 2021-12-07 21:27:13,913][0m A new study created in RDB with name: SIMCSE_BERT_MASK_TF[0m


A new study created in RDB with name: SIMCSE_BERT_MASK_TF


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6774,0.662608,0.623919,0.577061,0.647011,0.598964
2,0.657,0.625932,0.663545,0.637052,0.680015,0.644552
3,0.6054,0.595617,0.698847,0.689882,0.700235,0.689158
4,0.5833,0.58211,0.698847,0.686449,0.70452,0.686562
5,0.5784,0.570028,0.708934,0.701433,0.70963,0.700363
6,0.5613,0.566542,0.708213,0.699901,0.709775,0.698923
6,0.5548,0.565216,0.709654,0.70153,0.711131,0.700506


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.69      0.32      0.44       632
           1       0.61      0.88      0.72       756

    accuracy                           0.62      1388
   macro avg       0.65      0.60      0.58      1388
weighted avg       0.64      0.62      0.59      1388

[[202 430]
 [ 92 664]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.72      0.43      0.54       632
           1       0.64      0.86      0.74       756

    accuracy                           0.66      1388
   macro avg       0.68      0.64      0.64      1388
weighted avg       0.68      0.66      0.65      1388

[[273 359]
 [108 648]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.71      0.58      0.64       632
           1       0.69      0.80      0.74       756

    accuracy                           0.70      1388
   macro avg       0.70      0.69      0.69      1388
weighted avg       0.70      0.70      0.69      1388

[[367 265]
 [153 603]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.72      0.55      0.62       632
           1       0.69      0.82      0.75       756

    accuracy                           0.70      1388
   macro avg       0.70      0.69      0.69      1388
weighted avg       0.70      0.70      0.69      1388

[[347 285]
 [133 623]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.71      0.60      0.65       632
           1       0.71      0.80      0.75       756

    accuracy                           0.71      1388
   macro avg       0.71      0.70      0.70      1388
weighted avg       0.71      0.71      0.71      1388

[[382 250]
 [154 602]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.72      0.59      0.65       632
           1       0.70      0.80      0.75       756

    accuracy                           0.71      1388
   macro avg       0.71      0.70      0.70      1388
weighted avg       0.71      0.71      0.70      1388

[[376 256]
 [149 607]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.72      0.60      0.65       632
           1       0.70      0.80      0.75       756

    accuracy                           0.71      1388
   macro avg       0.71      0.70      0.70      1388
weighted avg       0.71      0.71      0.71      1388

[[378 254]
 [149 607]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-07 22:08:51,352][0m Trial 0 finished with value: 0.7015297451634195 and parameters: {'learning_rate': 5.113164637727514e-07, 'per_device_train_batch_size': 8, 'weight_decay': 0.0004337243841065953, 'num_train_epochs': 6.90519349381888}. Best is trial 0 with value: 0.7015297451634195.[0m


Trial 0 finished with value: 0.7015297451634195 and parameters: {'learning_rate': 5.113164637727514e-07, 'per_device_train_batch_size': 8, 'weight_decay': 0.0004337243841065953, 'num_train_epochs': 6.90519349381888}. Best is trial 0 with value: 0.7015297451634195.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6103,0.588189,0.684438,0.677471,0.736268,0.70318
2,0.5138,0.531387,0.742795,0.742391,0.743271,0.745203
3,0.3654,0.757042,0.737032,0.735339,0.73505,0.735759
4,0.2332,1.089246,0.73487,0.730232,0.734147,0.728974
4,0.2332,1.090853,0.731988,0.727609,0.730881,0.726458


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      0.91      0.72       632
           1       0.87      0.49      0.63       756

    accuracy                           0.68      1388
   macro avg       0.74      0.70      0.68      1388
weighted avg       0.75      0.68      0.67      1388

[[577  55]
 [383 373]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.70      0.77      0.73       632
           1       0.79      0.72      0.75       756

    accuracy                           0.74      1388
   macro avg       0.74      0.75      0.74      1388
weighted avg       0.75      0.74      0.74      1388

[[488 144]
 [213 543]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.71      0.72      0.71       632
           1       0.76      0.75      0.76       756

    accuracy                           0.74      1388
   macro avg       0.74      0.74      0.74      1388
weighted avg       0.74      0.74      0.74      1388

[[456 176]
 [189 567]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.73      0.66      0.69       632
           1       0.74      0.79      0.77       756

    accuracy                           0.73      1388
   macro avg       0.73      0.73      0.73      1388
weighted avg       0.73      0.73      0.73      1388

[[419 213]
 [155 601]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.72      0.66      0.69       632
           1       0.74      0.79      0.76       756

    accuracy                           0.73      1388
   macro avg       0.73      0.73      0.73      1388
weighted avg       0.73      0.73      0.73      1388

[[420 212]
 [160 596]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-07 22:33:35,880][0m Trial 1 finished with value: 0.7276088222633006 and parameters: {'learning_rate': 2.8957120638078643e-05, 'per_device_train_batch_size': 8, 'weight_decay': 1.1108012743093335e-09, 'num_train_epochs': 4.03344948971642}. Best is trial 1 with value: 0.7276088222633006.[0m


Trial 1 finished with value: 0.7276088222633006 and parameters: {'learning_rate': 2.8957120638078643e-05, 'per_device_train_batch_size': 8, 'weight_decay': 1.1108012743093335e-09, 'num_train_epochs': 4.03344948971642}. Best is trial 1 with value: 0.7276088222633006.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.612012,0.659942,0.646774,0.734117,0.68264
2,0.575000,0.550747,0.753602,0.751214,0.751701,0.750841
2,0.575000,0.580998,0.754323,0.752182,0.752362,0.752022


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.58      0.94      0.71       632
           1       0.89      0.43      0.58       756

    accuracy                           0.66      1388
   macro avg       0.73      0.68      0.65      1388
weighted avg       0.75      0.66      0.64      1388

[[592  40]
 [432 324]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.73      0.72      0.73       632
           1       0.77      0.78      0.78       756

    accuracy                           0.75      1388
   macro avg       0.75      0.75      0.75      1388
weighted avg       0.75      0.75      0.75      1388

[[455 177]
 [165 591]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.73      0.73      0.73       632
           1       0.77      0.78      0.78       756

    accuracy                           0.75      1388
   macro avg       0.75      0.75      0.75      1388
weighted avg       0.75      0.75      0.75      1388

[[459 173]
 [168 588]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-07 22:46:12,286][0m Trial 2 finished with value: 0.7521821788855245 and parameters: {'learning_rate': 0.00010383242911913873, 'per_device_train_batch_size': 16, 'weight_decay': 4.717749467497328e-07, 'num_train_epochs': 2.169265655030074}. Best is trial 2 with value: 0.7521821788855245.[0m


Trial 2 finished with value: 0.7521821788855245 and parameters: {'learning_rate': 0.00010383242911913873, 'per_device_train_batch_size': 16, 'weight_decay': 4.717749467497328e-07, 'num_train_epochs': 2.169265655030074}. Best is trial 2 with value: 0.7521821788855245.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,7.6828,1.960315,0.544669,0.352612,0.272334,0.5
2,5.6479,3.962965,0.455331,0.312871,0.227666,0.5
3,2.7656,2.521621,0.544669,0.352612,0.272334,0.5
3,2.7656,0.742753,0.455331,0.312871,0.227666,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       632
           1       0.54      1.00      0.71       756

    accuracy                           0.54      1388
   macro avg       0.27      0.50      0.35      1388
weighted avg       0.30      0.54      0.38      1388

[[  0 632]
 [  0 756]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.46      1.00      0.63       632
           1       0.00      0.00      0.00       756

    accuracy                           0.46      1388
   macro avg       0.23      0.50      0.31      1388
weighted avg       0.21      0.46      0.28      1388

[[632   0]
 [756   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       632
           1       0.54      1.00      0.71       756

    accuracy                           0.54      1388
   macro avg       0.27      0.50      0.35      1388
weighted avg       0.30      0.54      0.38      1388

[[  0 632]
 [  0 756]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.46      1.00      0.63       632
           1       0.00      0.00      0.00       756

    accuracy                           0.46      1388
   macro avg       0.23      0.50      0.31      1388
weighted avg       0.21      0.46      0.28      1388

[[632   0]
 [756   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-07 23:08:00,670][0m Trial 3 finished with value: 0.31287128712871287 and parameters: {'learning_rate': 0.06032012701524992, 'per_device_train_batch_size': 4, 'weight_decay': 5.597111728168741e-09, 'num_train_epochs': 3.150463025055146}. Best is trial 2 with value: 0.75218217888552

Trial 3 finished with value: 0.31287128712871287 and parameters: {'learning_rate': 0.06032012701524992, 'per_device_train_batch_size': 4, 'weight_decay': 5.597111728168741e-09, 'num_train_epochs': 3.150463025055146}. Best is trial 2 with value: 0.7521821788855245.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7167,0.700985,0.455331,0.312871,0.227666,0.5
2,0.6996,0.689237,0.544669,0.352612,0.272334,0.5
3,0.6937,0.689379,0.544669,0.352612,0.272334,0.5
4,0.6927,0.68917,0.544669,0.352612,0.272334,0.5
5,0.6891,0.689151,0.544669,0.352612,0.272334,0.5
5,0.6897,0.689181,0.544669,0.352612,0.272334,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.46      1.00      0.63       632
           1       0.00      0.00      0.00       756

    accuracy                           0.46      1388
   macro avg       0.23      0.50      0.31      1388
weighted avg       0.21      0.46      0.28      1388

[[632   0]
 [756   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       632
           1       0.54      1.00      0.71       756

    accuracy                           0.54      1388
   macro avg       0.27      0.50      0.35      1388
weighted avg       0.30      0.54      0.38      1388

[[  0 632]
 [  0 756]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       632
           1       0.54      1.00      0.71       756

    accuracy                           0.54      1388
   macro avg       0.27      0.50      0.35      1388
weighted avg       0.30      0.54      0.38      1388

[[  0 632]
 [  0 756]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       632
           1       0.54      1.00      0.71       756

    accuracy                           0.54      1388
   macro avg       0.27      0.50      0.35      1388
weighted avg       0.30      0.54      0.38      1388

[[  0 632]
 [  0 756]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       632
           1       0.54      1.00      0.71       756

    accuracy                           0.54      1388
   macro avg       0.27      0.50      0.35      1388
weighted avg       0.30      0.54      0.38      1388

[[  0 632]
 [  0 756]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       632
           1       0.54      1.00      0.71       756

    accuracy                           0.54      1388
   macro avg       0.27      0.50      0.35      1388
weighted avg       0.30      0.54      0.38      1388

[[  0 632]
 [  0 756]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-07 23:40:39,478][0m Trial 4 finished with value: 0.35261194029850745 and parameters: {'learning_rate': 0.00034161359596541536, 'per_device_train_batch_size': 8, 'weight_decay': 0.019259178689961923, 'num_train_epochs': 5.336965770180979}. Best is trial 2 with value: 0.752182178885

Trial 4 finished with value: 0.35261194029850745 and parameters: {'learning_rate': 0.00034161359596541536, 'per_device_train_batch_size': 8, 'weight_decay': 0.019259178689961923, 'num_train_epochs': 5.336965770180979}. Best is trial 2 with value: 0.7521821788855245.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6772,0.669966,0.585735,0.499189,0.617037,0.553011


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.19      0.29       632
           1       0.57      0.92      0.71       756

    accuracy                           0.59      1388
   macro avg       0.62      0.55      0.50      1388
weighted avg       0.61      0.59      0.52      1388

[[118 514]
 [ 61 695]]


[32m[I 2021-12-07 23:47:26,245][0m Trial 5 pruned. [0m


Trial 5 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,14.8067,15.145612,0.455331,0.312871,0.227666,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.46      1.00      0.63       632
           1       0.00      0.00      0.00       756

    accuracy                           0.46      1388
   macro avg       0.23      0.50      0.31      1388
weighted avg       0.21      0.46      0.28      1388

[[632   0]
 [756   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-07 23:54:14,529][0m Trial 6 pruned. [0m


Trial 6 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.328,1.779015,0.455331,0.312871,0.227666,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.46      1.00      0.63       632
           1       0.00      0.00      0.00       756

    accuracy                           0.46      1388
   macro avg       0.23      0.50      0.31      1388
weighted avg       0.21      0.46      0.28      1388

[[632   0]
 [756   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-08 00:00:20,883][0m Trial 7 pruned. [0m


Trial 7 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.686017,0.560519,0.47652,0.558224,0.528954


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.56      0.18      0.27       632
           1       0.56      0.88      0.69       756

    accuracy                           0.56      1388
   macro avg       0.56      0.53      0.48      1388
weighted avg       0.56      0.56      0.50      1388

[[111 521]
 [ 89 667]]


[32m[I 2021-12-08 00:06:01,099][0m Trial 8 pruned. [0m


Trial 8 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.8052,0.737043,0.7183,0.717976,0.731014,0.727128
2,0.866,0.857907,0.744957,0.743831,0.743481,0.744981
3,0.7832,1.058639,0.745677,0.734483,0.757958,0.733185
4,0.6893,1.190879,0.729107,0.710936,0.753802,0.712394
5,0.6364,1.094498,0.75,0.746055,0.749231,0.744809
6,0.5019,1.338237,0.733429,0.719624,0.748865,0.719346
7,0.4862,1.300148,0.743516,0.740955,0.741521,0.740544
7,0.4656,1.350096,0.743516,0.739129,0.742944,0.737819


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.83      0.73       632
           1       0.81      0.63      0.71       756

    accuracy                           0.72      1388
   macro avg       0.73      0.73      0.72      1388
weighted avg       0.74      0.72      0.72      1388

[[522 110]
 [281 475]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.71      0.75      0.73       632
           1       0.78      0.74      0.76       756

    accuracy                           0.74      1388
   macro avg       0.74      0.74      0.74      1388
weighted avg       0.75      0.74      0.75      1388

[[471 161]
 [193 563]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.80      0.59      0.68       632
           1       0.72      0.87      0.79       756

    accuracy                           0.75      1388
   macro avg       0.76      0.73      0.73      1388
weighted avg       0.75      0.75      0.74      1388

[[375 257]
 [ 96 660]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.81      0.53      0.64       632
           1       0.69      0.90      0.78       756

    accuracy                           0.73      1388
   macro avg       0.75      0.71      0.71      1388
weighted avg       0.75      0.73      0.72      1388

[[332 300]
 [ 76 680]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.74      0.69      0.71       632
           1       0.75      0.80      0.78       756

    accuracy                           0.75      1388
   macro avg       0.75      0.74      0.75      1388
weighted avg       0.75      0.75      0.75      1388

[[434 198]
 [149 607]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.79      0.56      0.66       632
           1       0.71      0.88      0.78       756

    accuracy                           0.73      1388
   macro avg       0.75      0.72      0.72      1388
weighted avg       0.74      0.73      0.73      1388

[[355 277]
 [ 93 663]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.72      0.71      0.72       632
           1       0.76      0.77      0.77       756

    accuracy                           0.74      1388
   macro avg       0.74      0.74      0.74      1388
weighted avg       0.74      0.74      0.74      1388

[[447 185]
 [171 585]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.74      0.67      0.71       632
           1       0.75      0.80      0.77       756

    accuracy                           0.74      1388
   macro avg       0.74      0.74      0.74      1388
weighted avg       0.74      0.74      0.74      1388

[[426 206]
 [150 606]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 01:10:20,139][0m Trial 9 finished with value: 0.7391285984592513 and parameters: {'learning_rate': 3.515249330255268e-06, 'per_device_train_batch_size': 2, 'weight_decay': 0.00015865248205273314, 'num_train_epochs': 7.852372823525647}. Best is trial 2 with value: 0.7521821788855245.[0m


Trial 9 finished with value: 0.7391285984592513 and parameters: {'learning_rate': 3.515249330255268e-06, 'per_device_train_batch_size': 2, 'weight_decay': 0.00015865248205273314, 'num_train_epochs': 7.852372823525647}. Best is trial 2 with value: 0.7521821788855245.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.689222,0.544669,0.352612,0.272334,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       632
           1       0.54      1.00      0.71       756

    accuracy                           0.54      1388
   macro avg       0.27      0.50      0.35      1388
weighted avg       0.30      0.54      0.38      1388

[[  0 632]
 [  0 756]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-08 01:16:00,522][0m Trial 10 pruned. [0m


Trial 10 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7956,0.810653,0.737752,0.737488,0.750417,0.746542
1,0.7511,0.995395,0.757925,0.754908,0.756511,0.754031


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.84      0.75       632
           1       0.83      0.65      0.73       756

    accuracy                           0.74      1388
   macro avg       0.75      0.75      0.74      1388
weighted avg       0.76      0.74      0.74      1388

[[534  98]
 [266 490]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.75      0.71      0.73       632
           1       0.77      0.80      0.78       756

    accuracy                           0.76      1388
   macro avg       0.76      0.75      0.75      1388
weighted avg       0.76      0.76      0.76      1388

[[449 183]
 [153 603]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 01:30:49,806][0m Trial 11 finished with value: 0.754907958049808 and parameters: {'learning_rate': 3.558787916816092e-05, 'per_device_train_batch_size': 2, 'weight_decay': 2.6453705505426204e-05, 'num_train_epochs': 1.796459751947201}. Best is trial 11 with value: 0.754907958049808.[0m


Trial 11 finished with value: 0.754907958049808 and parameters: {'learning_rate': 3.558787916816092e-05, 'per_device_train_batch_size': 2, 'weight_decay': 2.6453705505426204e-05, 'num_train_epochs': 1.796459751947201}. Best is trial 11 with value: 0.754907958049808.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.8636,0.672961,0.75,0.749688,0.750925,0.752855
1,0.7323,1.015488,0.75072,0.745649,0.751471,0.744043


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.70      0.78      0.74       632
           1       0.80      0.72      0.76       756

    accuracy                           0.75      1388
   macro avg       0.75      0.75      0.75      1388
weighted avg       0.76      0.75      0.75      1388

[[496 136]
 [211 545]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.76      0.67      0.71       632
           1       0.75      0.82      0.78       756

    accuracy                           0.75      1388
   macro avg       0.75      0.74      0.75      1388
weighted avg       0.75      0.75      0.75      1388

[[423 209]
 [137 619]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 01:45:18,139][0m Trial 12 finished with value: 0.7456486000949089 and parameters: {'learning_rate': 2.879490739702494e-05, 'per_device_train_batch_size': 2, 'weight_decay': 1.2764023216744377e-11, 'num_train_epochs': 1.7529057098780358}. Best is trial 11 with value: 0.754907958049808.[0m


Trial 12 finished with value: 0.7456486000949089 and parameters: {'learning_rate': 2.879490739702494e-05, 'per_device_train_batch_size': 2, 'weight_decay': 1.2764023216744377e-11, 'num_train_epochs': 1.7529057098780358}. Best is trial 11 with value: 0.754907958049808.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.537811,0.721182,0.720677,0.736148,0.730812
2,0.550400,0.511596,0.762248,0.759046,0.761148,0.757999
2,0.550400,0.512572,0.760807,0.757336,0.759951,0.756157


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.84      0.73       632
           1       0.82      0.62      0.71       756

    accuracy                           0.72      1388
   macro avg       0.74      0.73      0.72      1388
weighted avg       0.74      0.72      0.72      1388

[[530 102]
 [285 471]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.75      0.71      0.73       632
           1       0.77      0.81      0.79       756

    accuracy                           0.76      1388
   macro avg       0.76      0.76      0.76      1388
weighted avg       0.76      0.76      0.76      1388

[[449 183]
 [147 609]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.75      0.70      0.73       632
           1       0.77      0.81      0.79       756

    accuracy                           0.76      1388
   macro avg       0.76      0.76      0.76      1388
weighted avg       0.76      0.76      0.76      1388

[[445 187]
 [145 611]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 01:57:14,046][0m Trial 13 finished with value: 0.7573360126551616 and parameters: {'learning_rate': 2.6781315018468085e-05, 'per_device_train_batch_size': 16, 'weight_decay': 3.4450729101456406e-06, 'num_train_epochs': 2.03163161021332}. Best is trial 13 with value: 0.7573360126551616.[0m


Trial 13 finished with value: 0.7573360126551616 and parameters: {'learning_rate': 2.6781315018468085e-05, 'per_device_train_batch_size': 16, 'weight_decay': 3.4450729101456406e-06, 'num_train_epochs': 2.03163161021332}. Best is trial 13 with value: 0.7573360126551616.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.8098,0.739764,0.7183,0.717948,0.719213,0.720899
1,0.7465,0.841105,0.739193,0.733997,0.739356,0.732553


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.75      0.71       632
           1       0.77      0.69      0.73       756

    accuracy                           0.72      1388
   macro avg       0.72      0.72      0.72      1388
weighted avg       0.72      0.72      0.72      1388

[[474 158]
 [233 523]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.74      0.66      0.70       632
           1       0.74      0.81      0.77       756

    accuracy                           0.74      1388
   macro avg       0.74      0.73      0.73      1388
weighted avg       0.74      0.74      0.74      1388

[[416 216]
 [146 610]]


[32m[I 2021-12-08 02:08:48,727][0m Trial 14 pruned. [0m


Trial 14 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.9309,0.847542,0.455331,0.312871,0.227666,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.46      1.00      0.63       632
           1       0.00      0.00      0.00       756

    accuracy                           0.46      1388
   macro avg       0.23      0.50      0.31      1388
weighted avg       0.21      0.46      0.28      1388

[[632   0]
 [756   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-08 02:17:01,248][0m Trial 15 pruned. [0m


Trial 15 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.617877,0.682997,0.669749,0.687232,0.670455


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.70      0.53      0.60       632
           1       0.67      0.81      0.74       756

    accuracy                           0.68      1388
   macro avg       0.69      0.67      0.67      1388
weighted avg       0.69      0.68      0.68      1388

[[335 297]
 [143 613]]


[32m[I 2021-12-08 02:22:41,755][0m Trial 16 pruned. [0m


Trial 16 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.846,0.712204,0.751441,0.750928,0.751377,0.753399
1,0.7698,0.948782,0.754323,0.750168,0.754012,0.748778


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.71      0.78      0.74       632
           1       0.80      0.73      0.76       756

    accuracy                           0.75      1388
   macro avg       0.75      0.75      0.75      1388
weighted avg       0.76      0.75      0.75      1388

[[490 142]
 [203 553]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.75      0.69      0.72       632
           1       0.76      0.81      0.78       756

    accuracy                           0.75      1388
   macro avg       0.75      0.75      0.75      1388
weighted avg       0.75      0.75      0.75      1388

[[434 198]
 [143 613]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 02:35:19,567][0m Trial 17 finished with value: 0.7501677220885898 and parameters: {'learning_rate': 2.1025362834713015e-05, 'per_device_train_batch_size': 2, 'weight_decay': 5.167420942207734e-06, 'num_train_epochs': 1.5165589271751867}. Best is trial 13 with value: 0.7573360126551616.[0m


Trial 17 finished with value: 0.7501677220885898 and parameters: {'learning_rate': 2.1025362834713015e-05, 'per_device_train_batch_size': 2, 'weight_decay': 5.167420942207734e-06, 'num_train_epochs': 1.5165589271751867}. Best is trial 13 with value: 0.7573360126551616.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.692968,0.544669,0.352612,0.272334,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       632
           1       0.54      1.00      0.71       756

    accuracy                           0.54      1388
   macro avg       0.27      0.50      0.35      1388
weighted avg       0.30      0.54      0.38      1388

[[  0 632]
 [  0 756]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-08 02:40:59,724][0m Trial 18 pruned. [0m


Trial 18 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.72675,0.455331,0.312871,0.227666,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.46      1.00      0.63       632
           1       0.00      0.00      0.00       756

    accuracy                           0.46      1388
   macro avg       0.23      0.50      0.31      1388
weighted avg       0.21      0.46      0.28      1388

[[632   0]
 [756   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-08 02:46:39,807][0m Trial 19 pruned. [0m


Trial 19 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.8598,0.740463,0.729827,0.729803,0.737427,0.736542
2,0.8223,0.943497,0.755043,0.749192,0.757414,0.747363
2,0.682,1.018036,0.764409,0.761356,0.763227,0.760373


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.81      0.73       632
           1       0.81      0.66      0.73       756

    accuracy                           0.73      1388
   macro avg       0.74      0.74      0.73      1388
weighted avg       0.74      0.73      0.73      1388

[[513 119]
 [256 500]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.77      0.66      0.71       632
           1       0.75      0.83      0.79       756

    accuracy                           0.76      1388
   macro avg       0.76      0.75      0.75      1388
weighted avg       0.76      0.76      0.75      1388

[[418 214]
 [126 630]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.75      0.72      0.73       632
           1       0.77      0.81      0.79       756

    accuracy                           0.76      1388
   macro avg       0.76      0.76      0.76      1388
weighted avg       0.76      0.76      0.76      1388

[[452 180]
 [147 609]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 03:06:20,868][0m Trial 20 finished with value: 0.7613559108152659 and parameters: {'learning_rate': 9.550187523162385e-06, 'per_device_train_batch_size': 2, 'weight_decay': 7.344308135535765e-11, 'num_train_epochs': 2.391488658266402}. Best is trial 20 with value: 0.7613559108152659.[0m


Trial 20 finished with value: 0.7613559108152659 and parameters: {'learning_rate': 9.550187523162385e-06, 'per_device_train_batch_size': 2, 'weight_decay': 7.344308135535765e-11, 'num_train_epochs': 2.391488658266402}. Best is trial 20 with value: 0.7613559108152659.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.8586,0.737413,0.733429,0.733374,0.737001,0.738032
2,0.7946,0.92744,0.757205,0.753283,0.756738,0.751942
2,0.6991,1.024544,0.754323,0.750354,0.753778,0.749037


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.68      0.79      0.73       632
           1       0.80      0.69      0.74       756

    accuracy                           0.73      1388
   macro avg       0.74      0.74      0.73      1388
weighted avg       0.74      0.73      0.73      1388

[[499 133]
 [237 519]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.75      0.69      0.72       632
           1       0.76      0.81      0.78       756

    accuracy                           0.76      1388
   macro avg       0.76      0.75      0.75      1388
weighted avg       0.76      0.76      0.76      1388

[[438 194]
 [143 613]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.75      0.69      0.72       632
           1       0.76      0.81      0.78       756

    accuracy                           0.75      1388
   macro avg       0.75      0.75      0.75      1388
weighted avg       0.75      0.75      0.75      1388

[[436 196]
 [145 611]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 03:28:07,512][0m Trial 21 finished with value: 0.7503543136600246 and parameters: {'learning_rate': 1.1044057137355722e-05, 'per_device_train_batch_size': 2, 'weight_decay': 2.470141657816699e-11, 'num_train_epochs': 2.640014269888514}. Best is trial 20 with value: 0.7613559108152659.[0m


Trial 21 finished with value: 0.7503543136600246 and parameters: {'learning_rate': 1.1044057137355722e-05, 'per_device_train_batch_size': 2, 'weight_decay': 2.470141657816699e-11, 'num_train_epochs': 2.640014269888514}. Best is trial 20 with value: 0.7613559108152659.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6969,0.694502,0.455331,0.312871,0.227666,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.46      1.00      0.63       632
           1       0.00      0.00      0.00       756

    accuracy                           0.46      1388
   macro avg       0.23      0.50      0.31      1388
weighted avg       0.21      0.46      0.28      1388

[[632   0]
 [756   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-08 03:36:24,319][0m Trial 22 pruned. [0m


Trial 22 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6226,0.605707,0.685879,0.674637,0.688281,0.674658


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.70      0.55      0.61       632
           1       0.68      0.80      0.74       756

    accuracy                           0.69      1388
   macro avg       0.69      0.67      0.67      1388
weighted avg       0.69      0.69      0.68      1388

[[347 285]
 [151 605]]


[32m[I 2021-12-08 03:44:43,533][0m Trial 23 pruned. [0m


Trial 23 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7023,0.694631,0.455331,0.312871,0.227666,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.46      1.00      0.63       632
           1       0.00      0.00      0.00       756

    accuracy                           0.46      1388
   macro avg       0.23      0.50      0.31      1388
weighted avg       0.21      0.46      0.28      1388

[[632   0]
 [756   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-08 03:53:03,485][0m Trial 24 pruned. [0m


Trial 24 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.8453,0.745943,0.732709,0.732705,0.739251,0.738928
2,0.8113,0.944119,0.755764,0.749873,0.758271,0.748024
2,0.8113,0.946383,0.752882,0.748797,0.752411,0.747455


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.81      0.73       632
           1       0.81      0.67      0.73       756

    accuracy                           0.73      1388
   macro avg       0.74      0.74      0.73      1388
weighted avg       0.75      0.73      0.73      1388

[[511 121]
 [250 506]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.77      0.66      0.71       632
           1       0.75      0.83      0.79       756

    accuracy                           0.76      1388
   macro avg       0.76      0.75      0.75      1388
weighted avg       0.76      0.76      0.75      1388

[[418 214]
 [125 631]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.75      0.69      0.72       632
           1       0.76      0.81      0.78       756

    accuracy                           0.75      1388
   macro avg       0.75      0.75      0.75      1388
weighted avg       0.75      0.75      0.75      1388

[[434 198]
 [145 611]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 04:11:21,032][0m Trial 25 finished with value: 0.7487968383534321 and parameters: {'learning_rate': 8.39487800112118e-06, 'per_device_train_batch_size': 2, 'weight_decay': 1.8248832708908834e-06, 'num_train_epochs': 2.153328431655919}. Best is trial 20 with value: 0.7613559108152659.[0m


Trial 25 finished with value: 0.7487968383534321 and parameters: {'learning_rate': 8.39487800112118e-06, 'per_device_train_batch_size': 2, 'weight_decay': 1.8248832708908834e-06, 'num_train_epochs': 2.153328431655919}. Best is trial 20 with value: 0.7613559108152659.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.691977,0.544669,0.352612,0.272334,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       632
           1       0.54      1.00      0.71       756

    accuracy                           0.54      1388
   macro avg       0.27      0.50      0.35      1388
weighted avg       0.30      0.54      0.38      1388

[[  0 632]
 [  0 756]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-08 04:17:02,020][0m Trial 26 pruned. [0m


Trial 26 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6856,0.685836,0.556916,0.468917,0.5517,0.524739


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      0.16      0.25       632
           1       0.56      0.88      0.69       756

    accuracy                           0.56      1388
   macro avg       0.55      0.52      0.47      1388
weighted avg       0.55      0.56      0.49      1388

[[104 528]
 [ 87 669]]


[32m[I 2021-12-08 04:23:52,402][0m Trial 27 pruned. [0m


Trial 27 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,2.3388,0.699171,0.544669,0.352612,0.272334,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       632
           1       0.54      1.00      0.71       756

    accuracy                           0.54      1388
   macro avg       0.27      0.50      0.35      1388
weighted avg       0.30      0.54      0.38      1388

[[  0 632]
 [  0 756]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-08 04:32:06,552][0m Trial 28 pruned. [0m


Trial 28 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6748,0.653137,0.638329,0.608689,0.649522,0.618679


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.40      0.50       632
           1       0.63      0.84      0.72       756

    accuracy                           0.64      1388
   macro avg       0.65      0.62      0.61      1388
weighted avg       0.65      0.64      0.62      1388

[[252 380]
 [122 634]]


[32m[I 2021-12-08 04:38:14,029][0m Trial 29 pruned. [0m


Trial 29 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.525045,0.755764,0.751249,0.756024,0.749711
1,No log,0.518691,0.747839,0.746726,0.746368,0.747886


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.76      0.68      0.72       632
           1       0.75      0.82      0.78       756

    accuracy                           0.76      1388
   macro avg       0.76      0.75      0.75      1388
weighted avg       0.76      0.76      0.75      1388

[[431 201]
 [138 618]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.71      0.75      0.73       632
           1       0.78      0.75      0.76       756

    accuracy                           0.75      1388
   macro avg       0.75      0.75      0.75      1388
weighted avg       0.75      0.75      0.75      1388

[[473 159]
 [191 565]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 04:45:23,038][0m Trial 30 finished with value: 0.746725892559226 and parameters: {'learning_rate': 4.519860073508858e-05, 'per_device_train_batch_size': 16, 'weight_decay': 2.2169028347942e-06, 'num_train_epochs': 1.191412553838028}. Best is trial 20 with value: 0.7613559108152659.[0m


Trial 30 finished with value: 0.746725892559226 and parameters: {'learning_rate': 4.519860073508858e-05, 'per_device_train_batch_size': 16, 'weight_decay': 2.2169028347942e-06, 'num_train_epochs': 1.191412553838028}. Best is trial 20 with value: 0.7613559108152659.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.690739,0.544669,0.352612,0.272334,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       632
           1       0.54      1.00      0.71       756

    accuracy                           0.54      1388
   macro avg       0.27      0.50      0.35      1388
weighted avg       0.30      0.54      0.38      1388

[[  0 632]
 [  0 756]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-08 04:51:03,629][0m Trial 31 pruned. [0m


Trial 31 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.534934,0.714697,0.714198,0.729276,0.724211


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.83      0.73       632
           1       0.81      0.62      0.70       756

    accuracy                           0.71      1388
   macro avg       0.73      0.72      0.71      1388
weighted avg       0.74      0.71      0.71      1388

[[525 107]
 [289 467]]


[32m[I 2021-12-08 04:56:43,835][0m Trial 32 pruned. [0m


Trial 32 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.532987,0.728386,0.728047,0.729303,0.731067
2,0.561700,0.521655,0.75,0.744645,0.751127,0.742993


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.68      0.76      0.72       632
           1       0.78      0.70      0.74       756

    accuracy                           0.73      1388
   macro avg       0.73      0.73      0.73      1388
weighted avg       0.73      0.73      0.73      1388

[[481 151]
 [226 530]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.76      0.66      0.71       632
           1       0.75      0.82      0.78       756

    accuracy                           0.75      1388
   macro avg       0.75      0.74      0.74      1388
weighted avg       0.75      0.75      0.75      1388

[[420 212]
 [135 621]]


[32m[I 2021-12-08 05:08:01,995][0m Trial 33 pruned. [0m


Trial 33 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6496,0.587607,0.690922,0.676916,0.697435,0.67773


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.72      0.53      0.61       632
           1       0.68      0.83      0.74       756

    accuracy                           0.69      1388
   macro avg       0.70      0.68      0.68      1388
weighted avg       0.70      0.69      0.68      1388

[[335 297]
 [132 624]]


[32m[I 2021-12-08 05:14:07,514][0m Trial 34 pruned. [0m


Trial 34 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.693082,0.544669,0.352612,0.272334,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       632
           1       0.54      1.00      0.71       756

    accuracy                           0.54      1388
   macro avg       0.27      0.50      0.35      1388
weighted avg       0.30      0.54      0.38      1388

[[  0 632]
 [  0 756]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-08 05:19:47,568][0m Trial 35 pruned. [0m


Trial 35 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5783,0.539279,0.727666,0.727312,0.741155,0.736764
2,0.5178,0.547909,0.753602,0.750531,0.752111,0.749673
2,0.5175,0.58591,0.755043,0.75199,0.753578,0.751126


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.84      0.74       632
           1       0.82      0.63      0.72       756

    accuracy                           0.73      1388
   macro avg       0.74      0.74      0.73      1388
weighted avg       0.75      0.73      0.73      1388

[[530 102]
 [276 480]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.74      0.71      0.72       632
           1       0.76      0.79      0.78       756

    accuracy                           0.75      1388
   macro avg       0.75      0.75      0.75      1388
weighted avg       0.75      0.75      0.75      1388

[[446 186]
 [156 600]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.74      0.71      0.72       632
           1       0.76      0.79      0.78       756

    accuracy                           0.76      1388
   macro avg       0.75      0.75      0.75      1388
weighted avg       0.75      0.76      0.75      1388

[[447 185]
 [155 601]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 05:37:46,953][0m Trial 36 finished with value: 0.7519901956456392 and parameters: {'learning_rate': 5.933575828730239e-06, 'per_device_train_batch_size': 4, 'weight_decay': 0.00031550587547210827, 'num_train_epochs': 2.64380532657309}. Best is trial 20 with value: 0.7613559108152659.[0m


Trial 36 finished with value: 0.7519901956456392 and parameters: {'learning_rate': 5.933575828730239e-06, 'per_device_train_batch_size': 4, 'weight_decay': 0.00031550587547210827, 'num_train_epochs': 2.64380532657309}. Best is trial 20 with value: 0.7613559108152659.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6482,0.576452,0.70317,0.699662,0.73913,0.71856


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.62      0.89      0.73       632
           1       0.86      0.55      0.67       756

    accuracy                           0.70      1388
   macro avg       0.74      0.72      0.70      1388
weighted avg       0.75      0.70      0.70      1388

[[563  69]
 [343 413]]


[32m[I 2021-12-08 05:43:53,267][0m Trial 37 pruned. [0m


Trial 37 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6802,0.676245,0.567723,0.453754,0.596379,0.531156


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.12      0.20       632
           1       0.56      0.94      0.70       756

    accuracy                           0.57      1388
   macro avg       0.60      0.53      0.45      1388
weighted avg       0.59      0.57      0.48      1388

[[ 77 555]
 [ 45 711]]


[32m[I 2021-12-08 05:50:39,170][0m Trial 38 pruned. [0m


Trial 38 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.689174,0.544669,0.352612,0.272334,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       632
           1       0.54      1.00      0.71       756

    accuracy                           0.54      1388
   macro avg       0.27      0.50      0.35      1388
weighted avg       0.30      0.54      0.38      1388

[[  0 632]
 [  0 756]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-08 05:56:19,673][0m Trial 39 pruned. [0m


Trial 39 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.683,0.659126,0.716859,0.716561,0.718084,0.719706


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.75      0.71       632
           1       0.77      0.69      0.73       756

    accuracy                           0.72      1388
   macro avg       0.72      0.72      0.72      1388
weighted avg       0.72      0.72      0.72      1388

[[475 157]
 [236 520]]


[32m[I 2021-12-08 06:04:29,674][0m Trial 40 pruned. [0m


Trial 40 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,64.202,7.347014,0.544669,0.352612,0.272334,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       632
           1       0.54      1.00      0.71       756

    accuracy                           0.54      1388
   macro avg       0.27      0.50      0.35      1388
weighted avg       0.30      0.54      0.38      1388

[[  0 632]
 [  0 756]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-08 06:11:16,350][0m Trial 41 pruned. [0m


Trial 41 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6091,0.537753,0.744236,0.744236,0.750354,0.750289
2,0.5954,0.712441,0.762968,0.760279,0.761419,0.759569
2,0.507,0.881253,0.746398,0.741658,0.74641,0.740205


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.68      0.82      0.74       632
           1       0.82      0.68      0.74       756

    accuracy                           0.74      1388
   macro avg       0.75      0.75      0.74      1388
weighted avg       0.76      0.74      0.74      1388

[[517 115]
 [240 516]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.75      0.72      0.73       632
           1       0.77      0.80      0.79       756

    accuracy                           0.76      1388
   macro avg       0.76      0.76      0.76      1388
weighted avg       0.76      0.76      0.76      1388

[[456 176]
 [153 603]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.75      0.67      0.71       632
           1       0.75      0.81      0.78       756

    accuracy                           0.75      1388
   macro avg       0.75      0.74      0.74      1388
weighted avg       0.75      0.75      0.74      1388

[[424 208]
 [144 612]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 06:29:27,399][0m Trial 42 finished with value: 0.7416582064297801 and parameters: {'learning_rate': 2.7603341256578127e-05, 'per_device_train_batch_size': 4, 'weight_decay': 0.0197974863223102, 'num_train_epochs': 2.6753126014096016}. Best is trial 20 with value: 0.7613559108152659.[0m


Trial 42 finished with value: 0.7416582064297801 and parameters: {'learning_rate': 2.7603341256578127e-05, 'per_device_train_batch_size': 4, 'weight_decay': 0.0197974863223102, 'num_train_epochs': 2.6753126014096016}. Best is trial 20 with value: 0.7613559108152659.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5808,0.536114,0.723343,0.722716,0.739737,0.733445


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.85      0.74       632
           1       0.83      0.62      0.71       756

    accuracy                           0.72      1388
   macro avg       0.74      0.73      0.72      1388
weighted avg       0.75      0.72      0.72      1388

[[535  97]
 [287 469]]


[32m[I 2021-12-08 06:36:12,796][0m Trial 43 pruned. [0m


Trial 43 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5769,0.536753,0.732709,0.732622,0.735761,0.736982
2,0.5252,0.548988,0.737032,0.73069,0.738609,0.729142


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.68      0.78      0.73       632
           1       0.79      0.69      0.74       756

    accuracy                           0.73      1388
   macro avg       0.74      0.74      0.73      1388
weighted avg       0.74      0.73      0.73      1388

[[496 136]
 [235 521]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.75      0.64      0.69       632
           1       0.73      0.82      0.77       756

    accuracy                           0.74      1388
   macro avg       0.74      0.73      0.73      1388
weighted avg       0.74      0.74      0.73      1388

[[405 227]
 [138 618]]


[32m[I 2021-12-08 06:49:44,577][0m Trial 44 pruned. [0m


Trial 44 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5936,0.530298,0.744957,0.744914,0.753474,0.752118
2,0.5734,0.645246,0.766571,0.764871,0.76469,0.765082
2,0.4554,0.902964,0.757205,0.753551,0.756413,0.752332


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.68      0.83      0.75       632
           1       0.83      0.67      0.74       756

    accuracy                           0.74      1388
   macro avg       0.75      0.75      0.74      1388
weighted avg       0.76      0.74      0.74      1388

[[526 106]
 [248 508]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.74      0.75      0.74       632
           1       0.79      0.78      0.78       756

    accuracy                           0.77      1388
   macro avg       0.76      0.77      0.76      1388
weighted avg       0.77      0.77      0.77      1388

[[473 159]
 [165 591]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.75      0.70      0.72       632
           1       0.76      0.81      0.78       756

    accuracy                           0.76      1388
   macro avg       0.76      0.75      0.75      1388
weighted avg       0.76      0.76      0.76      1388

[[441 191]
 [146 610]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-08 07:09:42,610][0m Trial 45 finished with value: 0.7535510065158645 and parameters: {'learning_rate': 1.6165531171201104e-05, 'per_device_train_batch_size': 4, 'weight_decay': 6.308241593768865e-05, 'num_train_epochs': 2.9562366228147448}. Best is trial 20 with value: 0.7613559108152659.[0m


Trial 45 finished with value: 0.7535510065158645 and parameters: {'learning_rate': 1.6165531171201104e-05, 'per_device_train_batch_size': 4, 'weight_decay': 6.308241593768865e-05, 'num_train_epochs': 2.9562366228147448}. Best is trial 20 with value: 0.7613559108152659.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.8471,0.741917,0.752161,0.748739,0.750935,0.747702
1,0.7914,0.929156,0.746398,0.740572,0.747975,0.738907


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.74      0.70      0.72       632
           1       0.76      0.80      0.78       756

    accuracy                           0.75      1388
   macro avg       0.75      0.75      0.75      1388
weighted avg       0.75      0.75      0.75      1388

[[441 191]
 [153 603]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.76      0.66      0.70       632
           1       0.74      0.82      0.78       756

    accuracy                           0.75      1388
   macro avg       0.75      0.74      0.74      1388
weighted avg       0.75      0.75      0.74      1388

[[414 218]
 [134 622]]


[32m[I 2021-12-08 07:21:46,112][0m Trial 46 pruned. [0m


Trial 46 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.546162,0.709654,0.707431,0.73798,0.723214


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.88      0.73       632
           1       0.85      0.57      0.68       756

    accuracy                           0.71      1388
   macro avg       0.74      0.72      0.71      1388
weighted avg       0.75      0.71      0.71      1388

[[553  79]
 [324 432]]


[32m[I 2021-12-08 07:27:26,961][0m Trial 47 pruned. [0m


Trial 47 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.623,0.557966,0.715418,0.713586,0.741564,0.728376


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.87      0.74       632
           1       0.85      0.58      0.69       756

    accuracy                           0.72      1388
   macro avg       0.74      0.73      0.71      1388
weighted avg       0.75      0.72      0.71      1388

[[552  80]
 [315 441]]


[32m[I 2021-12-08 07:33:33,247][0m Trial 48 pruned. [0m


Trial 48 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.5",
  "type_vocab_size

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7016,0.693869,0.455331,0.312871,0.227666,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1388
  Batch size = 8


              precision    recall  f1-score   support

           0       0.46      1.00      0.63       632
           1       0.00      0.00      0.00       756

    accuracy                           0.46      1388
   macro avg       0.23      0.50      0.31      1388
weighted avg       0.21      0.46      0.28      1388

[[632   0]
 [756   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-08 07:41:42,399][0m Trial 49 pruned. [0m


Trial 49 pruned. 


[32m[I 2021-12-08 07:41:42,450][0m A new study created in memory with name: no-name-ce91c0fc-4eae-400e-9f62-3be4727526a8[0m


A new study created in memory with name: no-name-ce91c0fc-4eae-400e-9f62-3be4727526a8


In [12]:
storage_name

'sqlite:///SIMCSE_BERT_MASK_TF.db'

In [13]:
study_name

'SIMCSE_BERT_MASK_TF'

In [14]:
study = optuna.create_study(study_name=study_name, storage=storage_name, load_if_exists=True, direction="maximize")
df = study.trials_dataframe(attrs=("number", "value", "params", "state"))

[32m[I 2021-12-08 07:41:42,572][0m Using an existing study with name 'SIMCSE_BERT_MASK_TF' instead of creating a new one.[0m


Using an existing study with name 'SIMCSE_BERT_MASK_TF' instead of creating a new one.


In [15]:
df

Unnamed: 0,number,value,params_learning_rate,params_num_train_epochs,params_per_device_train_batch_size,params_weight_decay,state
0,0,0.70153,5.113165e-07,6.905193,8,0.0004337244,COMPLETE
1,1,0.727609,2.895712e-05,4.033449,8,1.110801e-09,COMPLETE
2,2,0.752182,0.0001038324,2.169266,16,4.717749e-07,COMPLETE
3,3,0.312871,0.06032013,3.150463,4,5.597112e-09,COMPLETE
4,4,0.352612,0.0003416136,5.336966,8,0.01925918,COMPLETE
5,5,0.499189,3.405867e-07,1.906998,4,1.597199e-09,PRUNED
6,6,0.312871,0.1309907,2.800622,4,0.0001297187,PRUNED
7,7,0.312871,0.0110963,3.710493,8,3.778855e-08,PRUNED
8,8,0.47652,9.091989e-08,4.166444,16,5.388756e-07,PRUNED
9,9,0.739129,3.515249e-06,7.852373,2,0.0001586525,COMPLETE


In [16]:
fig = optuna.visualization.plot_param_importances(study)
fig.show()

In [17]:
best_run

BestRun(run_id='20', objective=0.7613559108152659, hyperparameters={'learning_rate': 9.550187523162385e-06, 'num_train_epochs': 2.391488658266402, 'per_device_train_batch_size': 2, 'weight_decay': 7.344308135535765e-11})

In [18]:
optuna.visualization.plot_intermediate_values(study)

In [19]:
optuna.visualization.plot_parallel_coordinate(study)

In [20]:
optuna.visualization.plot_optimization_history(study)

In [21]:
optuna.visualization.plot_contour(study)

In [22]:
optuna.visualization.plot_slice(study)

In [23]:
optuna.visualization.plot_edf(study)