# Initiliation

In [1]:
!pip install transformers datasets --quiet

In [2]:
from transformers import TrainingArguments
from transformers import Trainer
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer

from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, classification_report, confusion_matrix
from datasets import Dataset
from datasets import load_metric

import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from google.colab import drive

# Data Preparation

In [3]:
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [4]:
%cd 'drive/MyDrive/Masterarbeit/Colab Notebooks/OVERVIEW myPers/00_Datasets/URL'

/content/drive/.shortcut-targets-by-id/1aHXlqhpj1STohhfU4gn53D4whaLH__Jz/Masterarbeit/Colab Notebooks/OVERVIEW myPers/00_Datasets/URL


In [5]:
dfPJ = pd.read_csv('myPers_CON_URL.csv', sep=",", error_bad_lines=False)
dfPJ

Unnamed: 0,text,label
0,likes the sound of thunder,0
1,is so sleepy its not even funny thats ...,0
2,is sore and wants the knot of muscles ...,0
3,likes how the day sounds in this new s...,0
4,is home 3,0
...,...,...
9912,little things give you away,0
9913,is wishing it was saturday,1
9914,is studying hard for the gre,1
9915,snipers get more head,0


In [6]:
df_clean= []
for i,row in dfPJ.iterrows():
  df_clean.append({
      'text': str(row['text']),
      'label': int(row['label']),
  })

dfPJ = pd.DataFrame(df_clean)
dfPJ

Unnamed: 0,text,label
0,likes the sound of thunder,0
1,is so sleepy its not even funny thats ...,0
2,is sore and wants the knot of muscles ...,0
3,likes how the day sounds in this new s...,0
4,is home 3,0
...,...,...
9912,little things give you away,0
9913,is wishing it was saturday,1
9914,is studying hard for the gre,1
9915,snipers get more head,0


# Model Training

In [7]:
modeltype = "princeton-nlp/sup-simcse-bert-base-uncased"

In [8]:
train, test = train_test_split(dfPJ, test_size=0.2, random_state=0, stratify=dfPJ.label)

train = Dataset.from_pandas(train)
test = Dataset.from_pandas(test)

tokenizer = AutoTokenizer.from_pretrained(modeltype)

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_train = train.map(tokenize_function, batched=True)
tokenized_test = test.map(tokenize_function, batched=True)

full_train_dataset = tokenized_train
full_eval_dataset = tokenized_test

model = AutoModelForSequenceClassification.from_pretrained(modeltype, num_labels=2)

training_args = TrainingArguments(
    "SIMCSE_BERT_CON", 
    evaluation_strategy="epoch",
    save_strategy = 'no',
    save_steps = 100000,
    save_total_limit = 1,
    metric_for_best_model="eval_f1")

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')
    acc = accuracy_score(labels, preds)
    print(classification_report(labels, preds, labels=[0,1]))
    print(confusion_matrix(labels,preds))
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

  0%|          | 0/8 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

Downloading:   0%|          | 0.00/418M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Hyperparameter Optimization

In [9]:
! pip install optuna --quiet

[K     |████████████████████████████████| 308 kB 4.4 MB/s 
[K     |████████████████████████████████| 209 kB 48.0 MB/s 
[K     |████████████████████████████████| 80 kB 8.2 MB/s 
[K     |████████████████████████████████| 75 kB 4.5 MB/s 
[K     |████████████████████████████████| 149 kB 38.0 MB/s 
[K     |████████████████████████████████| 49 kB 6.1 MB/s 
[K     |████████████████████████████████| 112 kB 48.2 MB/s 
[?25h  Building wheel for pyperclip (setup.py) ... [?25l[?25hdone


In [10]:
def model_init():
    return AutoModelForSequenceClassification.from_pretrained(modeltype, num_labels=2)

In [11]:
trainer = Trainer(
      model_init=model_init,
      args=training_args, 
      train_dataset=full_train_dataset, 
      eval_dataset=full_eval_dataset,
      compute_metrics=compute_metrics 
  )

loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file

In [12]:
import sklearn.metrics as metrics
import optuna
import sys
import logging

def objective (metrics):
  return metrics['eval_f1']

def hyperparameter_space(trial):

    return {
        "learning_rate": trial.suggest_float("learning_rate", 5e-8, 5e-1, log=True),
        "per_device_train_batch_size": trial.suggest_categorical("per_device_train_batch_size", [2, 4, 8, 16]),
        "weight_decay": trial.suggest_float("weight_decay", 5e-12, 5e-1, log=True),
        "num_train_epochs": trial.suggest_float("num_train_epochs",1,8,log=True),
        #"adam_epsilon": trial.suggest_float("adam_epsilon", 1e-10, 1e-6, log=True),
        #"seed" : trial.suggest_float("seed",10,60,log=True)
        }

optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))
study_name = "SIMCSE_BERT_myPers_CON"  # Unique identifier of the study.
storage_name = "sqlite:///{}.db".format(study_name)

best_run = trainer.hyperparameter_search(hp_space=hyperparameter_space,compute_objective=objective, n_trials=50, direction="maximize",study_name=study_name, storage=storage_name )

study = optuna.create_study()

[32m[I 2021-12-24 13:54:33,065][0m A new study created in RDB with name: SIMCSE_BERT_myPers_CON[0m


A new study created in RDB with name: SIMCSE_BERT_myPers_CON


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6895,0.676174,0.577117,0.532808,0.577397,0.555676
2,0.6622,0.669825,0.582157,0.564175,0.57702,0.569368
3,0.6337,0.672615,0.587702,0.566374,0.583917,0.573416
4,0.6175,0.672099,0.587198,0.576935,0.582154,0.578336
4,0.6098,0.67204,0.585685,0.57559,0.58057,0.576938


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.58      0.82      0.68      1073
           1       0.58      0.29      0.39       911

    accuracy                           0.58      1984
   macro avg       0.58      0.56      0.53      1984
weighted avg       0.58      0.58      0.54      1984

[[878 195]
 [644 267]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.59      0.73      0.65      1073
           1       0.56      0.41      0.48       911

    accuracy                           0.58      1984
   macro avg       0.58      0.57      0.56      1984
weighted avg       0.58      0.58      0.57      1984

[[779 294]
 [535 376]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.59      0.75      0.66      1073
           1       0.57      0.40      0.47       911

    accuracy                           0.59      1984
   macro avg       0.58      0.57      0.57      1984
weighted avg       0.58      0.59      0.57      1984

[[803 270]
 [548 363]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      0.69      0.64      1073
           1       0.56      0.47      0.51       911

    accuracy                           0.59      1984
   macro avg       0.58      0.58      0.58      1984
weighted avg       0.58      0.59      0.58      1984

[[737 336]
 [483 428]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      0.68      0.64      1073
           1       0.56      0.47      0.51       911

    accuracy                           0.59      1984
   macro avg       0.58      0.58      0.58      1984
weighted avg       0.58      0.59      0.58      1984

[[734 339]
 [483 428]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-24 14:31:00,621][0m Trial 0 finished with value: 0.5755895716984766 and parameters: {'learning_rate': 3.001811005079923e-06, 'per_device_train_batch_size': 8, 'weight_decay': 7.674193134586628e-12, 'num_train_epochs': 4.105974207913222}. Best is trial 0 with value: 0.5755895716984766.[0m


Trial 0 finished with value: 0.5755895716984766 and parameters: {'learning_rate': 3.001811005079923e-06, 'per_device_train_batch_size': 8, 'weight_decay': 7.674193134586628e-12, 'num_train_epochs': 4.105974207913222}. Best is trial 0 with value: 0.5755895716984766.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.687,0.686791,0.546875,0.401032,0.554931,0.509984
2,0.6827,0.683887,0.549395,0.427323,0.551564,0.515131
3,0.6827,0.681146,0.557964,0.467166,0.561788,0.527776
4,0.6728,0.680281,0.558972,0.47612,0.560808,0.530034
4,0.6685,0.680168,0.557964,0.476413,0.558236,0.529267


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.55      0.96      0.70      1073
           1       0.56      0.06      0.11       911

    accuracy                           0.55      1984
   macro avg       0.55      0.51      0.40      1984
weighted avg       0.55      0.55      0.43      1984

[[1032   41]
 [ 858   53]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.55      0.93      0.69      1073
           1       0.55      0.10      0.16       911

    accuracy                           0.55      1984
   macro avg       0.55      0.52      0.43      1984
weighted avg       0.55      0.55      0.45      1984

[[1003   70]
 [ 824   87]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.56      0.90      0.69      1073
           1       0.57      0.16      0.25       911

    accuracy                           0.56      1984
   macro avg       0.56      0.53      0.47      1984
weighted avg       0.56      0.56      0.49      1984

[[963 110]
 [767 144]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.56      0.88      0.68      1073
           1       0.56      0.18      0.27       911

    accuracy                           0.56      1984
   macro avg       0.56      0.53      0.48      1984
weighted avg       0.56      0.56      0.49      1984

[[949 124]
 [751 160]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.56      0.88      0.68      1073
           1       0.56      0.18      0.27       911

    accuracy                           0.56      1984
   macro avg       0.56      0.53      0.48      1984
weighted avg       0.56      0.56      0.49      1984

[[945 128]
 [749 162]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-24 15:21:48,732][0m Trial 1 finished with value: 0.47641271112766825 and parameters: {'learning_rate': 4.051153876605337e-07, 'per_device_train_batch_size': 2, 'weight_decay': 0.005144718788100126, 'num_train_epochs': 4.350658126020607}. Best is trial 0 with value: 0.5755895716984766.[0m


Trial 1 finished with value: 0.47641271112766825 and parameters: {'learning_rate': 4.051153876605337e-07, 'per_device_train_batch_size': 2, 'weight_decay': 0.005144718788100126, 'num_train_epochs': 4.350658126020607}. Best is trial 0 with value: 0.5755895716984766.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7062,0.697387,0.540827,0.350998,0.270413,0.5
2,0.6968,0.689886,0.540827,0.350998,0.270413,0.5
3,0.6926,0.695789,0.540827,0.350998,0.270413,0.5
4,0.692,0.690156,0.540827,0.350998,0.270413,0.5
5,0.6922,0.69114,0.540827,0.350998,0.270413,0.5
6,0.6918,0.689811,0.540827,0.350998,0.270413,0.5
6,0.6898,0.689829,0.540827,0.350998,0.270413,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-24 16:17:43,346][0m Trial 2 finished with value: 0.35099771017337256 and parameters: {'learning_rate': 9.297904565433019e-05, 'per_device_train_batch_size': 8, 'weight_decay': 3.1036978723744258e-06, 'num_train_epochs': 6.435027674872897}. Best is trial 0 with value: 0.57558957169

Trial 2 finished with value: 0.35099771017337256 and parameters: {'learning_rate': 9.297904565433019e-05, 'per_device_train_batch_size': 8, 'weight_decay': 3.1036978723744258e-06, 'num_train_epochs': 6.435027674872897}. Best is trial 0 with value: 0.5755895716984766.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6812,0.743208,0.59627,0.570385,0.595569,0.580095
1,0.6748,1.195366,0.586694,0.584742,0.58469,0.584997


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      0.78      0.68      1073
           1       0.59      0.38      0.46       911

    accuracy                           0.60      1984
   macro avg       0.60      0.58      0.57      1984
weighted avg       0.60      0.60      0.58      1984

[[835 238]
 [563 348]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.62      0.61      0.61      1073
           1       0.55      0.56      0.56       911

    accuracy                           0.59      1984
   macro avg       0.58      0.58      0.58      1984
weighted avg       0.59      0.59      0.59      1984

[[650 423]
 [397 514]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-24 16:38:25,710][0m Trial 3 finished with value: 0.5847423017234338 and parameters: {'learning_rate': 1.1896155685382957e-05, 'per_device_train_batch_size': 2, 'weight_decay': 7.353255948601793e-05, 'num_train_epochs': 1.769989961577153}. Best is trial 3 with value: 0.5847423017234338.[0m


Trial 3 finished with value: 0.5847423017234338 and parameters: {'learning_rate': 1.1896155685382957e-05, 'per_device_train_batch_size': 2, 'weight_decay': 7.353255948601793e-05, 'num_train_epochs': 1.769989961577153}. Best is trial 3 with value: 0.5847423017234338.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7781,0.878349,0.540827,0.350998,0.270413,0.5
2,0.7486,0.765447,0.459173,0.31468,0.229587,0.5
3,0.7389,0.754298,0.540827,0.350998,0.270413,0.5
3,0.7035,0.689861,0.540827,0.350998,0.270413,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1073
           1       0.46      1.00      0.63       911

    accuracy                           0.46      1984
   macro avg       0.23      0.50      0.31      1984
weighted avg       0.21      0.46      0.29      1984

[[   0 1073]
 [   0  911]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-24 17:09:48,787][0m Trial 4 finished with value: 0.35099771017337256 and parameters: {'learning_rate': 0.0014973653237991646, 'per_device_train_batch_size': 8, 'weight_decay': 3.895097167361002e-10, 'num_train_epochs': 3.5990224614592354}. Best is trial 3 with value: 0.58474230172

Trial 4 finished with value: 0.35099771017337256 and parameters: {'learning_rate': 0.0014973653237991646, 'per_device_train_batch_size': 8, 'weight_decay': 3.895097167361002e-10, 'num_train_epochs': 3.5990224614592354}. Best is trial 3 with value: 0.5847423017234338.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,7.2473,2.348345,0.459173,0.31468,0.229587,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1073
           1       0.46      1.00      0.63       911

    accuracy                           0.46      1984
   macro avg       0.23      0.50      0.31      1984
weighted avg       0.21      0.46      0.29      1984

[[   0 1073]
 [   0  911]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-24 17:21:30,964][0m Trial 5 pruned. [0m


Trial 5 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.687427,0.546875,0.37382,0.622262,0.507332


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      0.99      0.70      1073
           1       0.70      0.02      0.04       911

    accuracy                           0.55      1984
   macro avg       0.62      0.51      0.37      1984
weighted avg       0.62      0.55      0.40      1984

[[1064    9]
 [ 890   21]]


[32m[I 2021-12-24 17:29:37,514][0m Trial 6 pruned. [0m


Trial 6 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6817,0.680258,0.587198,0.577865,0.582216,0.578916
1,0.6561,0.724124,0.589718,0.588056,0.588047,0.588455


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.61      0.68      0.64      1073
           1       0.56      0.48      0.52       911

    accuracy                           0.59      1984
   macro avg       0.58      0.58      0.58      1984
weighted avg       0.58      0.59      0.58      1984

[[730 343]
 [476 435]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.62      0.60      0.61      1073
           1       0.55      0.57      0.56       911

    accuracy                           0.59      1984
   macro avg       0.59      0.59      0.59      1984
weighted avg       0.59      0.59      0.59      1984

[[648 425]
 [389 522]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-24 17:41:00,056][0m Trial 7 finished with value: 0.5880562598523612 and parameters: {'learning_rate': 4.1697269575826184e-05, 'per_device_train_batch_size': 4, 'weight_decay': 0.00013037742311561838, 'num_train_epochs': 1.1137190633204128}. Best is trial 7 with value: 0.5880562598523612.[0m


Trial 7 finished with value: 0.5880562598523612 and parameters: {'learning_rate': 4.1697269575826184e-05, 'per_device_train_batch_size': 4, 'weight_decay': 0.00013037742311561838, 'num_train_epochs': 1.1137190633204128}. Best is trial 7 with value: 0.5880562598523612.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6892,0.668207,0.59375,0.556921,0.597405,0.574036
2,0.5858,0.699048,0.603327,0.598961,0.59975,0.5988
3,0.3902,0.939743,0.619456,0.615509,0.616218,0.615286
4,0.2168,1.522601,0.611391,0.606328,0.607735,0.606173
4,0.1393,1.841937,0.612399,0.609633,0.609684,0.60959


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.59      0.82      0.68      1073
           1       0.60      0.33      0.43       911

    accuracy                           0.59      1984
   macro avg       0.60      0.57      0.56      1984
weighted avg       0.60      0.59      0.57      1984

[[875 198]
 [608 303]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.65      0.64      1073
           1       0.57      0.54      0.56       911

    accuracy                           0.60      1984
   macro avg       0.60      0.60      0.60      1984
weighted avg       0.60      0.60      0.60      1984

[[702 371]
 [416 495]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.67      0.65      1073
           1       0.59      0.56      0.58       911

    accuracy                           0.62      1984
   macro avg       0.62      0.62      0.62      1984
weighted avg       0.62      0.62      0.62      1984

[[715 358]
 [397 514]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.67      0.65      1073
           1       0.58      0.54      0.56       911

    accuracy                           0.61      1984
   macro avg       0.61      0.61      0.61      1984
weighted avg       0.61      0.61      0.61      1984

[[719 354]
 [417 494]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.64      0.64      1073
           1       0.58      0.58      0.58       911

    accuracy                           0.61      1984
   macro avg       0.61      0.61      0.61      1984
weighted avg       0.61      0.61      0.61      1984

[[691 382]
 [387 524]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-24 18:24:12,123][0m Trial 8 finished with value: 0.6096333839682916 and parameters: {'learning_rate': 1.3038107443645101e-05, 'per_device_train_batch_size': 8, 'weight_decay': 0.000269454837271572, 'num_train_epochs': 4.981085904972722}. Best is trial 8 with value: 0.6096333839682916.[0m


Trial 8 finished with value: 0.6096333839682916 and parameters: {'learning_rate': 1.3038107443645101e-05, 'per_device_train_batch_size': 8, 'weight_decay': 0.000269454837271572, 'num_train_epochs': 4.981085904972722}. Best is trial 8 with value: 0.6096333839682916.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6895,0.689548,0.537802,0.437731,0.515955,0.506733


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      0.89      0.67      1073
           1       0.49      0.13      0.20       911

    accuracy                           0.54      1984
   macro avg       0.52      0.51      0.44      1984
weighted avg       0.52      0.54      0.46      1984

[[952 121]
 [796 115]]


[32m[I 2021-12-24 18:33:53,444][0m Trial 9 pruned. [0m


Trial 9 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,3.585353,0.459173,0.31468,0.229587,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1073
           1       0.46      1.00      0.63       911

    accuracy                           0.46      1984
   macro avg       0.23      0.50      0.31      1984
weighted avg       0.21      0.46      0.29      1984

[[   0 1073]
 [   0  911]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-24 18:41:59,090][0m Trial 10 pruned. [0m


Trial 10 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6791,0.66928,0.581653,0.551554,0.578857,0.564261
2,0.6218,0.680146,0.59123,0.579526,0.58643,0.581484
2,0.6152,0.680223,0.588206,0.576994,0.583197,0.578771


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.59      0.78      0.67      1073
           1       0.57      0.35      0.44       911

    accuracy                           0.58      1984
   macro avg       0.58      0.56      0.55      1984
weighted avg       0.58      0.58      0.56      1984

[[834 239]
 [591 320]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.61      0.70      0.65      1073
           1       0.57      0.46      0.51       911

    accuracy                           0.59      1984
   macro avg       0.59      0.58      0.58      1984
weighted avg       0.59      0.59      0.59      1984

[[752 321]
 [490 421]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      0.69      0.65      1073
           1       0.56      0.46      0.51       911

    accuracy                           0.59      1984
   macro avg       0.58      0.58      0.58      1984
weighted avg       0.58      0.59      0.58      1984

[[745 328]
 [489 422]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-24 19:03:03,316][0m Trial 11 finished with value: 0.5769940293747767 and parameters: {'learning_rate': 5.5476791465127605e-06, 'per_device_train_batch_size': 4, 'weight_decay': 0.0021245536579701852, 'num_train_epochs': 2.1313568888422285}. Best is trial 8 with value: 0.6096333839682916.[0m


Trial 11 finished with value: 0.5769940293747767 and parameters: {'learning_rate': 5.5476791465127605e-06, 'per_device_train_batch_size': 4, 'weight_decay': 0.0021245536579701852, 'num_train_epochs': 2.1313568888422285}. Best is trial 8 with value: 0.6096333839682916.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7152,0.689855,0.540827,0.350998,0.270413,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-24 19:12:41,939][0m Trial 12 pruned. [0m


Trial 12 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6947,0.669092,0.588206,0.532103,0.600931,0.56369


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.58      0.86      0.69      1073
           1       0.62      0.26      0.37       911

    accuracy                           0.59      1984
   macro avg       0.60      0.56      0.53      1984
weighted avg       0.60      0.59      0.55      1984

[[927 146]
 [671 240]]


[32m[I 2021-12-24 19:21:23,732][0m Trial 13 pruned. [0m


Trial 13 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7395,0.767081,0.540827,0.350998,0.270413,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-24 19:31:02,224][0m Trial 14 pruned. [0m


Trial 14 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6871,0.682314,0.553931,0.454513,0.555496,0.522556


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.55      0.91      0.69      1073
           1       0.56      0.14      0.22       911

    accuracy                           0.55      1984
   macro avg       0.56      0.52      0.45      1984
weighted avg       0.56      0.55      0.47      1984

[[973 100]
 [785 126]]


[32m[I 2021-12-24 19:40:40,745][0m Trial 15 pruned. [0m


Trial 15 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,31.2087,10.578556,0.459173,0.31468,0.229587,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1073
           1       0.46      1.00      0.63       911

    accuracy                           0.46      1984
   macro avg       0.23      0.50      0.31      1984
weighted avg       0.21      0.46      0.29      1984

[[   0 1073]
 [   0  911]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-24 19:49:23,115][0m Trial 16 pruned. [0m


Trial 16 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.662371,0.591734,0.578774,0.587052,0.581287
2,0.677500,0.679044,0.597782,0.597635,0.599739,0.60022
3,0.579700,0.840292,0.623488,0.617164,0.620019,0.617108
4,0.388200,1.114323,0.615927,0.609831,0.612235,0.609786
5,0.214900,1.328643,0.622984,0.618116,0.619584,0.617885
5,0.214900,1.328692,0.622984,0.618116,0.619584,0.617885


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      0.71      0.65      1073
           1       0.57      0.45      0.50       911

    accuracy                           0.59      1984
   macro avg       0.59      0.58      0.58      1984
weighted avg       0.59      0.59      0.58      1984

[[761 312]
 [498 413]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.57      0.61      1073
           1       0.55      0.63      0.59       911

    accuracy                           0.60      1984
   macro avg       0.60      0.60      0.60      1984
weighted avg       0.60      0.60      0.60      1984

[[612 461]
 [337 574]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.70      0.67      1073
           1       0.60      0.54      0.57       911

    accuracy                           0.62      1984
   macro avg       0.62      0.62      0.62      1984
weighted avg       0.62      0.62      0.62      1984

[[746 327]
 [420 491]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.68      0.66      1073
           1       0.59      0.53      0.56       911

    accuracy                           0.62      1984
   macro avg       0.61      0.61      0.61      1984
weighted avg       0.61      0.62      0.61      1984

[[735 338]
 [424 487]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.68      0.66      1073
           1       0.60      0.56      0.58       911

    accuracy                           0.62      1984
   macro avg       0.62      0.62      0.62      1984
weighted avg       0.62      0.62      0.62      1984

[[730 343]
 [405 506]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.68      0.66      1073
           1       0.60      0.56      0.58       911

    accuracy                           0.62      1984
   macro avg       0.62      0.62      0.62      1984
weighted avg       0.62      0.62      0.62      1984

[[730 343]
 [405 506]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-24 20:30:21,762][0m Trial 17 finished with value: 0.6181159420289855 and parameters: {'learning_rate': 1.7443883599512133e-05, 'per_device_train_batch_size': 16, 'weight_decay': 2.4793361471561817e-07, 'num_train_epochs': 5.009529088071898}. Best is trial 17 with value: 0.6181159420289855.[0m


Trial 17 finished with value: 0.6181159420289855 and parameters: {'learning_rate': 1.7443883599512133e-05, 'per_device_train_batch_size': 16, 'weight_decay': 2.4793361471561817e-07, 'num_train_epochs': 5.009529088071898}. Best is trial 17 with value: 0.6181159420289855.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.68913,0.539315,0.438112,0.519429,0.508049


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.55      0.89      0.68      1073
           1       0.49      0.13      0.20       911

    accuracy                           0.54      1984
   macro avg       0.52      0.51      0.44      1984
weighted avg       0.52      0.54      0.46      1984

[[956 117]
 [797 114]]


[32m[I 2021-12-24 20:38:27,722][0m Trial 18 pruned. [0m


Trial 18 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.66251,0.59123,0.57771,0.586557,0.58049
2,0.677800,0.677684,0.596774,0.596626,0.598729,0.599205
3,0.584100,0.835534,0.620464,0.615429,0.616989,0.615223
4,0.391000,1.24804,0.613911,0.605363,0.610214,0.605934
5,0.198000,1.698585,0.620464,0.617556,0.61769,0.617461
6,0.106800,2.191003,0.616935,0.610552,0.61326,0.610553
7,0.059200,2.40376,0.620464,0.615248,0.616965,0.615057
7,0.042000,2.461443,0.61744,0.609146,0.613886,0.60961


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      0.71      0.65      1073
           1       0.57      0.45      0.50       911

    accuracy                           0.59      1984
   macro avg       0.59      0.58      0.58      1984
weighted avg       0.59      0.59      0.58      1984

[[764 309]
 [502 409]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.57      0.60      1073
           1       0.55      0.63      0.59       911

    accuracy                           0.60      1984
   macro avg       0.60      0.60      0.60      1984
weighted avg       0.60      0.60      0.60      1984

[[611 462]
 [338 573]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.68      0.66      1073
           1       0.59      0.55      0.57       911

    accuracy                           0.62      1984
   macro avg       0.62      0.62      0.62      1984
weighted avg       0.62      0.62      0.62      1984

[[729 344]
 [409 502]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.70      0.66      1073
           1       0.59      0.51      0.55       911

    accuracy                           0.61      1984
   macro avg       0.61      0.61      0.61      1984
weighted avg       0.61      0.61      0.61      1984

[[755 318]
 [448 463]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.65      0.65      1073
           1       0.59      0.58      0.58       911

    accuracy                           0.62      1984
   macro avg       0.62      0.62      0.62      1984
weighted avg       0.62      0.62      0.62      1984

[[702 371]
 [382 529]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.69      0.66      1073
           1       0.59      0.53      0.56       911

    accuracy                           0.62      1984
   macro avg       0.61      0.61      0.61      1984
weighted avg       0.61      0.62      0.61      1984

[[739 334]
 [426 485]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.68      0.66      1073
           1       0.59      0.55      0.57       911

    accuracy                           0.62      1984
   macro avg       0.62      0.62      0.62      1984
weighted avg       0.62      0.62      0.62      1984

[[731 342]
 [411 500]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.71      0.67      1073
           1       0.60      0.51      0.55       911

    accuracy                           0.62      1984
   macro avg       0.61      0.61      0.61      1984
weighted avg       0.62      0.62      0.61      1984

[[757 316]
 [443 468]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-24 21:40:06,330][0m Trial 19 finished with value: 0.609146229886042 and parameters: {'learning_rate': 1.5980630089189062e-05, 'per_device_train_batch_size': 16, 'weight_decay': 2.8298928350252083e-09, 'num_train_epochs': 7.625796721169922}. Best is trial 17 with value: 0.6181159420289855.[0m


Trial 19 finished with value: 0.609146229886042 and parameters: {'learning_rate': 1.5980630089189062e-05, 'per_device_train_batch_size': 16, 'weight_decay': 2.8298928350252083e-09, 'num_train_epochs': 7.625796721169922}. Best is trial 17 with value: 0.6181159420289855.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.683889,0.553427,0.456457,0.553325,0.522422


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.55      0.90      0.69      1073
           1       0.55      0.14      0.23       911

    accuracy                           0.55      1984
   macro avg       0.55      0.52      0.46      1984
weighted avg       0.55      0.55      0.48      1984

[[968 105]
 [781 130]]


[32m[I 2021-12-24 21:48:13,839][0m Trial 20 pruned. [0m


Trial 20 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.662432,0.590222,0.576351,0.585495,0.579309
2,0.677600,0.679711,0.592238,0.592192,0.595055,0.595343
3,0.580200,0.855955,0.620464,0.616038,0.617105,0.615803
4,0.376400,1.339108,0.615927,0.607542,0.612311,0.608046
5,0.181800,1.847441,0.621472,0.617058,0.618131,0.616818
6,0.097700,2.326732,0.616935,0.613465,0.613856,0.613287
7,0.055500,2.557858,0.620464,0.613987,0.6169,0.61398
7,0.040000,2.620677,0.620968,0.61505,0.617426,0.614943


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      0.71      0.65      1073
           1       0.57      0.45      0.50       911

    accuracy                           0.59      1984
   macro avg       0.59      0.58      0.58      1984
weighted avg       0.59      0.59      0.58      1984

[[765 308]
 [505 406]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.56      0.60      1073
           1       0.55      0.63      0.59       911

    accuracy                           0.59      1984
   macro avg       0.60      0.60      0.59      1984
weighted avg       0.60      0.59      0.59      1984

[[598 475]
 [334 577]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.67      0.66      1073
           1       0.59      0.56      0.57       911

    accuracy                           0.62      1984
   macro avg       0.62      0.62      0.62      1984
weighted avg       0.62      0.62      0.62      1984

[[722 351]
 [402 509]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.70      0.66      1073
           1       0.60      0.51      0.55       911

    accuracy                           0.62      1984
   macro avg       0.61      0.61      0.61      1984
weighted avg       0.61      0.62      0.61      1984

[[756 317]
 [445 466]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.67      0.66      1073
           1       0.59      0.56      0.58       911

    accuracy                           0.62      1984
   macro avg       0.62      0.62      0.62      1984
weighted avg       0.62      0.62      0.62      1984

[[723 350]
 [401 510]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.66      0.65      1073
           1       0.59      0.57      0.58       911

    accuracy                           0.62      1984
   macro avg       0.61      0.61      0.61      1984
weighted avg       0.62      0.62      0.62      1984

[[706 367]
 [393 518]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.69      0.66      1073
           1       0.60      0.53      0.56       911

    accuracy                           0.62      1984
   macro avg       0.62      0.61      0.61      1984
weighted avg       0.62      0.62      0.62      1984

[[744 329]
 [424 487]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.69      0.66      1073
           1       0.60      0.54      0.57       911

    accuracy                           0.62      1984
   macro avg       0.62      0.61      0.62      1984
weighted avg       0.62      0.62      0.62      1984

[[739 334]
 [418 493]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-24 22:52:35,459][0m Trial 21 finished with value: 0.6150495131252355 and parameters: {'learning_rate': 1.6787980830681923e-05, 'per_device_train_batch_size': 16, 'weight_decay': 8.867832943566768e-10, 'num_train_epochs': 7.976258653833072}. Best is trial 17 with value: 0.6181159420289855.[0m


Trial 21 finished with value: 0.6150495131252355 and parameters: {'learning_rate': 1.6787980830681923e-05, 'per_device_train_batch_size': 16, 'weight_decay': 8.867832943566768e-10, 'num_train_epochs': 7.976258653833072}. Best is trial 17 with value: 0.6181159420289855.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.703859,0.540827,0.350998,0.270413,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-24 23:00:43,133][0m Trial 22 pruned. [0m


Trial 22 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.689965,0.540827,0.350998,0.270413,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-24 23:08:50,457][0m Trial 23 pruned. [0m


Trial 23 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.671535,0.580141,0.564757,0.574586,0.568581
2,0.682500,0.667402,0.594254,0.577507,0.590387,0.581877


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.59      0.71      0.65      1073
           1       0.56      0.43      0.48       911

    accuracy                           0.58      1984
   macro avg       0.57      0.57      0.56      1984
weighted avg       0.58      0.58      0.57      1984

[[762 311]
 [522 389]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      0.73      0.66      1073
           1       0.58      0.43      0.49       911

    accuracy                           0.59      1984
   macro avg       0.59      0.58      0.58      1984
weighted avg       0.59      0.59      0.58      1984

[[787 286]
 [519 392]]


[32m[I 2021-12-24 23:25:01,737][0m Trial 24 pruned. [0m


Trial 24 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6921,0.684624,0.584677,0.532997,0.592288,0.561257


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.58      0.85      0.69      1073
           1       0.61      0.27      0.38       911

    accuracy                           0.58      1984
   macro avg       0.59      0.56      0.53      1984
weighted avg       0.59      0.58      0.55      1984

[[910 163]
 [661 250]]


[32m[I 2021-12-24 23:33:46,226][0m Trial 25 pruned. [0m


Trial 25 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.689811,0.540827,0.350998,0.270413,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-24 23:41:52,691][0m Trial 26 pruned. [0m


Trial 26 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.68692,0.551915,0.434185,0.558068,0.518124


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.55      0.93      0.69      1073
           1       0.57      0.10      0.18       911

    accuracy                           0.55      1984
   macro avg       0.56      0.52      0.43      1984
weighted avg       0.56      0.55      0.46      1984

[[1000   73]
 [ 816   95]]


[32m[I 2021-12-24 23:49:59,081][0m Trial 27 pruned. [0m


Trial 27 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6895,0.675614,0.575605,0.534126,0.574286,0.55494


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.58      0.81      0.67      1073
           1       0.57      0.30      0.40       911

    accuracy                           0.58      1984
   macro avg       0.57      0.55      0.53      1984
weighted avg       0.57      0.58      0.55      1984

[[867 206]
 [636 275]]


[32m[I 2021-12-24 23:58:42,515][0m Trial 28 pruned. [0m


Trial 28 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6895,0.676969,0.573589,0.525093,0.573747,0.551171


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.57      0.83      0.68      1073
           1       0.57      0.28      0.37       911

    accuracy                           0.57      1984
   macro avg       0.57      0.55      0.53      1984
weighted avg       0.57      0.57      0.54      1984

[[886 187]
 [659 252]]


[32m[I 2021-12-25 00:07:25,989][0m Trial 29 pruned. [0m


Trial 29 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,2.1405,2.039953,0.459173,0.31468,0.229587,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1073
           1       0.46      1.00      0.63       911

    accuracy                           0.46      1984
   macro avg       0.23      0.50      0.31      1984
weighted avg       0.21      0.46      0.29      1984

[[   0 1073]
 [   0  911]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-25 00:19:02,980][0m Trial 30 pruned. [0m


Trial 30 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.662799,0.589214,0.575784,0.584362,0.578543
2,0.677900,0.675875,0.597278,0.597089,0.598987,0.599506
3,0.589000,0.813987,0.622984,0.618462,0.619644,0.618216
4,0.407200,1.172947,0.618952,0.610157,0.61555,0.610759
5,0.215400,1.611839,0.621976,0.61825,0.61884,0.61803
6,0.115000,2.128928,0.61744,0.610911,0.613778,0.610936
7,0.065400,2.369501,0.618448,0.613476,0.614943,0.613276
7,0.045500,2.445127,0.618448,0.612924,0.614868,0.612779


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      0.71      0.65      1073
           1       0.57      0.45      0.50       911

    accuracy                           0.59      1984
   macro avg       0.58      0.58      0.58      1984
weighted avg       0.59      0.59      0.58      1984

[[761 312]
 [503 408]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.57      0.61      1073
           1       0.55      0.63      0.59       911

    accuracy                           0.60      1984
   macro avg       0.60      0.60      0.60      1984
weighted avg       0.60      0.60      0.60      1984

[[614 459]
 [340 571]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.68      0.66      1073
           1       0.60      0.56      0.58       911

    accuracy                           0.62      1984
   macro avg       0.62      0.62      0.62      1984
weighted avg       0.62      0.62      0.62      1984

[[726 347]
 [401 510]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.71      0.67      1073
           1       0.60      0.51      0.55       911

    accuracy                           0.62      1984
   macro avg       0.62      0.61      0.61      1984
weighted avg       0.62      0.62      0.61      1984

[[763 310]
 [446 465]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.67      0.66      1073
           1       0.59      0.57      0.58       911

    accuracy                           0.62      1984
   macro avg       0.62      0.62      0.62      1984
weighted avg       0.62      0.62      0.62      1984

[[715 358]
 [392 519]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.69      0.66      1073
           1       0.59      0.53      0.56       911

    accuracy                           0.62      1984
   macro avg       0.61      0.61      0.61      1984
weighted avg       0.62      0.62      0.62      1984

[[741 332]
 [427 484]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.68      0.66      1073
           1       0.59      0.55      0.57       911

    accuracy                           0.62      1984
   macro avg       0.61      0.61      0.61      1984
weighted avg       0.62      0.62      0.62      1984

[[726 347]
 [410 501]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.68      0.66      1073
           1       0.59      0.54      0.57       911

    accuracy                           0.62      1984
   macro avg       0.61      0.61      0.61      1984
weighted avg       0.62      0.62      0.62      1984

[[732 341]
 [416 495]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-25 01:22:29,991][0m Trial 31 finished with value: 0.6129241431957582 and parameters: {'learning_rate': 1.4999439729345387e-05, 'per_device_train_batch_size': 16, 'weight_decay': 7.033090376529723e-09, 'num_train_epochs': 7.880822380170101}. Best is trial 17 with value: 0.6181159420289855.[0m


Trial 31 finished with value: 0.6129241431957582 and parameters: {'learning_rate': 1.4999439729345387e-05, 'per_device_train_batch_size': 16, 'weight_decay': 7.033090376529723e-09, 'num_train_epochs': 7.880822380170101}. Best is trial 17 with value: 0.6181159420289855.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.662681,0.589214,0.576251,0.58433,0.578792
2,0.677800,0.676412,0.599294,0.599088,0.600909,0.601452
3,0.587000,0.81418,0.621976,0.616362,0.618476,0.616207
4,0.407700,1.104992,0.621472,0.612914,0.618163,0.613421
5,0.227300,1.410774,0.619456,0.616118,0.616455,0.615948
5,0.127000,1.612804,0.615927,0.60993,0.612242,0.609869


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      0.71      0.65      1073
           1       0.57      0.45      0.50       911

    accuracy                           0.59      1984
   macro avg       0.58      0.58      0.58      1984
weighted avg       0.59      0.59      0.58      1984

[[758 315]
 [500 411]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.58      0.61      1073
           1       0.56      0.63      0.59       911

    accuracy                           0.60      1984
   macro avg       0.60      0.60      0.60      1984
weighted avg       0.60      0.60      0.60      1984

[[617 456]
 [339 572]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.69      0.66      1073
           1       0.60      0.55      0.57       911

    accuracy                           0.62      1984
   macro avg       0.62      0.62      0.62      1984
weighted avg       0.62      0.62      0.62      1984

[[737 336]
 [414 497]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.71      0.67      1073
           1       0.60      0.51      0.56       911

    accuracy                           0.62      1984
   macro avg       0.62      0.61      0.61      1984
weighted avg       0.62      0.62      0.62      1984

[[764 309]
 [442 469]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.66      0.65      1073
           1       0.59      0.57      0.58       911

    accuracy                           0.62      1984
   macro avg       0.62      0.62      0.62      1984
weighted avg       0.62      0.62      0.62      1984

[[707 366]
 [389 522]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.68      0.66      1073
           1       0.59      0.54      0.56       911

    accuracy                           0.62      1984
   macro avg       0.61      0.61      0.61      1984
weighted avg       0.61      0.62      0.61      1984

[[734 339]
 [423 488]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-25 02:10:16,625][0m Trial 32 finished with value: 0.6099304906933902 and parameters: {'learning_rate': 1.560834173448929e-05, 'per_device_train_batch_size': 16, 'weight_decay': 6.262729943539074e-09, 'num_train_epochs': 5.9312290787535895}. Best is trial 17 with value: 0.6181159420289855.[0m


Trial 32 finished with value: 0.6099304906933902 and parameters: {'learning_rate': 1.560834173448929e-05, 'per_device_train_batch_size': 16, 'weight_decay': 6.262729943539074e-09, 'num_train_epochs': 5.9312290787535895}. Best is trial 17 with value: 0.6181159420289855.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.67351,0.580141,0.527001,0.585865,0.556483


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.58      0.85      0.69      1073
           1       0.60      0.27      0.37       911

    accuracy                           0.58      1984
   macro avg       0.59      0.56      0.53      1984
weighted avg       0.59      0.58      0.54      1984

[[908 165]
 [668 243]]


[32m[I 2021-12-25 02:18:23,384][0m Trial 33 pruned. [0m


Trial 33 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.674995,0.573589,0.547776,0.568103,0.5578


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.58      0.75      0.66      1073
           1       0.55      0.36      0.44       911

    accuracy                           0.57      1984
   macro avg       0.57      0.56      0.55      1984
weighted avg       0.57      0.57      0.56      1984

[[806 267]
 [579 332]]


[32m[I 2021-12-25 02:26:29,534][0m Trial 34 pruned. [0m


Trial 34 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.662557,0.590726,0.577424,0.58599,0.580107
2,0.677800,0.677057,0.598286,0.59808,0.599899,0.600438
3,0.585600,0.824875,0.620968,0.615621,0.617465,0.615441
4,0.399400,1.173497,0.61744,0.60879,0.613934,0.609361
5,0.211400,1.563085,0.61996,0.617082,0.617201,0.616995
6,0.114600,1.96501,0.61996,0.612123,0.616469,0.612437
6,0.065700,2.04958,0.61996,0.61268,0.616415,0.612851


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      0.71      0.65      1073
           1       0.57      0.45      0.50       911

    accuracy                           0.59      1984
   macro avg       0.59      0.58      0.58      1984
weighted avg       0.59      0.59      0.58      1984

[[762 311]
 [501 410]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.57      0.61      1073
           1       0.56      0.63      0.59       911

    accuracy                           0.60      1984
   macro avg       0.60      0.60      0.60      1984
weighted avg       0.60      0.60      0.60      1984

[[616 457]
 [340 571]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.68      0.66      1073
           1       0.59      0.55      0.57       911

    accuracy                           0.62      1984
   macro avg       0.62      0.62      0.62      1984
weighted avg       0.62      0.62      0.62      1984

[[733 340]
 [412 499]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.71      0.67      1073
           1       0.60      0.51      0.55       911

    accuracy                           0.62      1984
   macro avg       0.61      0.61      0.61      1984
weighted avg       0.62      0.62      0.61      1984

[[760 313]
 [446 465]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.65      0.65      1073
           1       0.59      0.58      0.58       911

    accuracy                           0.62      1984
   macro avg       0.62      0.62      0.62      1984
weighted avg       0.62      0.62      0.62      1984

[[701 372]
 [382 529]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.70      0.67      1073
           1       0.60      0.52      0.56       911

    accuracy                           0.62      1984
   macro avg       0.62      0.61      0.61      1984
weighted avg       0.62      0.62      0.62      1984

[[756 317]
 [437 474]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.70      0.67      1073
           1       0.60      0.53      0.56       911

    accuracy                           0.62      1984
   macro avg       0.62      0.61      0.61      1984
weighted avg       0.62      0.62      0.62      1984

[[751 322]
 [432 479]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-25 03:20:38,946][0m Trial 35 finished with value: 0.612679790548154 and parameters: {'learning_rate': 1.5755598387236765e-05, 'per_device_train_batch_size': 16, 'weight_decay': 1.4993994922190472e-07, 'num_train_epochs': 6.703731632600585}. Best is trial 17 with value: 0.6181159420289855.[0m


Trial 35 finished with value: 0.612679790548154 and parameters: {'learning_rate': 1.5755598387236765e-05, 'per_device_train_batch_size': 16, 'weight_decay': 1.4993994922190472e-07, 'num_train_epochs': 6.703731632600585}. Best is trial 17 with value: 0.6181159420289855.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.695037,0.540827,0.350998,0.270413,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-25 03:28:45,220][0m Trial 36 pruned. [0m


Trial 36 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.692,0.822327,0.577621,0.53815,0.576421,0.557467


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.58      0.80      0.67      1073
           1       0.57      0.31      0.40       911

    accuracy                           0.58      1984
   macro avg       0.58      0.56      0.54      1984
weighted avg       0.58      0.58      0.55      1984

[[863 210]
 [628 283]]


[32m[I 2021-12-25 03:40:21,251][0m Trial 37 pruned. [0m


Trial 37 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.688858,0.544355,0.438329,0.532124,0.512294


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.55      0.90      0.68      1073
           1       0.52      0.12      0.19       911

    accuracy                           0.54      1984
   macro avg       0.53      0.51      0.44      1984
weighted avg       0.53      0.54      0.46      1984

[[971 102]
 [802 109]]


[32m[I 2021-12-25 03:48:27,181][0m Trial 38 pruned. [0m


Trial 38 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.690755,0.540827,0.350998,0.270413,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-25 03:56:33,104][0m Trial 39 pruned. [0m


Trial 39 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.690593,0.540827,0.350998,0.270413,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-25 04:04:39,324][0m Trial 40 pruned. [0m


Trial 40 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.662738,0.595262,0.582185,0.590883,0.584715
2,0.677200,0.689819,0.588206,0.588118,0.593887,0.593355


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.61      0.71      0.66      1073
           1       0.57      0.46      0.51       911

    accuracy                           0.60      1984
   macro avg       0.59      0.58      0.58      1984
weighted avg       0.59      0.60      0.59      1984

[[766 307]
 [496 415]]


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.53      0.58      1073
           1       0.54      0.66      0.59       911

    accuracy                           0.59      1984
   macro avg       0.59      0.59      0.59      1984
weighted avg       0.60      0.59      0.59      1984

[[569 504]
 [313 598]]


[32m[I 2021-12-25 04:20:47,192][0m Trial 41 pruned. [0m


Trial 41 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.668297,0.579133,0.567528,0.57354,0.569555


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      0.69      0.64      1073
           1       0.55      0.45      0.50       911

    accuracy                           0.58      1984
   macro avg       0.57      0.57      0.57      1984
weighted avg       0.58      0.58      0.57      1984

[[737 336]
 [499 412]]


[32m[I 2021-12-25 04:28:53,388][0m Trial 42 pruned. [0m


Trial 42 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.684029,0.553931,0.456224,0.554844,0.522805


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.55      0.90      0.69      1073
           1       0.56      0.14      0.23       911

    accuracy                           0.55      1984
   macro avg       0.55      0.52      0.46      1984
weighted avg       0.55      0.55      0.48      1984

[[970 103]
 [782 129]]


[32m[I 2021-12-25 04:36:59,195][0m Trial 43 pruned. [0m


Trial 43 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6984,0.824104,0.592742,0.554971,0.596513,0.572773


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.59      0.82      0.68      1073
           1       0.60      0.33      0.43       911

    accuracy                           0.59      1984
   macro avg       0.60      0.57      0.55      1984
weighted avg       0.60      0.59      0.57      1984

[[877 196]
 [612 299]]


[32m[I 2021-12-25 04:48:28,224][0m Trial 44 pruned. [0m


Trial 44 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.668364,0.577621,0.565453,0.571894,0.567743


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.59      0.69      0.64      1073
           1       0.55      0.45      0.49       911

    accuracy                           0.58      1984
   macro avg       0.57      0.57      0.57      1984
weighted avg       0.57      0.58      0.57      1984

[[739 334]
 [504 407]]


[32m[I 2021-12-25 04:56:33,645][0m Trial 45 pruned. [0m


Trial 45 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.689836,0.540827,0.350998,0.270413,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-25 05:04:39,542][0m Trial 46 pruned. [0m


Trial 46 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.677818,0.568044,0.525039,0.56417,0.547122


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.57      0.80      0.67      1073
           1       0.56      0.29      0.38       911

    accuracy                           0.57      1984
   macro avg       0.56      0.55      0.53      1984
weighted avg       0.56      0.57      0.54      1984

[[862 211]
 [646 265]]


[32m[I 2021-12-25 05:12:45,637][0m Trial 47 pruned. [0m


Trial 47 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.837337,0.540827,0.350998,0.270413,0.5


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1073
           1       0.00      0.00      0.00       911

    accuracy                           0.54      1984
   macro avg       0.27      0.50      0.35      1984
weighted avg       0.29      0.54      0.38      1984

[[1073    0]
 [ 911    0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-25 05:20:52,126][0m Trial 48 pruned. [0m


Trial 48 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-bert-base-uncased",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.15.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weigh

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.678233,0.589214,0.54771,0.593506,0.568268


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, __index_level_0__.
***** Running Evaluation *****
  Num examples = 1984
  Batch size = 8


              precision    recall  f1-score   support

           0       0.59      0.82      0.68      1073
           1       0.60      0.31      0.41       911

    accuracy                           0.59      1984
   macro avg       0.59      0.57      0.55      1984
weighted avg       0.59      0.59      0.56      1984

[[885 188]
 [627 284]]


[32m[I 2021-12-25 05:28:58,803][0m Trial 49 pruned. [0m


Trial 49 pruned. 


[32m[I 2021-12-25 05:28:58,853][0m A new study created in memory with name: no-name-92bef35c-4b3e-4714-8b2c-6342a31faec0[0m


A new study created in memory with name: no-name-92bef35c-4b3e-4714-8b2c-6342a31faec0


In [13]:
storage_name

'sqlite:///SIMCSE_BERT_myPers_CON.db'

In [14]:
study_name

'SIMCSE_BERT_myPers_CON'

In [15]:
study = optuna.create_study(study_name=study_name, storage=storage_name, load_if_exists=True, direction="maximize")
df = study.trials_dataframe(attrs=("number", "value", "params", "state"))

[32m[I 2021-12-25 05:28:58,994][0m Using an existing study with name 'SIMCSE_BERT_myPers_CON' instead of creating a new one.[0m


Using an existing study with name 'SIMCSE_BERT_myPers_CON' instead of creating a new one.


In [16]:
df

Unnamed: 0,number,value,params_learning_rate,params_num_train_epochs,params_per_device_train_batch_size,params_weight_decay,state
0,0,0.57559,3.001811e-06,4.105974,8,7.674193e-12,COMPLETE
1,1,0.476413,4.051154e-07,4.350658,2,0.005144719,COMPLETE
2,2,0.350998,9.297905e-05,6.435028,8,3.103698e-06,COMPLETE
3,3,0.584742,1.189616e-05,1.76999,2,7.353256e-05,COMPLETE
4,4,0.350998,0.001497365,3.599022,8,3.895097e-10,COMPLETE
5,5,0.31468,0.07779768,1.081947,2,4.260396e-08,PRUNED
6,6,0.37382,0.000121403,3.594346,16,3.132025e-06,PRUNED
7,7,0.588056,4.169727e-05,1.113719,4,0.0001303774,COMPLETE
8,8,0.609633,1.303811e-05,4.981086,8,0.0002694548,COMPLETE
9,9,0.437731,8.831596e-08,5.703767,4,1.025546e-06,PRUNED


In [17]:
fig = optuna.visualization.plot_param_importances(study)
fig.show()

In [18]:
best_run

BestRun(run_id='17', objective=0.6181159420289855, hyperparameters={'learning_rate': 1.7443883599512133e-05, 'num_train_epochs': 5.009529088071898, 'per_device_train_batch_size': 16, 'weight_decay': 2.4793361471561817e-07})

In [19]:
optuna.visualization.plot_intermediate_values(study)

In [20]:
optuna.visualization.plot_parallel_coordinate(study)

In [21]:
optuna.visualization.plot_optimization_history(study)

In [22]:
optuna.visualization.plot_contour(study)

In [23]:
optuna.visualization.plot_slice(study)

In [24]:
optuna.visualization.plot_edf(study)