# Initialization

In [None]:
# Install the Hugging Face `transformers` and `datasets` packages that the import cell below relies on.
!pip install transformers datasets --quiet

[K     |████████████████████████████████| 3.3 MB 9.8 MB/s 
[K     |████████████████████████████████| 298 kB 72.4 MB/s 
[K     |████████████████████████████████| 895 kB 41.3 MB/s 
[K     |████████████████████████████████| 61 kB 611 kB/s 
[K     |████████████████████████████████| 596 kB 64.9 MB/s 
[K     |████████████████████████████████| 3.3 MB 54.1 MB/s 
[K     |████████████████████████████████| 132 kB 70.8 MB/s 
[K     |████████████████████████████████| 243 kB 58.4 MB/s 
[K     |████████████████████████████████| 1.1 MB 54.7 MB/s 
[K     |████████████████████████████████| 271 kB 65.2 MB/s 
[K     |████████████████████████████████| 192 kB 71.7 MB/s 
[K     |████████████████████████████████| 160 kB 71.6 MB/s 
[?25h

In [None]:
from transformers import TrainingArguments
from transformers import Trainer
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer

from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, classification_report, confusion_matrix
from datasets import Dataset
from datasets import load_metric

import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from google.colab import drive

# Data Preparation

In [None]:
# Mount Google Drive at /content/drive/ so the dataset files stored there can be read.
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
# Make the dataset folder the working directory so files can be opened by bare filename.
# NOTE(review): the path is relative, so this presumably relies on the kernel starting in /content — confirm.
%cd 'drive/MyDrive/Masterarbeit/Colab Notebooks/OVERVIEW MBTI/Datasets/URL_Balanced_MASK'

/content/drive/.shortcut-targets-by-id/1aHXlqhpj1STohhfU4gn53D4whaLH__Jz/Masterarbeit/Colab Notebooks/OVERVIEW MBTI/Datasets/URL_Balanced_MASK


In [None]:
# Load the MBTI "IE" dataset from the current working directory.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in pandas 2.0,
# where it raises a TypeError. `on_bad_lines="warn"` (pandas >= 1.3) is the
# equivalent: malformed rows are dropped with a warning instead of aborting the read.
dfIE = pd.read_csv('MBTI_IE_URL_Balanced_MASK.csv', sep=",", on_bad_lines="warn")
dfIE

Unnamed: 0,text,label
0,i like that you are kind as [MASK] i find that...,1
1,oh my you are right who really talks like tha...,0
2,yep yep yep especially the last one yep agree ...,0
3,things that are generalizable to the entire po...,0
4,work student hobbies studying gaming reading d...,0
...,...,...
3186,i dont recall much i just remember your willin...,1
3187,no because its complete fucking bullshit if an...,1
3188,when some days you talk with every people you ...,1
3189,it gets 100 f here in the summer only flunkies...,1


# Model Training

In [None]:
# Checkpoint identifier passed to `from_pretrained` for both the tokenizer and the classifier.
modeltype = "princeton-nlp/sup-simcse-roberta-base"

In [None]:
# Stratified 80/20 hold-out split; the fixed random_state keeps the partition reproducible.
train, test = train_test_split(
    dfIE,
    test_size=0.2,
    random_state=0,
    stratify=dfIE["label"],
)

# Convert both pandas frames into `datasets.Dataset` objects (the names are reused).
train, test = (Dataset.from_pandas(frame) for frame in (train, test))

# Tokenizer matching the checkpoint named by `modeltype`.
tokenizer = AutoTokenizer.from_pretrained(modeltype)

def tokenize_function(examples):
    """Tokenize the ``"text"`` entry of ``examples`` with the module-level tokenizer.

    Sequences are truncated and padded with ``padding="max_length"``.
    The tokenizer's output is returned unchanged.
    """
    texts = examples["text"]
    return tokenizer(texts, truncation=True, padding="max_length")

# Tokenize both splits in batched mode (train first, then test).
tokenized_train, tokenized_test = (
    split.map(tokenize_function, batched=True) for split in (train, test)
)

# Aliases for the tokenized splits.
full_train_dataset, full_eval_dataset = tokenized_train, tokenized_test

# Two-label sequence classification model loaded from the `modeltype` checkpoint.
model = AutoModelForSequenceClassification.from_pretrained(
    modeltype,
    num_labels=2,
)

# Trainer configuration: evaluate once per epoch and do not save checkpoints.
# NOTE(review): save_steps / save_total_limit presumably have no effect while
# save_strategy is "no" — confirm before relying on them.
training_args = TrainingArguments(
    output_dir="SIMCSE_RoBERTa_IE_MASK",
    evaluation_strategy="epoch",
    save_strategy="no",
    save_steps=100000,
    save_total_limit=1,
    metric_for_best_model="eval_f1",
)

def compute_metrics(pred):
    """Compute accuracy and macro-averaged precision/recall/F1 for one evaluation pass.

    Parameters
    ----------
    pred
        Object exposing ``label_ids`` (the reference labels) and ``predictions``
        (an array whose last axis is arg-maxed to obtain the predicted class).

    Returns
    -------
    dict
        Keys ``'accuracy'``, ``'f1'``, ``'precision'`` and ``'recall'``.

    Side effects
    ------------
    Prints the per-class classification report and the confusion matrix.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)

    # zero_division=0 yields the same scores as the default ("warn" also reports 0.0
    # for a class that is never predicted) but without emitting an
    # UndefinedMetricWarning on every evaluation of a collapsed model.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='macro', zero_division=0
    )
    acc = accuracy_score(labels, preds)

    print(classification_report(labels, preds, labels=[0, 1], zero_division=0))
    # Pin the label set so the matrix is always 2x2, matching the report above.
    print(confusion_matrix(labels, preds, labels=[0, 1]))

    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

Downloading:   0%|          | 0.00/255 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/738 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/780k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/239 [00:00<?, ?B/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Downloading:   0%|          | 0.00/476M [00:00<?, ?B/s]

Some weights of the model checkpoint at princeton-nlp/sup-simcse-roberta-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model 

# Hyperparameter Optimization

In [None]:
# Install Optuna, which is imported by the hyperparameter search cell below.
! pip install optuna --quiet

[K     |████████████████████████████████| 308 kB 8.6 MB/s 
[K     |████████████████████████████████| 80 kB 7.7 MB/s 
[K     |████████████████████████████████| 209 kB 66.6 MB/s 
[K     |████████████████████████████████| 75 kB 5.1 MB/s 
[K     |████████████████████████████████| 112 kB 70.1 MB/s 
[K     |████████████████████████████████| 49 kB 6.8 MB/s 
[K     |████████████████████████████████| 149 kB 74.3 MB/s 
[?25h  Building wheel for pyperclip (setup.py) ... [?25l[?25hdone


In [None]:
def model_init():
    """Build and return a new two-label classifier from the ``modeltype`` checkpoint.

    Takes no arguments so it can be handed to ``Trainer(model_init=...)``.
    """
    fresh_model = AutoModelForSequenceClassification.from_pretrained(
        modeltype,
        num_labels=2,
    )
    return fresh_model

In [None]:
# The Trainer receives the `model_init` factory rather than a model instance;
# the tokenized splits and the metric callback are wired in here as well.
trainer = Trainer(
    model_init=model_init,
    args=training_args,
    train_dataset=full_train_dataset,
    eval_dataset=full_eval_dataset,
    compute_metrics=compute_metrics,
)

loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": true,
  "v

In [None]:
import sklearn.metrics as metrics
import optuna
import sys
import logging

def objective(metrics):
    """Return the ``'eval_f1'`` entry of ``metrics``, the score the search optimizes."""
    score = metrics['eval_f1']
    return score

def hyperparameter_space(trial):
    """Draw one hyperparameter configuration from ``trial``.

    ``trial`` must provide ``suggest_float(name, low, high, log=...)`` and
    ``suggest_categorical(name, choices)``. The returned dict maps each
    hyperparameter name to the value ``trial`` suggested for it.
    """
    # Values are requested in the same order in which they appear in the result.
    learning_rate = trial.suggest_float("learning_rate", 5e-8, 5e-1, log=True)
    batch_size = trial.suggest_categorical("per_device_train_batch_size", [2, 4, 8, 16])
    weight_decay = trial.suggest_float("weight_decay", 5e-12, 5e-1, log=True)
    epochs = trial.suggest_float("num_train_epochs", 1, 8, log=True)

    # Further candidates, currently disabled:
    #   "adam_epsilon": trial.suggest_float("adam_epsilon", 1e-10, 1e-6, log=True)
    #   "seed": trial.suggest_float("seed", 10, 60, log=True)
    return {
        "learning_rate": learning_rate,
        "per_device_train_batch_size": batch_size,
        "weight_decay": weight_decay,
        "num_train_epochs": epochs,
    }

# Echo Optuna's log records to stdout so trial results appear in the cell output.
optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))
study_name = "SIMCSE_RoBERTa_MASK_IE"  # Unique identifier of the study.
# The study is persisted in a SQLite file in the current working directory.
storage_name = "sqlite:///{}.db".format(study_name)

# study_name / storage / load_if_exists are extra keyword arguments that the Trainer
# hands on to Optuna's study creation. load_if_exists=True lets this cell be re-run
# against an existing database file (new trials are appended to the stored study)
# instead of failing because a study with this name already exists.
best_run = trainer.hyperparameter_search(
    hp_space=hyperparameter_space,
    compute_objective=objective,
    n_trials=50,
    direction="maximize",
    study_name=study_name,
    storage=storage_name,
    load_if_exists=True,
)

# Re-open the persisted study so `study` actually contains the trials run above.
# The previous `optuna.create_study()` call produced a new, empty in-memory study
# that had no connection to the search.
study = optuna.load_study(study_name=study_name, storage=storage_name)

[32m[I 2021-12-09 19:50:50,612][0m A new study created in RDB with name: SIMCSE_RoBERTa_MASK_IE[0m


A new study created in RDB with name: SIMCSE_RoBERTa_MASK_IE


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.688965,0.574335,0.570157,0.577554,0.574491
1,0.692400,0.687968,0.58529,0.581087,0.589078,0.585448


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.59      0.47      0.53       320
           1       0.56      0.67      0.61       319

    accuracy                           0.57       639
   macro avg       0.58      0.57      0.57       639
weighted avg       0.58      0.57      0.57       639

[[152 168]
 [104 215]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.61      0.48      0.54       320
           1       0.57      0.69      0.62       319

    accuracy                           0.59       639
   macro avg       0.59      0.59      0.58       639
weighted avg       0.59      0.59      0.58       639

[[155 165]
 [100 219]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 19:56:12,589][0m Trial 0 finished with value: 0.5810872657554579 and parameters: {'learning_rate': 4.2151917527372246e-07, 'per_device_train_batch_size': 8, 'weight_decay': 6.15204879642629e-11, 'num_train_epochs': 1.971005532730213}. Best is trial 0 with value: 0.5810872657554579.[0m


Trial 0 finished with value: 0.5810872657554579 and parameters: {'learning_rate': 4.2151917527372246e-07, 'per_device_train_batch_size': 8, 'weight_decay': 6.15204879642629e-11, 'num_train_epochs': 1.971005532730213}. Best is trial 0 with value: 0.5810872657554579.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,8.206353,0.499218,0.332985,0.249609,0.5
2,No log,3.634358,0.499218,0.332985,0.249609,0.5
2,No log,0.81479,0.499218,0.332985,0.249609,0.5


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       320
           1       0.50      1.00      0.67       319

    accuracy                           0.50       639
   macro avg       0.25      0.50      0.33       639
weighted avg       0.25      0.50      0.33       639

[[  0 320]
 [  0 319]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       320
           1       0.50      1.00      0.67       319

    accuracy                           0.50       639
   macro avg       0.25      0.50      0.33       639
weighted avg       0.25      0.50      0.33       639

[[  0 320]
 [  0 319]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       320
           1       0.50      1.00      0.67       319

    accuracy                           0.50       639
   macro avg       0.25      0.50      0.33       639
weighted avg       0.25      0.50      0.33       639

[[  0 320]
 [  0 319]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 20:02:16,724][0m Trial 1 finished with value: 0.33298538622129437 and parameters: {'learning_rate': 0.20864993799773618, 'per_device_train_batch_size': 16, 'weight_decay': 1.526087338992115e-08, 'num_train_epochs': 2.320750938500846}. Best is trial 0 with value: 0.5810872657554

Trial 1 finished with value: 0.33298538622129437 and parameters: {'learning_rate': 0.20864993799773618, 'per_device_train_batch_size': 16, 'weight_decay': 1.526087338992115e-08, 'num_train_epochs': 2.320750938500846}. Best is trial 0 with value: 0.5810872657554579.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.687024,0.56651,0.555176,0.5737,0.566262
2,No log,0.684329,0.571205,0.568456,0.573234,0.571331
2,No log,0.684318,0.5759,0.573493,0.577812,0.576019


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.55      0.72      0.63       320
           1       0.60      0.41      0.48       319

    accuracy                           0.57       639
   macro avg       0.57      0.57      0.56       639
weighted avg       0.57      0.57      0.56       639

[[232  88]
 [189 130]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.59      0.49      0.53       320
           1       0.56      0.65      0.60       319

    accuracy                           0.57       639
   macro avg       0.57      0.57      0.57       639
weighted avg       0.57      0.57      0.57       639

[[157 163]
 [111 208]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.59      0.50      0.54       320
           1       0.57      0.65      0.61       319

    accuracy                           0.58       639
   macro avg       0.58      0.58      0.57       639
weighted avg       0.58      0.58      0.57       639

[[160 160]
 [111 208]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 20:07:47,083][0m Trial 2 finished with value: 0.5734932281160641 and parameters: {'learning_rate': 1.1355065911442271e-06, 'per_device_train_batch_size': 16, 'weight_decay': 8.377336014580731e-10, 'num_train_epochs': 2.0817583837921267}. Best is trial 0 with value: 0.5810872657554579.[0m


Trial 2 finished with value: 0.5734932281160641 and parameters: {'learning_rate': 1.1355065911442271e-06, 'per_device_train_batch_size': 16, 'weight_decay': 8.377336014580731e-10, 'num_train_epochs': 2.0817583837921267}. Best is trial 0 with value: 0.5810872657554579.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7214,0.694872,0.500782,0.333681,0.250391,0.5
1,0.6982,0.693148,0.499218,0.332985,0.249609,0.5


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.50      1.00      0.67       320
           1       0.00      0.00      0.00       319

    accuracy                           0.50       639
   macro avg       0.25      0.50      0.33       639
weighted avg       0.25      0.50      0.33       639

[[320   0]
 [319   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       320
           1       0.50      1.00      0.67       319

    accuracy                           0.50       639
   macro avg       0.25      0.50      0.33       639
weighted avg       0.25      0.50      0.33       639

[[  0 320]
 [  0 319]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 20:12:14,857][0m Trial 3 finished with value: 0.33298538622129437 and parameters: {'learning_rate': 0.000543933982245278, 'per_device_train_batch_size': 2, 'weight_decay': 2.3268732316543053e-10, 'num_train_epochs': 1.23689484226937}. Best is trial 0 with value: 0.5810872657554

Trial 3 finished with value: 0.33298538622129437 and parameters: {'learning_rate': 0.000543933982245278, 'per_device_train_batch_size': 2, 'weight_decay': 2.3268732316543053e-10, 'num_train_epochs': 1.23689484226937}. Best is trial 0 with value: 0.5810872657554579.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6819,0.652038,0.613459,0.609031,0.618586,0.613293
2,0.6479,0.64418,0.649452,0.646011,0.65524,0.6493
3,0.6182,0.648905,0.651017,0.646273,0.659893,0.6512
4,0.5621,0.663421,0.657277,0.65579,0.659873,0.657176
4,0.5621,0.664154,0.657277,0.656304,0.658942,0.657195


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.59      0.72      0.65       320
           1       0.64      0.51      0.57       319

    accuracy                           0.61       639
   macro avg       0.62      0.61      0.61       639
weighted avg       0.62      0.61      0.61       639

[[230  90]
 [157 162]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.75      0.68       320
           1       0.68      0.55      0.61       319

    accuracy                           0.65       639
   macro avg       0.66      0.65      0.65       639
weighted avg       0.66      0.65      0.65       639

[[239  81]
 [143 176]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.70      0.53      0.61       320
           1       0.62      0.77      0.69       319

    accuracy                           0.65       639
   macro avg       0.66      0.65      0.65       639
weighted avg       0.66      0.65      0.65       639

[[171 149]
 [ 74 245]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.72      0.68       320
           1       0.68      0.59      0.63       319

    accuracy                           0.66       639
   macro avg       0.66      0.66      0.66       639
weighted avg       0.66      0.66      0.66       639

[[231  89]
 [130 189]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.71      0.67       320
           1       0.67      0.61      0.64       319

    accuracy                           0.66       639
   macro avg       0.66      0.66      0.66       639
weighted avg       0.66      0.66      0.66       639

[[227  93]
 [126 193]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 20:25:00,094][0m Trial 4 finished with value: 0.656303955398917 and parameters: {'learning_rate': 3.4698872258481893e-06, 'per_device_train_batch_size': 4, 'weight_decay': 0.00010004516295065008, 'num_train_epochs': 4.2836023326677655}. Best is trial 4 with value: 0.656303955398917.[0m


Trial 4 finished with value: 0.656303955398917 and parameters: {'learning_rate': 3.4698872258481893e-06, 'per_device_train_batch_size': 4, 'weight_decay': 0.00010004516295065008, 'num_train_epochs': 4.2836023326677655}. Best is trial 4 with value: 0.656303955398917.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7343,0.694572,0.500782,0.333681,0.250391,0.5


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.50      1.00      0.67       320
           1       0.00      0.00      0.00       319

    accuracy                           0.50       639
   macro avg       0.25      0.50      0.33       639
weighted avg       0.25      0.50      0.33       639

[[320   0]
 [319   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 20:28:29,136][0m Trial 5 pruned. [0m


Trial 5 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,21.2312,18.935816,0.499218,0.332985,0.249609,0.5


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       320
           1       0.50      1.00      0.67       319

    accuracy                           0.50       639
   macro avg       0.25      0.50      0.33       639
weighted avg       0.25      0.50      0.33       639

[[  0 320]
 [  0 319]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 20:31:58,686][0m Trial 6 pruned. [0m


Trial 6 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,2.4029,0.71612,0.499218,0.332985,0.249609,0.5


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       320
           1       0.50      1.00      0.67       319

    accuracy                           0.50       639
   macro avg       0.25      0.50      0.33       639
weighted avg       0.25      0.50      0.33       639

[[  0 320]
 [  0 319]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 20:35:27,934][0m Trial 7 pruned. [0m


Trial 7 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6994,0.647554,0.618153,0.596198,0.651743,0.61852
2,0.672,0.697709,0.658842,0.654817,0.666319,0.658675
3,0.6384,0.678133,0.633803,0.63378,0.633856,0.633817
4,0.4952,1.365023,0.633803,0.632701,0.635296,0.633719


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.72      0.38      0.50       320
           1       0.58      0.85      0.69       319

    accuracy                           0.62       639
   macro avg       0.65      0.62      0.60       639
weighted avg       0.65      0.62      0.60       639

[[123 197]
 [ 47 272]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.63      0.77      0.69       320
           1       0.70      0.55      0.62       319

    accuracy                           0.66       639
   macro avg       0.67      0.66      0.65       639
weighted avg       0.67      0.66      0.65       639

[[245  75]
 [143 176]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.62      0.63       320
           1       0.63      0.64      0.64       319

    accuracy                           0.63       639
   macro avg       0.63      0.63      0.63       639
weighted avg       0.63      0.63      0.63       639

[[200 120]
 [114 205]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.62      0.69      0.65       320
           1       0.65      0.58      0.61       319

    accuracy                           0.63       639
   macro avg       0.64      0.63      0.63       639
weighted avg       0.64      0.63      0.63       639

[[220 100]
 [134 185]]


[32m[I 2021-12-09 20:47:13,795][0m Trial 8 pruned. [0m


Trial 8 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7335,0.695108,0.500782,0.333681,0.250391,0.5


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.50      1.00      0.67       320
           1       0.00      0.00      0.00       319

    accuracy                           0.50       639
   macro avg       0.25      0.50      0.33       639
weighted avg       0.25      0.50      0.33       639

[[320   0]
 [319   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 20:50:42,448][0m Trial 9 pruned. [0m


Trial 9 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6921,0.69076,0.539906,0.46652,0.590489,0.540488


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.17      0.27       320
           1       0.52      0.91      0.66       319

    accuracy                           0.54       639
   macro avg       0.59      0.54      0.47       639
weighted avg       0.59      0.54      0.47       639

[[ 54 266]
 [ 28 291]]


[32m[I 2021-12-09 20:53:40,736][0m Trial 10 pruned. [0m


Trial 10 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.646595,0.627543,0.624963,0.63095,0.627415
2,0.665000,0.633386,0.668232,0.667344,0.66991,0.668152
3,0.665000,0.62495,0.655712,0.654554,0.65798,0.655804
3,0.602300,0.623876,0.666667,0.666402,0.667125,0.666624


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.61      0.71      0.66       320
           1       0.65      0.55      0.59       319

    accuracy                           0.63       639
   macro avg       0.63      0.63      0.62       639
weighted avg       0.63      0.63      0.63       639

[[227  93]
 [145 174]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.72      0.68       320
           1       0.69      0.62      0.65       319

    accuracy                           0.67       639
   macro avg       0.67      0.67      0.67       639
weighted avg       0.67      0.67      0.67       639

[[230  90]
 [122 197]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.68      0.60      0.63       320
           1       0.64      0.71      0.67       319

    accuracy                           0.66       639
   macro avg       0.66      0.66      0.65       639
weighted avg       0.66      0.66      0.65       639

[[191 129]
 [ 91 228]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.69      0.68       320
           1       0.68      0.64      0.66       319

    accuracy                           0.67       639
   macro avg       0.67      0.67      0.67       639
weighted avg       0.67      0.67      0.67       639

[[222  98]
 [115 204]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 21:03:44,091][0m Trial 11 finished with value: 0.6664019588379326 and parameters: {'learning_rate': 4.196349729766356e-06, 'per_device_train_batch_size': 8, 'weight_decay': 1.3722999280828923e-06, 'num_train_epochs': 3.696132156373117}. Best is trial 11 with value: 0.6664019588379326.[0m


Trial 11 finished with value: 0.6664019588379326 and parameters: {'learning_rate': 4.196349729766356e-06, 'per_device_train_batch_size': 8, 'weight_decay': 1.3722999280828923e-06, 'num_train_epochs': 3.696132156373117}. Best is trial 11 with value: 0.6664019588379326.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.642935,0.629108,0.624339,0.636304,0.629286
2,0.674100,0.639273,0.660407,0.659202,0.662554,0.660315
3,0.674100,0.649462,0.654147,0.653383,0.6554,0.654075
4,0.566200,0.762854,0.663537,0.662077,0.666589,0.663641
4,0.339000,0.904378,0.663537,0.66289,0.66469,0.66347


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.52      0.58       320
           1       0.60      0.74      0.67       319

    accuracy                           0.63       639
   macro avg       0.64      0.63      0.62       639
weighted avg       0.64      0.63      0.62       639

[[165 155]
 [ 82 237]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.72      0.68       320
           1       0.68      0.60      0.64       319

    accuracy                           0.66       639
   macro avg       0.66      0.66      0.66       639
weighted avg       0.66      0.66      0.66       639

[[230  90]
 [127 192]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.70      0.67       320
           1       0.67      0.61      0.64       319

    accuracy                           0.65       639
   macro avg       0.66      0.65      0.65       639
weighted avg       0.66      0.65      0.65       639

[[224  96]
 [125 194]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.69      0.60      0.64       320
           1       0.64      0.73      0.68       319

    accuracy                           0.66       639
   macro avg       0.67      0.66      0.66       639
weighted avg       0.67      0.66      0.66       639

[[191 129]
 [ 86 233]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.71      0.68       320
           1       0.68      0.62      0.65       319

    accuracy                           0.66       639
   macro avg       0.66      0.66      0.66       639
weighted avg       0.66      0.66      0.66       639

[[226  94]
 [121 198]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 21:16:51,241][0m Trial 12 finished with value: 0.6628895045112468 and parameters: {'learning_rate': 1.7604727608175072e-05, 'per_device_train_batch_size': 8, 'weight_decay': 2.3271948266010906e-06, 'num_train_epochs': 4.8431201681791585}. Best is trial 11 with value: 0.6664019588379326.[0m


Trial 12 finished with value: 0.6628895045112468 and parameters: {'learning_rate': 1.7604727608175072e-05, 'per_device_train_batch_size': 8, 'weight_decay': 2.3271948266010906e-06, 'num_train_epochs': 4.8431201681791585}. Best is trial 11 with value: 0.6664019588379326.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.675833,0.56338,0.536457,0.583218,0.563759


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.62      0.32      0.42       320
           1       0.54      0.81      0.65       319

    accuracy                           0.56       639
   macro avg       0.58      0.56      0.54       639
weighted avg       0.58      0.56      0.54       639

[[103 217]
 [ 62 257]]


[32m[I 2021-12-09 21:19:35,308][0m Trial 13 pruned. [0m


Trial 13 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.636727,0.635368,0.631673,0.641284,0.635526
2,0.663400,0.629509,0.669797,0.669405,0.670516,0.669744
3,0.663400,0.675714,0.649452,0.649383,0.649608,0.649476
4,0.535300,0.794642,0.649452,0.648825,0.650422,0.649388
5,0.302600,1.363852,0.643192,0.637366,0.653372,0.643392
6,0.302600,1.599497,0.666667,0.666458,0.667153,0.666707
6,0.195800,1.637044,0.671362,0.671341,0.671384,0.671351


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.53      0.59       320
           1       0.61      0.74      0.67       319

    accuracy                           0.64       639
   macro avg       0.64      0.64      0.63       639
weighted avg       0.64      0.64      0.63       639

[[171 149]
 [ 84 235]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.70      0.68       320
           1       0.68      0.64      0.66       319

    accuracy                           0.67       639
   macro avg       0.67      0.67      0.67       639
weighted avg       0.67      0.67      0.67       639

[[225  95]
 [116 203]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.63      0.64       320
           1       0.64      0.66      0.65       319

    accuracy                           0.65       639
   macro avg       0.65      0.65      0.65       639
weighted avg       0.65      0.65      0.65       639

[[203 117]
 [107 212]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.69      0.66       320
           1       0.66      0.61      0.63       319

    accuracy                           0.65       639
   macro avg       0.65      0.65      0.65       639
weighted avg       0.65      0.65      0.65       639

[[221  99]
 [125 194]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.69      0.52      0.59       320
           1       0.61      0.77      0.68       319

    accuracy                           0.64       639
   macro avg       0.65      0.64      0.64       639
weighted avg       0.65      0.64      0.64       639

[[165 155]
 [ 73 246]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.68      0.64      0.66       320
           1       0.66      0.69      0.67       319

    accuracy                           0.67       639
   macro avg       0.67      0.67      0.67       639
weighted avg       0.67      0.67      0.67       639

[[205 115]
 [ 98 221]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.68      0.67       320
           1       0.67      0.66      0.67       319

    accuracy                           0.67       639
   macro avg       0.67      0.67      0.67       639
weighted avg       0.67      0.67      0.67       639

[[217 103]
 [107 212]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 21:37:48,081][0m Trial 14 finished with value: 0.6713413797832946 and parameters: {'learning_rate': 1.2657005711951106e-05, 'per_device_train_batch_size': 8, 'weight_decay': 0.3512995499608481, 'num_train_epochs': 6.714357839255461}. Best is trial 14 with value: 0.6713413797832946.[0m


Trial 14 finished with value: 0.6713413797832946 and parameters: {'learning_rate': 1.2657005711951106e-05, 'per_device_train_batch_size': 8, 'weight_decay': 0.3512995499608481, 'num_train_epochs': 6.714357839255461}. Best is trial 14 with value: 0.6713413797832946.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.824802,0.500782,0.333681,0.250391,0.5


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.50      1.00      0.67       320
           1       0.00      0.00      0.00       319

    accuracy                           0.50       639
   macro avg       0.25      0.50      0.33       639
weighted avg       0.25      0.50      0.33       639

[[320   0]
 [319   0]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 21:40:32,390][0m Trial 15 pruned. [0m


Trial 15 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.692653,0.500782,0.3497,0.522727,0.501538


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.55      0.02      0.04       320
           1       0.50      0.98      0.66       319

    accuracy                           0.50       639
   macro avg       0.52      0.50      0.35       639
weighted avg       0.52      0.50      0.35       639

[[  6 314]
 [  5 314]]


[32m[I 2021-12-09 21:43:16,891][0m Trial 16 pruned. [0m


Trial 16 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.649703,0.621283,0.619867,0.622975,0.621189
2,0.667300,0.637372,0.644757,0.641806,0.649457,0.644617
3,0.667300,0.623451,0.665102,0.664917,0.665407,0.665067
3,0.667300,0.623103,0.663537,0.663418,0.663721,0.663509


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.61      0.68      0.64       320
           1       0.64      0.56      0.60       319

    accuracy                           0.62       639
   macro avg       0.62      0.62      0.62       639
weighted avg       0.62      0.62      0.62       639

[[218 102]
 [140 179]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.62      0.73      0.67       320
           1       0.68      0.55      0.61       319

    accuracy                           0.64       639
   macro avg       0.65      0.64      0.64       639
weighted avg       0.65      0.64      0.64       639

[[235  85]
 [142 177]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.69      0.67       320
           1       0.67      0.64      0.66       319

    accuracy                           0.67       639
   macro avg       0.67      0.67      0.66       639
weighted avg       0.67      0.67      0.66       639

[[220 100]
 [114 205]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.68      0.67       320
           1       0.67      0.65      0.66       319

    accuracy                           0.66       639
   macro avg       0.66      0.66      0.66       639
weighted avg       0.66      0.66      0.66       639

[[218 102]
 [113 206]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 21:51:48,257][0m Trial 17 finished with value: 0.6634180759817432 and parameters: {'learning_rate': 3.7609334547556697e-06, 'per_device_train_batch_size': 8, 'weight_decay': 5.0724512404089625e-12, 'num_train_epochs': 3.070767676897508}. Best is trial 14 with value: 0.6713413797832946.[0m


Trial 17 finished with value: 0.6634180759817432 and parameters: {'learning_rate': 3.7609334547556697e-06, 'per_device_train_batch_size': 8, 'weight_decay': 5.0724512404089625e-12, 'num_train_epochs': 3.070767676897508}. Best is trial 14 with value: 0.6713413797832946.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.693246,0.499218,0.332985,0.249609,0.5


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       320
           1       0.50      1.00      0.67       319

    accuracy                           0.50       639
   macro avg       0.25      0.50      0.33       639
weighted avg       0.25      0.50      0.33       639

[[  0 320]
 [  0 319]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 21:54:32,810][0m Trial 18 pruned. [0m


Trial 18 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.645468,0.625978,0.623485,0.629209,0.625852
2,0.663900,0.632787,0.669797,0.669067,0.671184,0.669725
3,0.663900,0.620726,0.669797,0.669473,0.670549,0.669847
3,0.600100,0.621506,0.674491,0.674395,0.674654,0.674466


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.61      0.71      0.65       320
           1       0.65      0.55      0.59       319

    accuracy                           0.63       639
   macro avg       0.63      0.63      0.62       639
weighted avg       0.63      0.63      0.62       639

[[226  94]
 [145 174]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.72      0.68       320
           1       0.69      0.62      0.65       319

    accuracy                           0.67       639
   macro avg       0.67      0.67      0.67       639
weighted avg       0.67      0.67      0.67       639

[[229  91]
 [120 199]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.68      0.64      0.66       320
           1       0.66      0.70      0.68       319

    accuracy                           0.67       639
   macro avg       0.67      0.67      0.67       639
weighted avg       0.67      0.67      0.67       639

[[204 116]
 [ 95 224]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.69      0.68       320
           1       0.68      0.66      0.67       319

    accuracy                           0.67       639
   macro avg       0.67      0.67      0.67       639
weighted avg       0.67      0.67      0.67       639

[[221  99]
 [109 210]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 22:03:36,507][0m Trial 19 finished with value: 0.6743949044585987 and parameters: {'learning_rate': 4.5230064997342265e-06, 'per_device_train_batch_size': 8, 'weight_decay': 0.009101413632288216, 'num_train_epochs': 3.2876532102102347}. Best is trial 19 with value: 0.6743949044585987.[0m


Trial 19 finished with value: 0.6743949044585987 and parameters: {'learning_rate': 4.5230064997342265e-06, 'per_device_train_batch_size': 8, 'weight_decay': 0.009101413632288216, 'num_train_epochs': 3.2876532102102347}. Best is trial 19 with value: 0.6743949044585987.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.690314,0.552426,0.521276,0.571568,0.552826


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.61      0.30      0.40       320
           1       0.53      0.81      0.64       319

    accuracy                           0.55       639
   macro avg       0.57      0.55      0.52       639
weighted avg       0.57      0.55      0.52       639

[[ 95 225]
 [ 61 258]]


[32m[I 2021-12-09 22:06:11,010][0m Trial 20 pruned. [0m


Trial 20 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.642455,0.633803,0.631266,0.637384,0.633675
2,0.662200,0.631491,0.677621,0.676956,0.678976,0.677552
3,0.662200,0.623254,0.660407,0.659202,0.662866,0.660502
3,0.594100,0.626375,0.679186,0.678478,0.680654,0.679114


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.62      0.72      0.66       320
           1       0.66      0.55      0.60       319

    accuracy                           0.63       639
   macro avg       0.64      0.63      0.63       639
weighted avg       0.64      0.63      0.63       639

[[229  91]
 [143 176]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.72      0.69       320
           1       0.69      0.63      0.66       319

    accuracy                           0.68       639
   macro avg       0.68      0.68      0.68       639
weighted avg       0.68      0.68      0.68       639

[[231  89]
 [117 202]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.68      0.60      0.64       320
           1       0.64      0.72      0.68       319

    accuracy                           0.66       639
   macro avg       0.66      0.66      0.66       639
weighted avg       0.66      0.66      0.66       639

[[192 128]
 [ 89 230]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.72      0.69       320
           1       0.70      0.63      0.66       319

    accuracy                           0.68       639
   macro avg       0.68      0.68      0.68       639
weighted avg       0.68      0.68      0.68       639

[[232  88]
 [117 202]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 22:15:47,500][0m Trial 21 finished with value: 0.6784775453400782 and parameters: {'learning_rate': 4.962593526315429e-06, 'per_device_train_batch_size': 8, 'weight_decay': 0.00916864242414039, 'num_train_epochs': 3.5077271970163797}. Best is trial 21 with value: 0.6784775453400782.[0m


Trial 21 finished with value: 0.6784775453400782 and parameters: {'learning_rate': 4.962593526315429e-06, 'per_device_train_batch_size': 8, 'weight_decay': 0.00916864242414039, 'num_train_epochs': 3.5077271970163797}. Best is trial 21 with value: 0.6784775453400782.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.632728,0.647887,0.647856,0.647918,0.647874
2,0.656400,0.622892,0.687011,0.686918,0.687187,0.686986
2,0.656400,0.618576,0.691706,0.691113,0.693069,0.691639


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.66      0.65       320
           1       0.65      0.64      0.64       319

    accuracy                           0.65       639
   macro avg       0.65      0.65      0.65       639
weighted avg       0.65      0.65      0.65       639

[[210 110]
 [115 204]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.68      0.70      0.69       320
           1       0.69      0.67      0.68       319

    accuracy                           0.69       639
   macro avg       0.69      0.69      0.69       639
weighted avg       0.69      0.69      0.69       639

[[225  95]
 [105 214]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.68      0.73      0.70       320
           1       0.71      0.65      0.68       319

    accuracy                           0.69       639
   macro avg       0.69      0.69      0.69       639
weighted avg       0.69      0.69      0.69       639

[[235  85]
 [112 207]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 22:23:33,727][0m Trial 22 finished with value: 0.6911127087847238 and parameters: {'learning_rate': 8.325171558473647e-06, 'per_device_train_batch_size': 8, 'weight_decay': 0.022140347515650492, 'num_train_epochs': 2.8578485716657727}. Best is trial 22 with value: 0.6911127087847238.[0m


Trial 22 finished with value: 0.6911127087847238 and parameters: {'learning_rate': 8.325171558473647e-06, 'per_device_train_batch_size': 8, 'weight_decay': 0.022140347515650492, 'num_train_epochs': 2.8578485716657727}. Best is trial 22 with value: 0.6911127087847238.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.681242,0.591549,0.588156,0.594468,0.591409


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.58      0.68      0.63       320
           1       0.61      0.50      0.55       319

    accuracy                           0.59       639
   macro avg       0.59      0.59      0.59       639
weighted avg       0.59      0.59      0.59       639

[[218 102]
 [159 160]]


[32m[I 2021-12-09 22:26:17,764][0m Trial 23 pruned. [0m


Trial 23 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.690657,0.539906,0.440808,0.640611,0.540566


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.76      0.12      0.21       320
           1       0.52      0.96      0.68       319

    accuracy                           0.54       639
   macro avg       0.64      0.54      0.44       639
weighted avg       0.64      0.54      0.44       639

[[ 38 282]
 [ 12 307]]


[32m[I 2021-12-09 22:29:01,741][0m Trial 24 pruned. [0m


Trial 24 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.643207,0.635368,0.632745,0.639138,0.635237
2,0.662700,0.631925,0.674491,0.673723,0.676021,0.674417
3,0.662700,0.624699,0.657277,0.65579,0.660216,0.657381
3,0.595500,0.624299,0.680751,0.680469,0.681314,0.680706


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.62      0.72      0.66       320
           1       0.66      0.55      0.60       319

    accuracy                           0.64       639
   macro avg       0.64      0.64      0.63       639
weighted avg       0.64      0.64      0.63       639

[[230  90]
 [143 176]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.72      0.69       320
           1       0.69      0.63      0.66       319

    accuracy                           0.67       639
   macro avg       0.68      0.67      0.67       639
weighted avg       0.68      0.67      0.67       639

[[231  89]
 [119 200]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.68      0.59      0.63       320
           1       0.64      0.72      0.68       319

    accuracy                           0.66       639
   macro avg       0.66      0.66      0.66       639
weighted avg       0.66      0.66      0.66       639

[[189 131]
 [ 88 231]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.71      0.69       320
           1       0.69      0.65      0.67       319

    accuracy                           0.68       639
   macro avg       0.68      0.68      0.68       639
weighted avg       0.68      0.68      0.68       639

[[227  93]
 [111 208]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 22:38:57,956][0m Trial 25 finished with value: 0.6804686733993529 and parameters: {'learning_rate': 4.802252640150496e-06, 'per_device_train_batch_size': 8, 'weight_decay': 0.034207075806692015, 'num_train_epochs': 3.644878034638589}. Best is trial 22 with value: 0.6911127087847238.[0m


Trial 25 finished with value: 0.6804686733993529 and parameters: {'learning_rate': 4.802252640150496e-06, 'per_device_train_batch_size': 8, 'weight_decay': 0.034207075806692015, 'num_train_epochs': 3.644878034638589}. Best is trial 22 with value: 0.6911127087847238.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.689174,0.568075,0.562364,0.572069,0.568255


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.59      0.45      0.51       320
           1       0.55      0.68      0.61       319

    accuracy                           0.57       639
   macro avg       0.57      0.57      0.56       639
weighted avg       0.57      0.57      0.56       639

[[145 175]
 [101 218]]


[32m[I 2021-12-09 22:41:41,860][0m Trial 26 pruned. [0m


Trial 26 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.716371,0.499218,0.332985,0.249609,0.5


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       320
           1       0.50      1.00      0.67       319

    accuracy                           0.50       639
   macro avg       0.25      0.50      0.33       639
weighted avg       0.25      0.50      0.33       639

[[  0 320]
 [  0 319]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 22:44:40,540][0m Trial 27 pruned. [0m


Trial 27 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.693505,0.499218,0.332985,0.249609,0.5


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       320
           1       0.50      1.00      0.67       319

    accuracy                           0.50       639
   macro avg       0.25      0.50      0.33       639
weighted avg       0.25      0.50      0.33       639

[[  0 320]
 [  0 319]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 22:47:15,117][0m Trial 28 pruned. [0m


Trial 28 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.678317,0.594679,0.594007,0.595223,0.594617


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.59      0.63      0.61       320
           1       0.60      0.55      0.58       319

    accuracy                           0.59       639
   macro avg       0.60      0.59      0.59       639
weighted avg       0.60      0.59      0.59       639

[[203 117]
 [142 177]]


[32m[I 2021-12-09 22:49:59,398][0m Trial 29 pruned. [0m


Trial 29 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6862,0.64182,0.638498,0.634357,0.644795,0.638333
2,0.6407,0.642343,0.669797,0.668223,0.672895,0.66969
2,0.6014,0.651982,0.671362,0.671225,0.671594,0.671331


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.61      0.74      0.67       320
           1       0.67      0.53      0.60       319

    accuracy                           0.64       639
   macro avg       0.64      0.64      0.63       639
weighted avg       0.64      0.64      0.63       639

[[238  82]
 [149 170]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.74      0.69       320
           1       0.70      0.60      0.65       319

    accuracy                           0.67       639
   macro avg       0.67      0.67      0.67       639
weighted avg       0.67      0.67      0.67       639

[[236  84]
 [127 192]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.69      0.68       320
           1       0.68      0.65      0.66       319

    accuracy                           0.67       639
   macro avg       0.67      0.67      0.67       639
weighted avg       0.67      0.67      0.67       639

[[221  99]
 [111 208]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 22:58:22,443][0m Trial 30 finished with value: 0.6712254258217527 and parameters: {'learning_rate': 8.71967286054325e-06, 'per_device_train_batch_size': 4, 'weight_decay': 0.0034245539938323, 'num_train_epochs': 2.8271625115348256}. Best is trial 22 with value: 0.6911127087847238.[0m


Trial 30 finished with value: 0.6712254258217527 and parameters: {'learning_rate': 8.71967286054325e-06, 'per_device_train_batch_size': 4, 'weight_decay': 0.0034245539938323, 'num_train_epochs': 2.8271625115348256}. Best is trial 22 with value: 0.6911127087847238.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.689402,0.57903,0.567118,0.589202,0.579291


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.62      0.41      0.50       320
           1       0.56      0.75      0.64       319

    accuracy                           0.58       639
   macro avg       0.59      0.58      0.57       639
weighted avg       0.59      0.58      0.57       639

[[132 188]
 [ 81 238]]


[32m[I 2021-12-09 23:01:06,640][0m Trial 31 pruned. [0m


Trial 31 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.641844,0.632238,0.629786,0.635636,0.632112
2,0.660700,0.633494,0.669797,0.667421,0.67456,0.669666
2,0.660700,0.623057,0.676056,0.675853,0.676434,0.676019


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.61      0.71      0.66       320
           1       0.66      0.55      0.60       319

    accuracy                           0.63       639
   macro avg       0.64      0.63      0.63       639
weighted avg       0.64      0.63      0.63       639

[[228  92]
 [143 176]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.65      0.75      0.70       320
           1       0.70      0.59      0.64       319

    accuracy                           0.67       639
   macro avg       0.67      0.67      0.67       639
weighted avg       0.67      0.67      0.67       639

[[241  79]
 [132 187]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.70      0.68       320
           1       0.68      0.65      0.67       319

    accuracy                           0.68       639
   macro avg       0.68      0.68      0.68       639
weighted avg       0.68      0.68      0.68       639

[[224  96]
 [111 208]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 23:07:37,687][0m Trial 32 finished with value: 0.6758531116366265 and parameters: {'learning_rate': 5.74144597254675e-06, 'per_device_train_batch_size': 8, 'weight_decay': 0.0019231609094190923, 'num_train_epochs': 2.343383512642592}. Best is trial 22 with value: 0.6911127087847238.[0m


Trial 32 finished with value: 0.6758531116366265 and parameters: {'learning_rate': 5.74144597254675e-06, 'per_device_train_batch_size': 8, 'weight_decay': 0.0019231609094190923, 'num_train_epochs': 2.343383512642592}. Best is trial 22 with value: 0.6911127087847238.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.682181,0.58529,0.580539,0.589101,0.585124


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.57      0.69      0.63       320
           1       0.61      0.48      0.54       319

    accuracy                           0.59       639
   macro avg       0.59      0.59      0.58       639
weighted avg       0.59      0.59      0.58       639

[[221  99]
 [166 153]]


[32m[I 2021-12-09 23:10:21,735][0m Trial 33 pruned. [0m


Trial 33 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.677706,0.577465,0.531067,0.62698,0.576974


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.55      0.89      0.68       320
           1       0.71      0.26      0.38       319

    accuracy                           0.58       639
   macro avg       0.63      0.58      0.53       639
weighted avg       0.63      0.58      0.53       639

[[285  35]
 [235  84]]


[32m[I 2021-12-09 23:12:56,429][0m Trial 34 pruned. [0m


Trial 34 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.633743,0.641628,0.641455,0.641848,0.641595
2,0.657000,0.62407,0.687011,0.686789,0.687471,0.686971
2,0.657000,0.616726,0.685446,0.685196,0.685961,0.685404


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.64      0.66      0.65       320
           1       0.65      0.62      0.63       319

    accuracy                           0.64       639
   macro avg       0.64      0.64      0.64       639
weighted avg       0.64      0.64      0.64       639

[[212 108]
 [121 198]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.68      0.71      0.70       320
           1       0.70      0.66      0.68       319

    accuracy                           0.69       639
   macro avg       0.69      0.69      0.69       639
weighted avg       0.69      0.69      0.69       639

[[228  92]
 [108 211]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.68      0.71      0.69       320
           1       0.70      0.66      0.68       319

    accuracy                           0.69       639
   macro avg       0.69      0.69      0.69       639
weighted avg       0.69      0.69      0.69       639

[[228  92]
 [109 210]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 23:19:48,575][0m Trial 35 finished with value: 0.685196214678049 and parameters: {'learning_rate': 8.21943372523803e-06, 'per_device_train_batch_size': 8, 'weight_decay': 0.10401035970271441, 'num_train_epochs': 2.4884574830242934}. Best is trial 22 with value: 0.6911127087847238.[0m


Trial 35 finished with value: 0.685196214678049 and parameters: {'learning_rate': 8.21943372523803e-06, 'per_device_train_batch_size': 8, 'weight_decay': 0.10401035970271441, 'num_train_epochs': 2.4884574830242934}. Best is trial 22 with value: 0.6911127087847238.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.673169,0.599374,0.599349,0.599383,0.599363


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      0.61      0.60       320
           1       0.60      0.59      0.60       319

    accuracy                           0.60       639
   macro avg       0.60      0.60      0.60       639
weighted avg       0.60      0.60      0.60       639

[[194 126]
 [130 189]]


[32m[I 2021-12-09 23:22:32,558][0m Trial 36 pruned. [0m


Trial 36 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.69367,0.499218,0.332985,0.249609,0.5


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       320
           1       0.50      1.00      0.67       319

    accuracy                           0.50       639
   macro avg       0.25      0.50      0.33       639
weighted avg       0.25      0.50      0.33       639

[[  0 320]
 [  0 319]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 23:25:17,018][0m Trial 37 pruned. [0m


Trial 37 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.689391,0.568075,0.567056,0.568821,0.568152


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.58      0.52      0.55       320
           1       0.56      0.62      0.59       319

    accuracy                           0.57       639
   macro avg       0.57      0.57      0.57       639
weighted avg       0.57      0.57      0.57       639

[[166 154]
 [122 197]]


[32m[I 2021-12-09 23:27:51,621][0m Trial 38 pruned. [0m


Trial 38 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6906,0.647128,0.622848,0.619715,0.626815,0.622708


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      0.71      0.65       320
           1       0.65      0.53      0.59       319

    accuracy                           0.62       639
   macro avg       0.63      0.62      0.62       639
weighted avg       0.63      0.62      0.62       639

[[228  92]
 [149 170]]


[32m[I 2021-12-09 23:30:50,545][0m Trial 39 pruned. [0m


Trial 39 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7443,0.718071,0.499218,0.332985,0.249609,0.5


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       320
           1       0.50      1.00      0.67       319

    accuracy                           0.50       639
   macro avg       0.25      0.50      0.33       639
weighted avg       0.25      0.50      0.33       639

[[  0 320]
 [  0 319]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 23:34:19,533][0m Trial 40 pruned. [0m


Trial 40 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.665807,0.599374,0.599208,0.599585,0.599407


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      0.58      0.59       320
           1       0.59      0.62      0.61       319

    accuracy                           0.60       639
   macro avg       0.60      0.60      0.60       639
weighted avg       0.60      0.60      0.60       639

[[185 135]
 [121 198]]


[32m[I 2021-12-09 23:37:03,751][0m Trial 41 pruned. [0m


Trial 41 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.637469,0.638498,0.637076,0.640549,0.638401
2,0.658600,0.630917,0.677621,0.676289,0.680421,0.677523
2,0.658600,0.620728,0.672926,0.672606,0.673525,0.672879


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.62      0.70      0.66       320
           1       0.66      0.58      0.61       319

    accuracy                           0.64       639
   macro avg       0.64      0.64      0.64       639
weighted avg       0.64      0.64      0.64       639

[[224  96]
 [135 184]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.74      0.70       320
           1       0.70      0.61      0.66       319

    accuracy                           0.68       639
   macro avg       0.68      0.68      0.68       639
weighted avg       0.68      0.68      0.68       639

[[237  83]
 [123 196]]


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.66      0.70      0.68       320
           1       0.68      0.64      0.66       319

    accuracy                           0.67       639
   macro avg       0.67      0.67      0.67       639
weighted avg       0.67      0.67      0.67       639

[[225  95]
 [114 205]]




Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2021-12-09 23:43:32,958][0m Trial 42 finished with value: 0.6726057251281499 and parameters: {'learning_rate': 6.761451951679797e-06, 'per_device_train_batch_size': 8, 'weight_decay': 0.0007753063063951709, 'num_train_epochs': 2.337266974809359}. Best is trial 22 with value: 0.6911127087847238.[0m


Trial 42 finished with value: 0.6726057251281499 and parameters: {'learning_rate': 6.761451951679797e-06, 'per_device_train_batch_size': 8, 'weight_decay': 0.0007753063063951709, 'num_train_epochs': 2.337266974809359}. Best is trial 22 with value: 0.6911127087847238.


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.655104,0.627543,0.610677,0.654917,0.62787


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.72      0.42      0.53       320
           1       0.59      0.84      0.69       319

    accuracy                           0.63       639
   macro avg       0.65      0.63      0.61       639
weighted avg       0.66      0.63      0.61       639

[[134 186]
 [ 52 267]]


[32m[I 2021-12-09 23:46:17,150][0m Trial 43 pruned. [0m


Trial 43 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,5.940373,0.499218,0.332985,0.249609,0.5


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       320
           1       0.50      1.00      0.67       319

    accuracy                           0.50       639
   macro avg       0.25      0.50      0.33       639
weighted avg       0.25      0.50      0.33       639

[[  0 320]
 [  0 319]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-09 23:49:02,901][0m Trial 44 pruned. [0m


Trial 44 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7049,0.64702,0.618153,0.616416,0.620176,0.61805


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.60      0.68      0.64       320
           1       0.64      0.55      0.59       319

    accuracy                           0.62       639
   macro avg       0.62      0.62      0.62       639
weighted avg       0.62      0.62      0.62       639

[[219 101]
 [143 176]]


[32m[I 2021-12-09 23:52:31,925][0m Trial 45 pruned. [0m


Trial 45 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.690988,0.535211,0.45121,0.592982,0.535825


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.67      0.14      0.24       320
           1       0.52      0.93      0.67       319

    accuracy                           0.54       639
   macro avg       0.59      0.54      0.45       639
weighted avg       0.59      0.54      0.45       639

[[ 46 274]
 [ 23 296]]


[32m[I 2021-12-09 23:55:16,050][0m Trial 46 pruned. [0m


Trial 46 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6889,0.682797,0.5759,0.575197,0.576324,0.575838


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.57      0.62      0.59       320
           1       0.58      0.54      0.56       319

    accuracy                           0.58       639
   macro avg       0.58      0.58      0.58       639
weighted avg       0.58      0.58      0.58       639

[[197 123]
 [148 171]]


[32m[I 2021-12-09 23:58:14,744][0m Trial 47 pruned. [0m


Trial 47 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.693208,0.499218,0.332985,0.249609,0.5


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       320
           1       0.50      1.00      0.67       319

    accuracy                           0.50       639
   macro avg       0.25      0.50      0.33       639
weighted avg       0.25      0.50      0.33       639

[[  0 320]
 [  0 319]]



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-12-10 00:00:59,263][0m Trial 48 pruned. [0m


Trial 48 pruned. 


Trial:
loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1480925a23f7db13cea1c830922dbd4173c2a1ccab8c57cbb36a1ea693164879.01dc297b74ef2153586ff6f1113a3309f339a11f1cef9d887ae2314924e8d17e
Model config RobertaConfig {
  "_name_or_path": "princeton-nlp/sup-simcse-roberta-base",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.13.0",
  "type_vocab_size": 1,
  "use_cache": tru

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.660782,0.608764,0.608602,0.608991,0.608797


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: __index_level_0__, text.
***** Running Evaluation *****
  Num examples = 639
  Batch size = 8


              precision    recall  f1-score   support

           0       0.61      0.59      0.60       320
           1       0.60      0.63      0.62       319

    accuracy                           0.61       639
   macro avg       0.61      0.61      0.61       639
weighted avg       0.61      0.61      0.61       639

[[188 132]
 [118 201]]


[32m[I 2021-12-10 00:03:43,583][0m Trial 49 pruned. [0m


Trial 49 pruned. 


[32m[I 2021-12-10 00:03:43,622][0m A new study created in memory with name: no-name-11e2e37c-ff52-4eac-9804-b73e45098ff1[0m


A new study created in memory with name: no-name-11e2e37c-ff52-4eac-9804-b73e45098ff1


In [None]:
# Echo the storage URL used for the Optuna study (the name is defined outside
# this section of the notebook).
storage_name

'sqlite:///SIMCSE_RoBERTa_MASK_IE.db'

In [None]:
# Echo the name under which the Optuna study is stored (the name is defined
# outside this section of the notebook).
study_name

'SIMCSE_RoBERTa_MASK_IE'

In [None]:
# Open the study kept in `storage_name`; with load_if_exists=True an already
# stored study of this name is reused rather than raising a duplicate error.
study = optuna.create_study(
    direction="maximize",
    storage=storage_name,
    study_name=study_name,
    load_if_exists=True,
)

# Flatten the trials into a DataFrame restricted to the listed attributes.
df = study.trials_dataframe(
    attrs=(
        "number",
        "value",
        "params",
        "state",
    )
)

[32m[I 2021-12-10 00:03:43,720][0m Using an existing study with name 'SIMCSE_RoBERTa_MASK_IE' instead of creating a new one.[0m


Using an existing study with name 'SIMCSE_RoBERTa_MASK_IE' instead of creating a new one.


In [None]:
# Display the trials table built from the study: trial number, objective
# value, one `params_*` column per hyperparameter, and the trial state.
df

Unnamed: 0,number,value,params_learning_rate,params_num_train_epochs,params_per_device_train_batch_size,params_weight_decay,state
0,0,0.581087,4.215192e-07,1.971006,8,6.152049e-11,COMPLETE
1,1,0.332985,0.2086499,2.320751,16,1.526087e-08,COMPLETE
2,2,0.573493,1.135507e-06,2.081758,16,8.377336e-10,COMPLETE
3,3,0.332985,0.000543934,1.236895,2,2.326873e-10,COMPLETE
4,4,0.656304,3.469887e-06,4.283602,4,0.0001000452,COMPLETE
5,5,0.333681,0.0002498881,1.737044,2,0.0003842662,PRUNED
6,6,0.332985,0.1245444,1.760301,2,1.433569e-09,PRUNED
7,7,0.332985,0.00996071,2.335072,2,1.637649e-10,PRUNED
8,8,0.632701,1.819641e-05,6.864353,4,0.00646607,PRUNED
9,9,0.333681,0.0002834096,1.956022,2,4.112338e-09,PRUNED


In [None]:
# Plot the hyperparameter importances evaluated for this study and render the
# resulting figure.
fig = optuna.visualization.plot_param_importances(study)
fig.show()

In [None]:
# Display the best run of the hyperparameter search (run id, objective value
# and hyperparameters).
# NOTE(review): `best_run` is defined outside this section -- presumably the
# return value of the Trainer's hyperparameter search; confirm.
best_run

BestRun(run_id='22', objective=0.6911127087847238, hyperparameters={'learning_rate': 8.325171558473647e-06, 'num_train_epochs': 2.8578485716657727, 'per_device_train_batch_size': 8, 'weight_decay': 0.022140347515650492})

In [None]:
# Plot the intermediate values reported by the trials of the study; the
# returned figure is shown as the cell output.
optuna.visualization.plot_intermediate_values(study)

In [None]:
# Parallel-coordinate view of the hyperparameter combinations tried in the
# study; the returned figure is shown as the cell output.
optuna.visualization.plot_parallel_coordinate(study)

In [None]:
# Plot the optimization history (objective value per trial) of the study; the
# returned figure is shown as the cell output.
optuna.visualization.plot_optimization_history(study)

In [None]:
# Contour plot of the relationship between hyperparameters and the objective
# value; the returned figure is shown as the cell output.
optuna.visualization.plot_contour(study)

In [None]:
# Slice plot of the objective value against each individual hyperparameter;
# the returned figure is shown as the cell output.
optuna.visualization.plot_slice(study)

In [None]:
# Plot the empirical distribution function (EDF) of the study's objective
# values; the returned figure is shown as the cell output.
optuna.visualization.plot_edf(study)