In [None]:
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-52361079-3dd1-307a-4402-e81341128dc7)


In [None]:
%%capture install_log 

!pip install transformers datasets evaluate optuna

In [None]:
import torch
import optuna
import evaluate
import numpy as np
from transformers import (
    AutoModelForSequenceClassification,
    DataCollatorForLanguageModeling,
    PreTrainedTokenizerFast,
    Trainer,
    TrainingArguments
)
from datasets import load_dataset

In [None]:
tokenizer = PreTrainedTokenizerFast.from_pretrained('gngpostalsrvc/BERiT_2000_custom_architecture_150_epochs_2')

def preprocess(examples):
  
    encoding = tokenizer(examples['Text'], max_length=128, truncation=True, padding=True)
    encoding['labels'] = [[stage] for stage in examples['Stage']]

    return encoding

raw_data = load_dataset('gngpostalsrvc/COHeN')

tokenized_data = raw_data.map(preprocess, batched=True, remove_columns=raw_data['train'].column_names)
tokenized_data.set_format("pt", columns=["input_ids", "attention_mask", "labels"], output_all_columns=True)

Downloading (…)okenizer_config.json:   0%|          | 0.00/51.0 [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/110k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/121 [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/916 [00:00<?, ?B/s]

Downloading and preparing dataset csv/default (download: 1.08 MiB, generated: 2.56 MiB, post-processed: Unknown size, total: 3.64 MiB) to /root/.cache/huggingface/datasets/gngpostalsrvc___parquet/gngpostalsrvc--COHeN-97096b619f4d4787/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/120k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/121k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/890k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating test split:   0%|          | 0/1197 [00:00<?, ? examples/s]

Generating eval split:   0%|          | 0/1197 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/9574 [00:00<?, ? examples/s]

Dataset parquet downloaded and prepared to /root/.cache/huggingface/datasets/gngpostalsrvc___parquet/gngpostalsrvc--COHeN-97096b619f4d4787/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

Map:   0%|          | 0/1197 [00:00<?, ? examples/s]

Map:   0%|          | 0/1197 [00:00<?, ? examples/s]

Map:   0%|          | 0/9574 [00:00<?, ? examples/s]

In [5]:
def compute_metrics(eval_preds):
    metrics = evaluate.load('accuracy')
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    return metrics.compute(predictions=predictions, references=labels)


def objective(trial):
  model = AutoModelForSequenceClassification.from_pretrained('gngpostalsrvc/BERiT', num_labels=4)
  batch_size = trial.suggest_categorical("batch_size", [8, 16, 32, 64, 128])
  args = TrainingArguments(output_dir="opt-test", 
                         evaluation_strategy="epoch",
                         learning_rate=trial.suggest_float('learning_rate', low=4e-5, high=.01),
                         weight_decay=trial.suggest_float('weight_decay', low=4e-5, high=.01),
                         num_train_epochs=3,
                         per_device_train_batch_size=batch_size, 
                         per_device_eval_batch_size=batch_size, 
                         seed=42,
                         disable_tqdm=True
                        )
  
  trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_data['train'],
    eval_dataset=tokenized_data['test'],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
  )

  result = trainer.train()

  return result.training_loss

study = optuna.create_study(study_name='hp-search-COHeN', direction='minimize')
study.optimize(func=objective, n_trials=20)

[32m[I 2023-03-05 16:49:39,626][0m A new study created in memory with name: hp-search-COHeN[0m


Downloading (…)lve/main/config.json:   0%|          | 0.00/669 [00:00<?, ?B/s]

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/5.68M [00:00<?, ?B/s]

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/pytorch_model.bin
Some weights of the model checkpoint at gngpostalsrvc/BERiT were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from t

{'loss': 1.3993, 'learning_rate': 0.005234378203240644, 'epoch': 0.83}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 16


{'eval_loss': 1.27681303024292, 'eval_accuracy': 0.4093567251461988, 'eval_runtime': 2.5228, 'eval_samples_per_second': 474.479, 'eval_steps_per_second': 29.729, 'epoch': 1.0}


Saving model checkpoint to opt-test/checkpoint-1000
Configuration saved in opt-test/checkpoint-1000/config.json
Model weights saved in opt-test/checkpoint-1000/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-1000/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-1000/special_tokens_map.json


{'loss': 1.3452, 'learning_rate': 0.0032164991734639887, 'epoch': 1.67}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 16


{'eval_loss': 1.2064297199249268, 'eval_accuracy': 0.4578111946532999, 'eval_runtime': 1.85, 'eval_samples_per_second': 647.011, 'eval_steps_per_second': 40.54, 'epoch': 2.0}


Saving model checkpoint to opt-test/checkpoint-1500
Configuration saved in opt-test/checkpoint-1500/config.json
Model weights saved in opt-test/checkpoint-1500/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-1500/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-1500/special_tokens_map.json


{'loss': 1.3034, 'learning_rate': 0.0011986201436873334, 'epoch': 2.5}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 16:50:32,171][0m Trial 0 finished with value: 1.3324641003234559 and parameters: {'batch_size': 16, 'learning_rate': 0.0072522572330173, 'weight_decay': 0.007029157880518676}. Best is trial 0 with value: 1.3324641003234559.[0m


{'eval_loss': 1.1374976634979248, 'eval_accuracy': 0.49373433583959897, 'eval_runtime': 1.8652, 'eval_samples_per_second': 641.755, 'eval_steps_per_second': 40.21, 'epoch': 3.0}
{'train_runtime': 49.7997, 'train_samples_per_second': 576.75, 'train_steps_per_second': 36.085, 'train_loss': 1.3324641003234559, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'eval_loss': 1.0129808187484741, 'eval_accuracy': 0.568922305764411, 'eval_runtime': 1.7635, 'eval_samples_per_second': 678.751, 'eval_steps_per_second': 10.774, 'epoch': 1.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 64


{'eval_loss': 0.8812138438224792, 'eval_accuracy': 0.6382623224728488, 'eval_runtime': 1.7371, 'eval_samples_per_second': 689.062, 'eval_steps_per_second': 10.937, 'epoch': 2.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 64


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 16:51:11,596][0m Trial 1 finished with value: 1.0931613498263888 and parameters: {'batch_size': 64, 'learning_rate': 0.001330810111532763, 'weight_decay': 0.00572849123009566}. Best is trial 1 with value: 1.0931613498263888.[0m


{'eval_loss': 0.8670345544815063, 'eval_accuracy': 0.6449456975772765, 'eval_runtime': 1.7281, 'eval_samples_per_second': 692.67, 'eval_steps_per_second': 10.995, 'epoch': 3.0}
{'train_runtime': 38.9973, 'train_samples_per_second': 736.513, 'train_steps_per_second': 11.539, 'train_loss': 1.0931613498263888, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'loss': 1.5465, 'learning_rate': 0.008184938995544847, 'epoch': 0.42}


Saving model checkpoint to opt-test/checkpoint-1000
Configuration saved in opt-test/checkpoint-1000/config.json
Model weights saved in opt-test/checkpoint-1000/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-1000/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-1000/special_tokens_map.json


{'loss': 1.4951, 'learning_rate': 0.006860943687304011, 'epoch': 0.84}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 8


{'eval_loss': 1.3343666791915894, 'eval_accuracy': 0.3817878028404344, 'eval_runtime': 2.6951, 'eval_samples_per_second': 444.137, 'eval_steps_per_second': 55.656, 'epoch': 1.0}


Saving model checkpoint to opt-test/checkpoint-1500
Configuration saved in opt-test/checkpoint-1500/config.json
Model weights saved in opt-test/checkpoint-1500/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-1500/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-1500/special_tokens_map.json


{'loss': 1.4294, 'learning_rate': 0.005536948379063176, 'epoch': 1.25}


Saving model checkpoint to opt-test/checkpoint-2000
Configuration saved in opt-test/checkpoint-2000/config.json
Model weights saved in opt-test/checkpoint-2000/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-2000/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-2000/special_tokens_map.json


{'loss': 1.4063, 'learning_rate': 0.0042129530708223395, 'epoch': 1.67}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 8


{'eval_loss': 1.2973289489746094, 'eval_accuracy': 0.3533834586466165, 'eval_runtime': 2.0553, 'eval_samples_per_second': 582.406, 'eval_steps_per_second': 72.983, 'epoch': 2.0}


Saving model checkpoint to opt-test/checkpoint-2500
Configuration saved in opt-test/checkpoint-2500/config.json
Model weights saved in opt-test/checkpoint-2500/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-2500/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-2500/special_tokens_map.json


{'loss': 1.3698, 'learning_rate': 0.0028889577625815033, 'epoch': 2.09}


Saving model checkpoint to opt-test/checkpoint-3000
Configuration saved in opt-test/checkpoint-3000/config.json
Model weights saved in opt-test/checkpoint-3000/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-3000/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-3000/special_tokens_map.json


{'loss': 1.3258, 'learning_rate': 0.0015649624543406678, 'epoch': 2.51}


Saving model checkpoint to opt-test/checkpoint-3500
Configuration saved in opt-test/checkpoint-3500/config.json
Model weights saved in opt-test/checkpoint-3500/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-3500/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-3500/special_tokens_map.json


{'loss': 1.3075, 'learning_rate': 0.0002409671460998321, 'epoch': 2.92}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 16:52:18,198][0m Trial 2 finished with value: 1.4082931437820354 and parameters: {'batch_size': 8, 'learning_rate': 0.009508934303785683, 'weight_decay': 0.0032297273160342265}. Best is trial 1 with value: 1.0931613498263888.[0m


{'eval_loss': 1.228412389755249, 'eval_accuracy': 0.454469507101086, 'eval_runtime': 2.102, 'eval_samples_per_second': 569.466, 'eval_steps_per_second': 71.362, 'epoch': 3.0}
{'train_runtime': 66.1683, 'train_samples_per_second': 434.075, 'train_steps_per_second': 54.271, 'train_loss': 1.4082931437820354, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'eval_loss': 0.9793399572372437, 'eval_accuracy': 0.5973266499582289, 'eval_runtime': 2.3168, 'eval_samples_per_second': 516.664, 'eval_steps_per_second': 4.316, 'epoch': 1.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 128


{'eval_loss': 0.804386556148529, 'eval_accuracy': 0.6892230576441103, 'eval_runtime': 2.1088, 'eval_samples_per_second': 567.626, 'eval_steps_per_second': 4.742, 'epoch': 2.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 128


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 16:52:52,433][0m Trial 3 finished with value: 1.0458947075737848 and parameters: {'batch_size': 128, 'learning_rate': 0.0037823661822296854, 'weight_decay': 0.008199998128890522}. Best is trial 3 with value: 1.0458947075737848.[0m


{'eval_loss': 0.7615264654159546, 'eval_accuracy': 0.7134502923976608, 'eval_runtime': 1.7089, 'eval_samples_per_second': 700.469, 'eval_steps_per_second': 5.852, 'epoch': 3.0}
{'train_runtime': 33.7945, 'train_samples_per_second': 849.902, 'train_steps_per_second': 6.658, 'train_loss': 1.0458947075737848, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'loss': 1.3092, 'learning_rate': 0.002716816541516378, 'epoch': 0.83}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 16


{'eval_loss': 1.1558396816253662, 'eval_accuracy': 0.48872180451127817, 'eval_runtime': 2.657, 'eval_samples_per_second': 450.51, 'eval_steps_per_second': 28.227, 'epoch': 1.0}


Saving model checkpoint to opt-test/checkpoint-1000
Configuration saved in opt-test/checkpoint-1000/config.json
Model weights saved in opt-test/checkpoint-1000/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-1000/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-1000/special_tokens_map.json


{'loss': 1.2182, 'learning_rate': 0.001669470149258715, 'epoch': 1.67}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 16


{'eval_loss': 1.019949197769165, 'eval_accuracy': 0.5405179615705932, 'eval_runtime': 1.8509, 'eval_samples_per_second': 646.712, 'eval_steps_per_second': 40.521, 'epoch': 2.0}


Saving model checkpoint to opt-test/checkpoint-1500
Configuration saved in opt-test/checkpoint-1500/config.json
Model weights saved in opt-test/checkpoint-1500/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-1500/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-1500/special_tokens_map.json


{'loss': 1.1226, 'learning_rate': 0.0006221237570010519, 'epoch': 2.5}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 16:53:42,837][0m Trial 4 finished with value: 1.190702120993757 and parameters: {'batch_size': 16, 'learning_rate': 0.0037641629337740413, 'weight_decay': 0.0034939299191096953}. Best is trial 3 with value: 1.0458947075737848.[0m


{'eval_loss': 0.9604802131652832, 'eval_accuracy': 0.5772765246449457, 'eval_runtime': 1.8554, 'eval_samples_per_second': 645.144, 'eval_steps_per_second': 40.423, 'epoch': 3.0}
{'train_runtime': 49.9684, 'train_samples_per_second': 574.803, 'train_steps_per_second': 35.963, 'train_loss': 1.190702120993757, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'eval_loss': 1.0737048387527466, 'eval_accuracy': 0.5071010860484545, 'eval_runtime': 1.7075, 'eval_samples_per_second': 701.027, 'eval_steps_per_second': 5.857, 'epoch': 1.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 128


{'eval_loss': 0.8680345416069031, 'eval_accuracy': 0.6357560568086884, 'eval_runtime': 1.7446, 'eval_samples_per_second': 686.118, 'eval_steps_per_second': 5.732, 'epoch': 2.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 128


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 16:54:19,295][0m Trial 5 finished with value: 1.1131176079644096 and parameters: {'batch_size': 128, 'learning_rate': 0.008490164173397822, 'weight_decay': 0.0003297536762993297}. Best is trial 3 with value: 1.0458947075737848.[0m


{'eval_loss': 0.8010269999504089, 'eval_accuracy': 0.6825396825396826, 'eval_runtime': 2.3198, 'eval_samples_per_second': 515.993, 'eval_steps_per_second': 4.311, 'epoch': 3.0}
{'train_runtime': 36.0157, 'train_samples_per_second': 797.485, 'train_steps_per_second': 6.247, 'train_loss': 1.1131176079644096, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'eval_loss': 0.9577077627182007, 'eval_accuracy': 0.6106934001670844, 'eval_runtime': 2.0976, 'eval_samples_per_second': 570.66, 'eval_steps_per_second': 18.116, 'epoch': 1.0}


Saving model checkpoint to opt-test/checkpoint-500
Configuration saved in opt-test/checkpoint-500/config.json
Model weights saved in opt-test/checkpoint-500/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-500/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-500/special_tokens_map.json


{'loss': 1.115, 'learning_rate': 0.0008670800786538124, 'epoch': 1.67}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 32


{'eval_loss': 0.808030903339386, 'eval_accuracy': 0.6775271512113618, 'eval_runtime': 1.9866, 'eval_samples_per_second': 602.528, 'eval_steps_per_second': 19.128, 'epoch': 2.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 32


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 16:55:00,857][0m Trial 6 finished with value: 1.0253081936306423 and parameters: {'batch_size': 32, 'learning_rate': 0.001950930176971078, 'weight_decay': 0.007649742251688652}. Best is trial 6 with value: 1.0253081936306423.[0m


{'eval_loss': 0.7304974794387817, 'eval_accuracy': 0.7184628237259816, 'eval_runtime': 1.9614, 'eval_samples_per_second': 610.292, 'eval_steps_per_second': 19.374, 'epoch': 3.0}
{'train_runtime': 41.1243, 'train_samples_per_second': 698.418, 'train_steps_per_second': 21.885, 'train_loss': 1.0253081936306423, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'eval_loss': 0.9491210579872131, 'eval_accuracy': 0.6123642439431913, 'eval_runtime': 2.19, 'eval_samples_per_second': 546.577, 'eval_steps_per_second': 8.676, 'epoch': 1.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 64


{'eval_loss': 0.7972204089164734, 'eval_accuracy': 0.7034252297410192, 'eval_runtime': 2.4542, 'eval_samples_per_second': 487.741, 'eval_steps_per_second': 7.742, 'epoch': 2.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 64


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 16:55:37,957][0m Trial 7 finished with value: 1.0445826551649307 and parameters: {'batch_size': 64, 'learning_rate': 0.0043985276086475926, 'weight_decay': 0.00027089057483651045}. Best is trial 6 with value: 1.0253081936306423.[0m


{'eval_loss': 0.7274888753890991, 'eval_accuracy': 0.7376775271512114, 'eval_runtime': 2.0447, 'eval_samples_per_second': 585.425, 'eval_steps_per_second': 9.292, 'epoch': 3.0}
{'train_runtime': 36.6608, 'train_samples_per_second': 783.454, 'train_steps_per_second': 12.275, 'train_loss': 1.0445826551649307, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'loss': 1.5618, 'learning_rate': 0.008425511214532925, 'epoch': 0.42}


Saving model checkpoint to opt-test/checkpoint-1000
Configuration saved in opt-test/checkpoint-1000/config.json
Model weights saved in opt-test/checkpoint-1000/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-1000/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-1000/special_tokens_map.json


{'loss': 1.5068, 'learning_rate': 0.007062600956601361, 'epoch': 0.84}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 8


{'eval_loss': 1.4408103227615356, 'eval_accuracy': 0.2807017543859649, 'eval_runtime': 2.0457, 'eval_samples_per_second': 585.121, 'eval_steps_per_second': 73.323, 'epoch': 1.0}


Saving model checkpoint to opt-test/checkpoint-1500
Configuration saved in opt-test/checkpoint-1500/config.json
Model weights saved in opt-test/checkpoint-1500/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-1500/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-1500/special_tokens_map.json


{'loss': 1.4681, 'learning_rate': 0.0056996906986697985, 'epoch': 1.25}


Saving model checkpoint to opt-test/checkpoint-2000
Configuration saved in opt-test/checkpoint-2000/config.json
Model weights saved in opt-test/checkpoint-2000/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-2000/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-2000/special_tokens_map.json


{'loss': 1.4284, 'learning_rate': 0.004336780440738234, 'epoch': 1.67}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 8


{'eval_loss': 1.2848174571990967, 'eval_accuracy': 0.36006683375104426, 'eval_runtime': 2.1205, 'eval_samples_per_second': 564.499, 'eval_steps_per_second': 70.739, 'epoch': 2.0}


Saving model checkpoint to opt-test/checkpoint-2500
Configuration saved in opt-test/checkpoint-2500/config.json
Model weights saved in opt-test/checkpoint-2500/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-2500/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-2500/special_tokens_map.json


{'loss': 1.3835, 'learning_rate': 0.0029738701828066713, 'epoch': 2.09}


Saving model checkpoint to opt-test/checkpoint-3000
Configuration saved in opt-test/checkpoint-3000/config.json
Model weights saved in opt-test/checkpoint-3000/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-3000/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-3000/special_tokens_map.json


{'loss': 1.3505, 'learning_rate': 0.0016109599248751078, 'epoch': 2.51}


Saving model checkpoint to opt-test/checkpoint-3500
Configuration saved in opt-test/checkpoint-3500/config.json
Model weights saved in opt-test/checkpoint-3500/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-3500/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-3500/special_tokens_map.json


{'loss': 1.324, 'learning_rate': 0.0002480496669435445, 'epoch': 2.92}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 16:56:43,830][0m Trial 8 finished with value: 1.4286482132965062 and parameters: {'batch_size': 8, 'learning_rate': 0.009788421472464488, 'weight_decay': 0.0014730071705606927}. Best is trial 6 with value: 1.0253081936306423.[0m


{'eval_loss': 1.248661994934082, 'eval_accuracy': 0.4319131161236424, 'eval_runtime': 2.07, 'eval_samples_per_second': 578.267, 'eval_steps_per_second': 72.464, 'epoch': 3.0}
{'train_runtime': 65.3764, 'train_samples_per_second': 439.333, 'train_steps_per_second': 54.928, 'train_loss': 1.4286482132965062, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'eval_loss': 1.1214994192123413, 'eval_accuracy': 0.5405179615705932, 'eval_runtime': 1.7709, 'eval_samples_per_second': 675.937, 'eval_steps_per_second': 21.458, 'epoch': 1.0}


Saving model checkpoint to opt-test/checkpoint-500
Configuration saved in opt-test/checkpoint-500/config.json
Model weights saved in opt-test/checkpoint-500/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-500/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-500/special_tokens_map.json


{'loss': 1.2125, 'learning_rate': 0.0021481494886553152, 'epoch': 1.67}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 32


{'eval_loss': 0.8886598348617554, 'eval_accuracy': 0.6491228070175439, 'eval_runtime': 1.7997, 'eval_samples_per_second': 665.123, 'eval_steps_per_second': 21.115, 'epoch': 2.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 32


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 16:57:25,281][0m Trial 9 finished with value: 1.10341305202908 and parameters: {'batch_size': 32, 'learning_rate': 0.0048333363494744595, 'weight_decay': 0.005670945427929289}. Best is trial 6 with value: 1.0253081936306423.[0m


{'eval_loss': 0.7928232550621033, 'eval_accuracy': 0.6850459482038429, 'eval_runtime': 1.8164, 'eval_samples_per_second': 658.996, 'eval_steps_per_second': 20.921, 'epoch': 3.0}
{'train_runtime': 41.0087, 'train_samples_per_second': 700.388, 'train_steps_per_second': 21.947, 'train_loss': 1.10341305202908, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'eval_loss': 1.3222905397415161, 'eval_accuracy': 0.4452798663324979, 'eval_runtime': 1.8043, 'eval_samples_per_second': 663.425, 'eval_steps_per_second': 21.061, 'epoch': 1.0}


Saving model checkpoint to opt-test/checkpoint-500
Configuration saved in opt-test/checkpoint-500/config.json
Model weights saved in opt-test/checkpoint-500/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-500/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-500/special_tokens_map.json


{'loss': 1.3476, 'learning_rate': 3.084038117453277e-05, 'epoch': 1.67}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 32


{'eval_loss': 1.239760398864746, 'eval_accuracy': 0.4394319131161236, 'eval_runtime': 1.8188, 'eval_samples_per_second': 658.128, 'eval_steps_per_second': 20.893, 'epoch': 2.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 32


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 16:58:05,917][0m Trial 10 finished with value: 1.3105258178710937 and parameters: {'batch_size': 32, 'learning_rate': 6.939085764269873e-05, 'weight_decay': 0.009495696149381526}. Best is trial 6 with value: 1.0253081936306423.[0m


{'eval_loss': 1.2284375429153442, 'eval_accuracy': 0.4452798663324979, 'eval_runtime': 1.8117, 'eval_samples_per_second': 660.712, 'eval_steps_per_second': 20.975, 'epoch': 3.0}
{'train_runtime': 40.1522, 'train_samples_per_second': 715.329, 'train_steps_per_second': 22.415, 'train_loss': 1.3105258178710937, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'eval_loss': 1.0283247232437134, 'eval_accuracy': 0.5789473684210527, 'eval_runtime': 1.7453, 'eval_samples_per_second': 685.846, 'eval_steps_per_second': 10.886, 'epoch': 1.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 64


{'eval_loss': 0.835058331489563, 'eval_accuracy': 0.6641604010025063, 'eval_runtime': 1.7316, 'eval_samples_per_second': 691.26, 'eval_steps_per_second': 10.972, 'epoch': 2.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 64


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 16:58:43,721][0m Trial 11 finished with value: 1.0532293701171875 and parameters: {'batch_size': 64, 'learning_rate': 0.0023967333518982313, 'weight_decay': 0.0036558052101246423}. Best is trial 6 with value: 1.0253081936306423.[0m


{'eval_loss': 0.7766752243041992, 'eval_accuracy': 0.6992481203007519, 'eval_runtime': 1.7371, 'eval_samples_per_second': 689.067, 'eval_steps_per_second': 10.938, 'epoch': 3.0}
{'train_runtime': 37.3236, 'train_samples_per_second': 769.539, 'train_steps_per_second': 12.057, 'train_loss': 1.0532293701171875, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'eval_loss': 1.1547768115997314, 'eval_accuracy': 0.49707602339181284, 'eval_runtime': 1.7768, 'eval_samples_per_second': 673.682, 'eval_steps_per_second': 21.387, 'epoch': 1.0}


Saving model checkpoint to opt-test/checkpoint-500
Configuration saved in opt-test/checkpoint-500/config.json
Model weights saved in opt-test/checkpoint-500/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-500/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-500/special_tokens_map.json


{'loss': 1.2767, 'learning_rate': 0.0026859036303730674, 'epoch': 1.67}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 32


{'eval_loss': 1.1116973161697388, 'eval_accuracy': 0.5121136173767753, 'eval_runtime': 1.878, 'eval_samples_per_second': 637.366, 'eval_steps_per_second': 20.234, 'epoch': 2.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 32


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 16:59:25,272][0m Trial 12 finished with value: 1.2152385457356771 and parameters: {'batch_size': 32, 'learning_rate': 0.006043283168339402, 'weight_decay': 0.007290632407032851}. Best is trial 6 with value: 1.0253081936306423.[0m


{'eval_loss': 1.0361379384994507, 'eval_accuracy': 0.5664160401002506, 'eval_runtime': 1.7767, 'eval_samples_per_second': 673.74, 'eval_steps_per_second': 21.389, 'epoch': 3.0}
{'train_runtime': 41.0891, 'train_samples_per_second': 699.018, 'train_steps_per_second': 21.904, 'train_loss': 1.2152385457356771, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'eval_loss': 1.0106077194213867, 'eval_accuracy': 0.5831244778613199, 'eval_runtime': 1.739, 'eval_samples_per_second': 688.319, 'eval_steps_per_second': 10.926, 'epoch': 1.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 64


{'eval_loss': 0.8403245806694031, 'eval_accuracy': 0.6700083542188805, 'eval_runtime': 1.7403, 'eval_samples_per_second': 687.812, 'eval_steps_per_second': 10.918, 'epoch': 2.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 64


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 17:00:04,012][0m Trial 13 finished with value: 1.0479649522569445 and parameters: {'batch_size': 64, 'learning_rate': 0.0025119036826251875, 'weight_decay': 0.00011464331705632672}. Best is trial 6 with value: 1.0253081936306423.[0m


{'eval_loss': 0.7568548917770386, 'eval_accuracy': 0.7176274018379282, 'eval_runtime': 1.7261, 'eval_samples_per_second': 693.474, 'eval_steps_per_second': 11.008, 'epoch': 3.0}
{'train_runtime': 38.2881, 'train_samples_per_second': 750.155, 'train_steps_per_second': 11.753, 'train_loss': 1.0479649522569445, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'eval_loss': 1.084716558456421, 'eval_accuracy': 0.5271512113617377, 'eval_runtime': 1.8003, 'eval_samples_per_second': 664.875, 'eval_steps_per_second': 21.107, 'epoch': 1.0}


Saving model checkpoint to opt-test/checkpoint-500
Configuration saved in opt-test/checkpoint-500/config.json
Model weights saved in opt-test/checkpoint-500/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-500/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-500/special_tokens_map.json


{'loss': 1.2434, 'learning_rate': 0.0024120922310264734, 'epoch': 1.67}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 32


{'eval_loss': 0.9552063941955566, 'eval_accuracy': 0.5914786967418546, 'eval_runtime': 1.7764, 'eval_samples_per_second': 673.842, 'eval_steps_per_second': 21.392, 'epoch': 2.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 32


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 17:00:45,878][0m Trial 14 finished with value: 1.1480138821072048 and parameters: {'batch_size': 32, 'learning_rate': 0.005427207519809565, 'weight_decay': 0.009860776929397401}. Best is trial 6 with value: 1.0253081936306423.[0m


{'eval_loss': 0.8361523747444153, 'eval_accuracy': 0.6516290726817042, 'eval_runtime': 1.8202, 'eval_samples_per_second': 657.615, 'eval_steps_per_second': 20.877, 'epoch': 3.0}
{'train_runtime': 41.4115, 'train_samples_per_second': 693.576, 'train_steps_per_second': 21.733, 'train_loss': 1.1480138821072048, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'eval_loss': 1.022154688835144, 'eval_accuracy': 0.5864661654135338, 'eval_runtime': 1.7165, 'eval_samples_per_second': 697.338, 'eval_steps_per_second': 11.069, 'epoch': 1.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 64


{'eval_loss': 0.7917675375938416, 'eval_accuracy': 0.7000835421888053, 'eval_runtime': 2.117, 'eval_samples_per_second': 565.424, 'eval_steps_per_second': 8.975, 'epoch': 2.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 64


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 17:01:26,660][0m Trial 15 finished with value: 1.0415825737847222 and parameters: {'batch_size': 64, 'learning_rate': 0.0038487109526380526, 'weight_decay': 0.004897398355058994}. Best is trial 6 with value: 1.0253081936306423.[0m


{'eval_loss': 0.7296342253684998, 'eval_accuracy': 0.7326649958228906, 'eval_runtime': 1.7554, 'eval_samples_per_second': 681.905, 'eval_steps_per_second': 10.824, 'epoch': 3.0}
{'train_runtime': 40.3278, 'train_samples_per_second': 712.214, 'train_steps_per_second': 11.159, 'train_loss': 1.0415825737847222, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'eval_loss': 0.930658757686615, 'eval_accuracy': 0.6282372598162071, 'eval_runtime': 1.7913, 'eval_samples_per_second': 668.222, 'eval_steps_per_second': 21.213, 'epoch': 1.0}


Saving model checkpoint to opt-test/checkpoint-500
Configuration saved in opt-test/checkpoint-500/config.json
Model weights saved in opt-test/checkpoint-500/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-500/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-500/special_tokens_map.json


{'loss': 1.111, 'learning_rate': 0.00121917744310983, 'epoch': 1.67}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 32


{'eval_loss': 0.7783859968185425, 'eval_accuracy': 0.7000835421888053, 'eval_runtime': 1.7852, 'eval_samples_per_second': 670.508, 'eval_steps_per_second': 21.286, 'epoch': 2.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 32


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 17:02:08,576][0m Trial 16 finished with value: 1.0100221082899306 and parameters: {'batch_size': 32, 'learning_rate': 0.0027431492469971175, 'weight_decay': 0.004900150335195089}. Best is trial 16 with value: 1.0100221082899306.[0m


{'eval_loss': 0.7056787014007568, 'eval_accuracy': 0.7301587301587301, 'eval_runtime': 1.7986, 'eval_samples_per_second': 665.514, 'eval_steps_per_second': 21.127, 'epoch': 3.0}
{'train_runtime': 41.4601, 'train_samples_per_second': 692.763, 'train_steps_per_second': 21.708, 'train_loss': 1.0100221082899306, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'eval_loss': 0.9249110817909241, 'eval_accuracy': 0.6165413533834586, 'eval_runtime': 1.7944, 'eval_samples_per_second': 667.086, 'eval_steps_per_second': 21.177, 'epoch': 1.0}


Saving model checkpoint to opt-test/checkpoint-500
Configuration saved in opt-test/checkpoint-500/config.json
Model weights saved in opt-test/checkpoint-500/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-500/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-500/special_tokens_map.json


{'loss': 1.1138, 'learning_rate': 0.0010398614716122443, 'epoch': 1.67}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 32


{'eval_loss': 0.7815350294113159, 'eval_accuracy': 0.6716791979949874, 'eval_runtime': 1.774, 'eval_samples_per_second': 674.737, 'eval_steps_per_second': 21.42, 'epoch': 2.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 32


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 17:02:50,560][0m Trial 17 finished with value: 1.0171932305230034 and parameters: {'batch_size': 32, 'learning_rate': 0.00233968831112755, 'weight_decay': 0.0068614372121561535}. Best is trial 16 with value: 1.0100221082899306.[0m


{'eval_loss': 0.7292386889457703, 'eval_accuracy': 0.7151211361737677, 'eval_runtime': 1.8138, 'eval_samples_per_second': 659.954, 'eval_steps_per_second': 20.951, 'epoch': 3.0}
{'train_runtime': 41.5276, 'train_samples_per_second': 691.637, 'train_steps_per_second': 21.672, 'train_loss': 1.0171932305230034, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'eval_loss': 1.0313823223114014, 'eval_accuracy': 0.5647451963241437, 'eval_runtime': 1.8176, 'eval_samples_per_second': 658.565, 'eval_steps_per_second': 20.907, 'epoch': 1.0}


Saving model checkpoint to opt-test/checkpoint-500
Configuration saved in opt-test/checkpoint-500/config.json
Model weights saved in opt-test/checkpoint-500/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-500/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-500/special_tokens_map.json


{'loss': 1.1652, 'learning_rate': 0.0002635496500834576, 'epoch': 1.67}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 32


{'eval_loss': 0.9588459730148315, 'eval_accuracy': 0.6215538847117794, 'eval_runtime': 1.8634, 'eval_samples_per_second': 642.359, 'eval_steps_per_second': 20.392, 'epoch': 2.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 32


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 17:03:31,447][0m Trial 18 finished with value: 1.1151045735677083 and parameters: {'batch_size': 32, 'learning_rate': 0.0005929867126877797, 'weight_decay': 0.006269757810787477}. Best is trial 16 with value: 1.0100221082899306.[0m


{'eval_loss': 0.9213922023773193, 'eval_accuracy': 0.6357560568086884, 'eval_runtime': 1.8014, 'eval_samples_per_second': 664.474, 'eval_steps_per_second': 21.094, 'epoch': 3.0}
{'train_runtime': 40.3856, 'train_samples_per_second': 711.194, 'train_steps_per_second': 22.285, 'train_loss': 1.1151045735677083, 'epoch': 3.0}


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--gngpostalsrvc--BERiT/snapshots/5799f2933c5845e55984f628ee81b985752c3897/config.json
Model config RobertaConfig {
  "_name_or_path": "gngpostalsrvc/BERiT",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.5,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.5,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 128,
  "model_type": "roberta",
  "num_attention_heads": 4,
  "num_hidden_layers": 1,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",

{'eval_loss': 0.9311962723731995, 'eval_accuracy': 0.6157059314954052, 'eval_runtime': 2.2046, 'eval_samples_per_second': 542.948, 'eval_steps_per_second': 17.236, 'epoch': 1.0}


Saving model checkpoint to opt-test/checkpoint-500
Configuration saved in opt-test/checkpoint-500/config.json
Model weights saved in opt-test/checkpoint-500/pytorch_model.bin
tokenizer config file saved in opt-test/checkpoint-500/tokenizer_config.json
Special tokens file saved in opt-test/checkpoint-500/special_tokens_map.json


{'loss': 1.1347, 'learning_rate': 0.001438975144983106, 'epoch': 1.67}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 32


{'eval_loss': 0.8189311027526855, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 2.4248, 'eval_samples_per_second': 493.649, 'eval_steps_per_second': 15.671, 'epoch': 2.0}


***** Running Evaluation *****
  Num examples = 1197
  Batch size = 32


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m[I 2023-03-05 17:04:12,729][0m Trial 19 finished with value: 1.0276928032769097 and parameters: {'batch_size': 32, 'learning_rate': 0.0032376940762119883, 'weight_decay': 0.005012474188757684}. Best is trial 16 with value: 1.0100221082899306.[0m


{'eval_loss': 0.7353609204292297, 'eval_accuracy': 0.720969089390142, 'eval_runtime': 2.4489, 'eval_samples_per_second': 488.799, 'eval_steps_per_second': 15.517, 'epoch': 3.0}
{'train_runtime': 40.7823, 'train_samples_per_second': 704.277, 'train_steps_per_second': 22.068, 'train_loss': 1.0276928032769097, 'epoch': 3.0}


{'batch_size': 32, 'learning_rate': 0.0027431492469971175, 'weight_decay': 0.004900150335195089}