In [24]:
import pandas as pd
import torch
import os

from torch.utils.data import Dataset
from transformers import (BertTokenizer, BertForSequenceClassification, Trainer,
                          TrainingArguments, BertPreTrainedModel)
# from simpletransformers.language_modeling import LanguageModelingModel
from sklearn.metrics import accuracy_score, f1_score


In [25]:
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))
print(torch.cuda.get_device_properties(0))

True
NVIDIA GeForce RTX 3060
_CudaDeviceProperties(name='NVIDIA GeForce RTX 3060', major=8, minor=6, total_memory=12050MB, multi_processor_count=28)


In [26]:
MODEL_NAME = '/home/abdullah/Code/dl/499A/best_models/epoch_3_merged_dataset_tinybert'
TRAIN_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/youtube_sentiment/sentiment_train.tsv'
TEST_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/youtube_sentiment/sentiment_test.tsv'
EVAL_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/youtube_sentiment/sentiment_dev.tsv'


In [27]:
def tsv_to_text(tsv_file_loc):
    file_name = tsv_file_loc.split("/")[-1].split(".")[0]
    txt_name = tsv_file_loc.replace(".tsv", ".txt")
    txt_name = txt_name.replace("split_merged", "texts")

    if os.path.exists(txt_name):
        return [txt_name, file_name]

    df = pd.read_csv(tsv_file_loc, sep="\t")

    for txt in df["text"]:
        with open(txt_name, "a", encoding="utf8") as f:
            f.writelines(txt + "\n")
    return [txt_name, file_name]


tsv_to_text(TRAIN_FILE_LOC)


['/home/abdullah/Code/dl/bnlp-resources/sentiment/youtube_sentiment/sentiment_train.txt',
 'sentiment_train']

In [28]:
def tsv_to_df(csv_file_loc):
    df = pd.read_csv(csv_file_loc, sep='\t')
    # remove id column
    df = df.drop(columns=['id'])

    # replace neutral with 0, positive with 1, negative with 2 in class_label column
    df.loc[df['class_label'] == 'Neutral', 'class_label'] = 0
    df.loc[df['class_label'] == 'Positive', 'class_label'] = 1
    df.loc[df['class_label'] == 'Negative', 'class_label'] = 2

    return df


train_df = tsv_to_df(TEST_FILE_LOC)

texts = train_df['text'].tolist()
print(train_df)


                                                  text class_label
0     Md খাইছি তােরে oiiii bolder bacha thiki to bolce           2
1            এগিয়ে জান তালাশ টিম আমরা আছি আপনাদের পাশে           1
2         shakil ahmed মাইরালা অামারে হানি খাই মরি রাই           0
3                                             ঘেউ!  :P           2
4                          মাল যথার্থ ঠিক কথাই বলেছেন।           1
..                                                 ...         ...
415               ওরে টাইগার, আমরা তোদের ছাগল মনে করি,           2
416  গান টা মোটামুটি ভালোই লাগছিলো নুসরাত ফারিয়ার গ...           0
417  এমন কিচু জানোয়ার জন্ম নিয়ে সে সাধারন মানুষ কে ...           2
418  সবি ভালো লেগেছে নাটকের তবে ছাত্রীর ঢং করে কথা ...           0
419  ইমরানরে জুতা খুলে মুজা দিয়ে পিটানো উচিৎ। শালার...           2

[420 rows x 2 columns]


In [29]:
def compute_metrics(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  # calculate accuracy using sklearn's function
  acc = accuracy_score(labels, preds)
  return {
      'accuracy': acc,
  }


In [30]:
def f1_calculator(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  # calculate accuracy using sklearn's function
  f1 = f1_score(labels, preds, average='weighted')
  return {
      'f1': f1,
  }


In [31]:
train_list = tsv_to_text(TRAIN_FILE_LOC)
test_list = tsv_to_text(TEST_FILE_LOC)
eval_list = tsv_to_text(EVAL_FILE_LOC)


In [32]:
os.environ["WANDB_DISABLED"] = "true"

train_df = tsv_to_df(TRAIN_FILE_LOC)
test_df = tsv_to_df(TEST_FILE_LOC)
eval_df = tsv_to_df(EVAL_FILE_LOC)


class TINYDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(value[idx])
                for key, value in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

train_encodings = tokenizer(train_df['text'].tolist(
), padding=True, truncation=True, max_length=512)
test_encodings = tokenizer(test_df['text'].tolist(
), padding=True, truncation=True, max_length=512)
eval_encodings = tokenizer(eval_df['text'].tolist(
), padding=True, truncation=True, max_length=512)

train_dataset = TINYDataset(train_encodings, train_df['class_label'].tolist())
test_dataset = TINYDataset(test_encodings, test_df['class_label'].tolist())
eval_dataset = TINYDataset(eval_encodings, eval_df['class_label'].tolist())

model = BertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3).to('cuda')
model.manual_seed = 14


Didn't find file /home/abdullah/Code/dl/499A/best_models/epoch_3_merged_dataset_tinybert/added_tokens.json. We won't load it.
Didn't find file /home/abdullah/Code/dl/499A/best_models/epoch_3_merged_dataset_tinybert/tokenizer.json. We won't load it.
loading file /home/abdullah/Code/dl/499A/best_models/epoch_3_merged_dataset_tinybert/vocab.txt
loading file None
loading file /home/abdullah/Code/dl/499A/best_models/epoch_3_merged_dataset_tinybert/special_tokens_map.json
loading file /home/abdullah/Code/dl/499A/best_models/epoch_3_merged_dataset_tinybert/tokenizer_config.json
loading file None
 44%|████▍     | 488/1100 [02:34<03:13,  3.16it/s]
 25%|██▌       | 279/1100 [01:03<03:07,  4.37it/s]
loading configuration file /home/abdullah/Code/dl/499A/best_models/epoch_3_merged_dataset_tinybert/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout"

## 1 Epoch Train

In [33]:
# training_args = TrainingArguments(
#     output_dir=f"temp",
#     num_train_epochs=1,
#     per_device_train_batch_size=48,
#     per_device_eval_batch_size=96,
#     warmup_steps=500,
#     learning_rate=5e-5,
#     weight_decay=0.01,
#     overwrite_output_dir=True,
#     logging_dir=f"temp/logs",
#     logging_steps=29,
#     save_steps=29,
#     load_best_model_at_end=True,
#     evaluation_strategy="steps",
#     seed=14,
# )

# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=train_dataset,
#     eval_dataset=eval_dataset

# )

# trainer.train()


## 100 Epoch with 1 epoch model

In [34]:
MODEL_NAME = '/home/abdullah/Code/dl/499A/best_models/1/youtube_sentiment'
model = BertForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=3).to('cuda')
model.manual_seed = 14

for name, param in model.named_parameters():
    if 'bert.encoder.layer.0' in name:
        # print(name, param.requires_grad)
        param.requires_grad = False
# print("\n\n\n")
# for name, param in model.named_parameters():
#     print(name, param.requires_grad)

training_args = TrainingArguments(
    output_dir=f"temp",
    num_train_epochs=100,
    per_device_train_batch_size=192,
    per_device_eval_batch_size=384,
    warmup_steps=500,
    learning_rate=5e-5,
    weight_decay=0.01,
    overwrite_output_dir=True,
    logging_dir=f"temp/logs",
    logging_steps=100,
    save_steps=100,
    load_best_model_at_end=True,
    evaluation_strategy="steps",
    seed=14,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=f1_calculator,

)

trainer.train()

# print(cool.metrics)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,

)

trainer.train()


loading configuration file /home/abdullah/Code/dl/499A/best_models/1/youtube_sentiment/config.json
Model config BertConfig {
  "_name_or_path": "/home/abdullah/Code/dl/499A/best_models/epoch_3_merged_dataset_tinybert",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 14,
  "intermediate_size": 512,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.12.5",
  "type_vocab_size": 2,
  "use_cache": true,
  "voc

{'loss': 108.1997, 'learning_rate': 1e-05, 'epoch': 9.09}



                                                  

  9%|▉         | 100/1100 [00:16<02:26,  6.81it/s]
[ASaving model checkpoint to temp/checkpoint-100
Configuration saved in temp/checkpoint-100/config.json


{'eval_loss': 64.85067749023438, 'eval_f1': 0.3875538939978081, 'eval_runtime': 0.1678, 'eval_samples_per_second': 2497.032, 'eval_steps_per_second': 11.919, 'epoch': 9.09}


Model weights saved in temp/checkpoint-100/pytorch_model.bin
                                                  

 18%|█▊        | 200/1100 [00:34<02:18,  6.51it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 85.8482, 'learning_rate': 2e-05, 'epoch': 18.18}



                                                  

 18%|█▊        | 200/1100 [00:34<02:18,  6.51it/s]
[ASaving model checkpoint to temp/checkpoint-200
Configuration saved in temp/checkpoint-200/config.json


{'eval_loss': 50.62283706665039, 'eval_f1': 0.39969862806206086, 'eval_runtime': 0.168, 'eval_samples_per_second': 2493.797, 'eval_steps_per_second': 11.904, 'epoch': 18.18}


Model weights saved in temp/checkpoint-200/pytorch_model.bin
                                                  

 27%|██▋       | 300/1100 [00:51<02:07,  6.30it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 75.5295, 'learning_rate': 3e-05, 'epoch': 27.27}



                                                  

 27%|██▋       | 300/1100 [00:52<02:07,  6.30it/s]
[ASaving model checkpoint to temp/checkpoint-300
Configuration saved in temp/checkpoint-300/config.json


{'eval_loss': 48.24852752685547, 'eval_f1': 0.39695845109640737, 'eval_runtime': 0.1616, 'eval_samples_per_second': 2593.359, 'eval_steps_per_second': 12.379, 'epoch': 27.27}


Model weights saved in temp/checkpoint-300/pytorch_model.bin
                                                  

 36%|███▋      | 400/1100 [01:09<01:56,  5.99it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 68.9076, 'learning_rate': 4e-05, 'epoch': 36.36}



                                                  

 36%|███▋      | 400/1100 [01:09<01:56,  5.99it/s]
[ASaving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json


{'eval_loss': 38.45890426635742, 'eval_f1': 0.41808969654283284, 'eval_runtime': 0.1668, 'eval_samples_per_second': 2511.516, 'eval_steps_per_second': 11.988, 'epoch': 36.36}


Model weights saved in temp/checkpoint-400/pytorch_model.bin
                                                  

 45%|████▌     | 500/1100 [01:26<01:42,  5.86it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 60.8213, 'learning_rate': 5e-05, 'epoch': 45.45}



                                                  

 45%|████▌     | 500/1100 [01:26<01:42,  5.86it/s]
[ASaving model checkpoint to temp/checkpoint-500
Configuration saved in temp/checkpoint-500/config.json


{'eval_loss': 30.985538482666016, 'eval_f1': 0.41455433552976023, 'eval_runtime': 0.1636, 'eval_samples_per_second': 2560.976, 'eval_steps_per_second': 12.224, 'epoch': 45.45}


Model weights saved in temp/checkpoint-500/pytorch_model.bin
                                                  

 55%|█████▍    | 600/1100 [01:43<01:23,  5.97it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 53.4131, 'learning_rate': 4.166666666666667e-05, 'epoch': 54.55}



                                                  

 55%|█████▍    | 600/1100 [01:43<01:23,  5.97it/s]
[ASaving model checkpoint to temp/checkpoint-600
Configuration saved in temp/checkpoint-600/config.json


{'eval_loss': 25.73398780822754, 'eval_f1': 0.46386997438918826, 'eval_runtime': 0.1635, 'eval_samples_per_second': 2563.116, 'eval_steps_per_second': 12.234, 'epoch': 54.55}


Model weights saved in temp/checkpoint-600/pytorch_model.bin
                                                  

 64%|██████▎   | 700/1100 [02:00<01:07,  5.89it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 50.2901, 'learning_rate': 3.3333333333333335e-05, 'epoch': 63.64}



                                                  

 64%|██████▎   | 700/1100 [02:00<01:07,  5.89it/s]
[ASaving model checkpoint to temp/checkpoint-700
Configuration saved in temp/checkpoint-700/config.json


{'eval_loss': 22.92599868774414, 'eval_f1': 0.4226483639513991, 'eval_runtime': 0.1628, 'eval_samples_per_second': 2572.94, 'eval_steps_per_second': 12.281, 'epoch': 63.64}


Model weights saved in temp/checkpoint-700/pytorch_model.bin
                                                  

 73%|███████▎  | 800/1100 [02:17<00:51,  5.83it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 47.4983, 'learning_rate': 2.5e-05, 'epoch': 72.73}



                                                  

 73%|███████▎  | 800/1100 [02:17<00:51,  5.83it/s]
[ASaving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json


{'eval_loss': 21.48202133178711, 'eval_f1': 0.3981293084380312, 'eval_runtime': 0.1726, 'eval_samples_per_second': 2428.275, 'eval_steps_per_second': 11.591, 'epoch': 72.73}


Model weights saved in temp/checkpoint-800/pytorch_model.bin
                                                  

 82%|████████▏ | 900/1100 [02:34<00:34,  5.85it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 44.8147, 'learning_rate': 1.6666666666666667e-05, 'epoch': 81.82}



                                                  

 82%|████████▏ | 900/1100 [02:34<00:34,  5.85it/s]
[ASaving model checkpoint to temp/checkpoint-900
Configuration saved in temp/checkpoint-900/config.json


{'eval_loss': 17.012487411499023, 'eval_f1': 0.452662179514351, 'eval_runtime': 0.1608, 'eval_samples_per_second': 2605.459, 'eval_steps_per_second': 12.437, 'epoch': 81.82}


Model weights saved in temp/checkpoint-900/pytorch_model.bin
                                                   

 91%|█████████ | 1000/1100 [02:51<00:17,  5.79it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 44.6519, 'learning_rate': 8.333333333333334e-06, 'epoch': 90.91}



                                                   

 91%|█████████ | 1000/1100 [02:51<00:17,  5.79it/s]
[ASaving model checkpoint to temp/checkpoint-1000
Configuration saved in temp/checkpoint-1000/config.json


{'eval_loss': 17.105674743652344, 'eval_f1': 0.4121390344598758, 'eval_runtime': 0.1598, 'eval_samples_per_second': 2622.664, 'eval_steps_per_second': 12.519, 'epoch': 90.91}


Model weights saved in temp/checkpoint-1000/pytorch_model.bin
                                                   

100%|██████████| 1100/1100 [03:08<00:00,  5.71it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 44.4852, 'learning_rate': 0.0, 'epoch': 100.0}



                                                   

100%|██████████| 1100/1100 [03:08<00:00,  5.71it/s]
[ASaving model checkpoint to temp/checkpoint-1100
Configuration saved in temp/checkpoint-1100/config.json


{'eval_loss': 15.795829772949219, 'eval_f1': 0.4238468050680636, 'eval_runtime': 0.1617, 'eval_samples_per_second': 2591.929, 'eval_steps_per_second': 12.372, 'epoch': 100.0}


Model weights saved in temp/checkpoint-1100/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-1100 (score: 15.795829772949219).
                                                   

100%|██████████| 1100/1100 [03:09<00:00,  5.71it/s]
100%|██████████| 1100/1100 [03:09<00:00,  5.80it/s]
***** Running training *****
  Num examples = 1957
  Num Epochs = 100
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 1100


{'train_runtime': 189.7062, 'train_samples_per_second': 1031.595, 'train_steps_per_second': 5.798, 'train_loss': 62.22360174005682, 'epoch': 100.0}


                                                  

  9%|▉         | 100/1100 [00:16<02:18,  7.20it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 44.1984, 'learning_rate': 1e-05, 'epoch': 9.09}



                                                  

  9%|▉         | 100/1100 [00:16<02:18,  7.20it/s]
[ASaving model checkpoint to temp/checkpoint-100
Configuration saved in temp/checkpoint-100/config.json


{'eval_loss': 16.527238845825195, 'eval_accuracy': 0.513126491646778, 'eval_runtime': 0.1507, 'eval_samples_per_second': 2780.673, 'eval_steps_per_second': 13.273, 'epoch': 9.09}


Model weights saved in temp/checkpoint-100/pytorch_model.bin
                                                  

 18%|█▊        | 200/1100 [00:33<02:15,  6.64it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 43.6067, 'learning_rate': 2e-05, 'epoch': 18.18}



                                                  

 18%|█▊        | 200/1100 [00:33<02:15,  6.64it/s]
[ASaving model checkpoint to temp/checkpoint-200
Configuration saved in temp/checkpoint-200/config.json


{'eval_loss': 15.788787841796875, 'eval_accuracy': 0.522673031026253, 'eval_runtime': 0.1573, 'eval_samples_per_second': 2663.563, 'eval_steps_per_second': 12.714, 'epoch': 18.18}


Model weights saved in temp/checkpoint-200/pytorch_model.bin
                                                  

 27%|██▋       | 300/1100 [00:50<02:06,  6.35it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 42.8801, 'learning_rate': 3e-05, 'epoch': 27.27}



                                                  

 27%|██▋       | 300/1100 [00:50<02:06,  6.35it/s]
[ASaving model checkpoint to temp/checkpoint-300
Configuration saved in temp/checkpoint-300/config.json


{'eval_loss': 16.542984008789062, 'eval_accuracy': 0.522673031026253, 'eval_runtime': 0.1609, 'eval_samples_per_second': 2604.64, 'eval_steps_per_second': 12.433, 'epoch': 27.27}


Model weights saved in temp/checkpoint-300/pytorch_model.bin
                                                  

 36%|███▋      | 400/1100 [01:07<01:52,  6.20it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 41.8192, 'learning_rate': 4e-05, 'epoch': 36.36}



                                                  

 36%|███▋      | 400/1100 [01:07<01:52,  6.20it/s]
[ASaving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json


{'eval_loss': 14.203670501708984, 'eval_accuracy': 0.4892601431980907, 'eval_runtime': 0.1602, 'eval_samples_per_second': 2615.518, 'eval_steps_per_second': 12.485, 'epoch': 36.36}


Model weights saved in temp/checkpoint-400/pytorch_model.bin
                                                  

 45%|████▌     | 500/1100 [01:24<01:38,  6.10it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 42.3402, 'learning_rate': 5e-05, 'epoch': 45.45}



                                                  

 45%|████▌     | 500/1100 [01:24<01:38,  6.10it/s]
[ASaving model checkpoint to temp/checkpoint-500
Configuration saved in temp/checkpoint-500/config.json


{'eval_loss': 14.319704055786133, 'eval_accuracy': 0.5274463007159904, 'eval_runtime': 0.1717, 'eval_samples_per_second': 2439.629, 'eval_steps_per_second': 11.645, 'epoch': 45.45}


Model weights saved in temp/checkpoint-500/pytorch_model.bin
                                                  

 55%|█████▍    | 600/1100 [01:41<01:24,  5.95it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 41.6395, 'learning_rate': 4.166666666666667e-05, 'epoch': 54.55}



                                                  

 55%|█████▍    | 600/1100 [01:41<01:24,  5.95it/s]
[ASaving model checkpoint to temp/checkpoint-600
Configuration saved in temp/checkpoint-600/config.json


{'eval_loss': 15.426987648010254, 'eval_accuracy': 0.5274463007159904, 'eval_runtime': 0.16, 'eval_samples_per_second': 2619.397, 'eval_steps_per_second': 12.503, 'epoch': 54.55}


Model weights saved in temp/checkpoint-600/pytorch_model.bin
                                                  

 64%|██████▎   | 700/1100 [01:57<01:04,  6.17it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 40.6812, 'learning_rate': 3.3333333333333335e-05, 'epoch': 63.64}



                                                  

 64%|██████▎   | 700/1100 [01:57<01:04,  6.17it/s]
[ASaving model checkpoint to temp/checkpoint-700
Configuration saved in temp/checkpoint-700/config.json


{'eval_loss': 13.692047119140625, 'eval_accuracy': 0.5202863961813843, 'eval_runtime': 0.1622, 'eval_samples_per_second': 2582.548, 'eval_steps_per_second': 12.327, 'epoch': 63.64}


Model weights saved in temp/checkpoint-700/pytorch_model.bin
                                                  

 73%|███████▎  | 800/1100 [02:14<00:49,  6.12it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 40.1993, 'learning_rate': 2.5e-05, 'epoch': 72.73}



                                                  

 73%|███████▎  | 800/1100 [02:14<00:49,  6.12it/s]
[ASaving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json


{'eval_loss': 11.192315101623535, 'eval_accuracy': 0.4916467780429594, 'eval_runtime': 0.1642, 'eval_samples_per_second': 2551.302, 'eval_steps_per_second': 12.178, 'epoch': 72.73}


Model weights saved in temp/checkpoint-800/pytorch_model.bin
                                                  

 82%|████████▏ | 900/1100 [02:31<00:33,  6.04it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 39.2164, 'learning_rate': 1.6666666666666667e-05, 'epoch': 81.82}



                                                  

 82%|████████▏ | 900/1100 [02:31<00:33,  6.04it/s]
[ASaving model checkpoint to temp/checkpoint-900
Configuration saved in temp/checkpoint-900/config.json


{'eval_loss': 14.0028076171875, 'eval_accuracy': 0.513126491646778, 'eval_runtime': 0.154, 'eval_samples_per_second': 2720.812, 'eval_steps_per_second': 12.987, 'epoch': 81.82}


Model weights saved in temp/checkpoint-900/pytorch_model.bin
                                                   

 91%|█████████ | 1000/1100 [02:48<00:17,  5.62it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 39.2622, 'learning_rate': 8.333333333333334e-06, 'epoch': 90.91}



                                                   

 91%|█████████ | 1000/1100 [02:48<00:17,  5.62it/s]
[ASaving model checkpoint to temp/checkpoint-1000
Configuration saved in temp/checkpoint-1000/config.json


{'eval_loss': 13.992264747619629, 'eval_accuracy': 0.5322195704057279, 'eval_runtime': 0.1779, 'eval_samples_per_second': 2355.012, 'eval_steps_per_second': 11.241, 'epoch': 90.91}


Model weights saved in temp/checkpoint-1000/pytorch_model.bin
                                                   

100%|██████████| 1100/1100 [03:06<00:00,  5.78it/s]
[A***** Running Evaluation *****
  Num examples = 419
  Batch size = 384


{'loss': 39.6058, 'learning_rate': 0.0, 'epoch': 100.0}



                                                   

100%|██████████| 1100/1100 [03:06<00:00,  5.78it/s]
[ASaving model checkpoint to temp/checkpoint-1100
Configuration saved in temp/checkpoint-1100/config.json


{'eval_loss': 13.759910583496094, 'eval_accuracy': 0.5393794749403341, 'eval_runtime': 0.1594, 'eval_samples_per_second': 2629.351, 'eval_steps_per_second': 12.551, 'epoch': 100.0}


Model weights saved in temp/checkpoint-1100/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-800 (score: 11.192315101623535).
                                                   

100%|██████████| 1100/1100 [03:07<00:00,  5.78it/s]
100%|██████████| 1100/1100 [03:07<00:00,  5.88it/s]

{'train_runtime': 187.1252, 'train_samples_per_second': 1045.824, 'train_steps_per_second': 5.878, 'train_loss': 41.40444779829546, 'epoch': 100.0}





TrainOutput(global_step=1100, training_loss=41.40444779829546, metrics={'train_runtime': 187.1252, 'train_samples_per_second': 1045.824, 'train_steps_per_second': 5.878, 'train_loss': 41.40444779829546, 'epoch': 100.0})