In [21]:
import pandas as pd
import torch
import os

from torch.utils.data import Dataset
from transformers import (BertTokenizer, BertForSequenceClassification, Trainer,
                          TrainingArguments, BertPreTrainedModel)
# from simpletransformers.language_modeling import LanguageModelingModel
from sklearn.metrics import accuracy_score, f1_score


In [22]:
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))
print(torch.cuda.get_device_properties(0))

True
NVIDIA GeForce RTX 3060
_CudaDeviceProperties(name='NVIDIA GeForce RTX 3060', major=8, minor=6, total_memory=12050MB, multi_processor_count=28)


In [23]:
MODEL_NAME = '/home/abdullah/Code/dl/499A/best_models/epoch_3_merged_dataset_tinybert'
TRAIN_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/merged_train.csv'
EVAL_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/merged_test.csv'


In [24]:
def tsv_to_text(tsv_file_loc):
    file_name = tsv_file_loc.split("/")[-1].split(".")[0]
    txt_name = tsv_file_loc.replace(".tsv", ".txt")
    txt_name = txt_name.replace("split_merged", "texts")

    if os.path.exists(txt_name):
        return [txt_name, file_name]

    df = pd.read_csv(tsv_file_loc, sep="\t")

    for txt in df["text"]:
        with open(txt_name, "a", encoding="utf8") as f:
            f.writelines(txt + "\n")
    return [txt_name, file_name]


tsv_to_text(TRAIN_FILE_LOC)


['/home/abdullah/Code/dl/bnlp-resources/sentiment/merged_train.csv',
 'merged_train']

In [25]:
def csv_to_df(csv_file_loc):
    df = pd.read_csv(csv_file_loc)
    # remove id column
    # df = df.drop(columns=['id'])

    # replace neutral with 0, positive with 1, negative with 2 in class_label column
    df.loc[df['label'] == 'Neutral', 'label'] = 0
    df.loc[df['label'] == 'Positive', 'label'] = 1
    df.loc[df['label'] == 'Negative', 'label'] = 2

    return df


train_df = csv_to_df(TRAIN_FILE_LOC)

texts = train_df['text'].tolist()
print(train_df)


                                                    text label
0         @BoyBanglar ভালোই,, তাও ত বাংলার বেকার ছেলে না     1
1      'সুন্নাহের গুরুত্ব ও প্রয়োজনীয়তাhttp:/t.co/Kv9...     0
2      সারা দুনিয়া থেকে যদি বাছাই করে ৫০০ জন ভালো মা...     0
3      ক্রিকেট বোর্ড কে যদি দুদকের সম্মুখীন করা হয় এব...     2
4      সপ্তাহের জন্য তারা একটি দুর্দান্ত কাজ করে এই র...     1
...                                                  ...   ...
18739  RT @arafatul: গত এক দশক ধরে জার্মান রাজনীতি খু...     0
18740  না অাছে কোনো পাওয়ার হিটার, না অাছে একটা লেগস্প...     2
18741  গাজীপুরের শ্রীপুরে ১০০ পিছ ইয়াবা ও গাঁজাসহ ৩ জ...     2
18742   করো উন্নতি হউক বা না হউক মেডামের শরীরের ভালো ...     2
18743  খেলোয়াড়দের কথা বাদ দিলাম। বোর্ড, কোচ, নির্বাচক...     2

[18744 rows x 2 columns]


In [26]:
def compute_metrics(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  # calculate accuracy using sklearn's function
  acc = accuracy_score(labels, preds)
  return {
      'accuracy': acc,
  }


In [27]:
def f1_calculator(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  # calculate accuracy using sklearn's function
  f1 = f1_score(labels, preds, average='weighted')
  return {
      'f1': f1,
  }


In [28]:
train_list = tsv_to_text(TRAIN_FILE_LOC)
eval_list = tsv_to_text(EVAL_FILE_LOC)


In [29]:
os.environ["WANDB_DISABLED"] = "true"

train_df = csv_to_df(TRAIN_FILE_LOC)
eval_df = csv_to_df(EVAL_FILE_LOC)


class TINYDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(value[idx])
                for key, value in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

train_encodings = tokenizer(train_df['text'].tolist(), padding=True, truncation=True, max_length=512)
eval_encodings = tokenizer(eval_df['text'].tolist(), padding=True, truncation=True, max_length=512)

train_dataset = TINYDataset(train_encodings, train_df['label'].tolist())
eval_dataset = TINYDataset(eval_encodings, eval_df['label'].tolist())

model = BertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3).to('cuda')
model.manual_seed = 14


Some weights of the model checkpoint at /home/abdullah/Code/dl/499A/best_models/epoch_3_merged_dataset_tinybert were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized fr

## 1 Epoch Train

In [30]:
# training_args = TrainingArguments(
#     output_dir=f"temp",
#     num_train_epochs=1,
#     per_device_train_batch_size=48,
#     per_device_eval_batch_size=96,
#     warmup_steps=500,
#     learning_rate=5e-5,
#     weight_decay=0.01,
#     overwrite_output_dir=True,
#     logging_dir=f"temp/logs",
#     logging_steps=29,
#     save_steps=29,
#     load_best_model_at_end=True,
#     evaluation_strategy="steps",
#     seed=14,
# )

# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=train_dataset,
#     eval_dataset=eval_dataset

# )

# trainer.train()


Using the `WAND_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
***** Running training *****
  Num examples = 18744
  Num Epochs = 1
  Instantaneous batch size per device = 48
  Total train batch size (w. parallel, distributed & accumulation) = 48
  Gradient Accumulation steps = 1
  Total optimization steps = 391
  7%|▋         | 29/391 [00:04<00:21, 16.66it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 113.5151, 'learning_rate': 2.9e-06, 'epoch': 0.07}


                                                
  7%|▋         | 29/391 [00:05<00:21, 16.66it/s]Saving model checkpoint to temp/checkpoint-29
Configuration saved in temp/checkpoint-29/config.json


{'eval_loss': 97.4742431640625, 'eval_runtime': 1.5052, 'eval_samples_per_second': 3113.256, 'eval_steps_per_second': 32.554, 'epoch': 0.07}


Model weights saved in temp/checkpoint-29/pytorch_model.bin
 15%|█▍        | 58/391 [00:08<00:19, 17.52it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 112.936, 'learning_rate': 5.8e-06, 'epoch': 0.15}


                                                
 15%|█▍        | 58/391 [00:09<00:19, 17.52it/s]Saving model checkpoint to temp/checkpoint-58
Configuration saved in temp/checkpoint-58/config.json


{'eval_loss': 94.71003723144531, 'eval_runtime': 1.5028, 'eval_samples_per_second': 3118.241, 'eval_steps_per_second': 32.606, 'epoch': 0.15}


Model weights saved in temp/checkpoint-58/pytorch_model.bin
 22%|██▏       | 87/391 [00:11<00:16, 18.18it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 108.854, 'learning_rate': 8.7e-06, 'epoch': 0.22}


                                                
 22%|██▏       | 87/391 [00:13<00:16, 18.18it/s]Saving model checkpoint to temp/checkpoint-87
Configuration saved in temp/checkpoint-87/config.json
Model weights saved in temp/checkpoint-87/pytorch_model.bin


{'eval_loss': 94.52555084228516, 'eval_runtime': 1.4922, 'eval_samples_per_second': 3140.415, 'eval_steps_per_second': 32.838, 'epoch': 0.22}


 30%|██▉       | 116/391 [00:15<00:15, 17.60it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 97.4753, 'learning_rate': 1.16e-05, 'epoch': 0.3}


                                                 
 30%|██▉       | 116/391 [00:16<00:15, 17.60it/s]Saving model checkpoint to temp/checkpoint-116
Configuration saved in temp/checkpoint-116/config.json


{'eval_loss': 93.77772521972656, 'eval_runtime': 1.493, 'eval_samples_per_second': 3138.752, 'eval_steps_per_second': 32.821, 'epoch': 0.3}


Model weights saved in temp/checkpoint-116/pytorch_model.bin
 37%|███▋      | 145/391 [00:19<00:13, 18.15it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 103.7358, 'learning_rate': 1.45e-05, 'epoch': 0.37}


                                                 
 37%|███▋      | 145/391 [00:20<00:13, 18.15it/s]Saving model checkpoint to temp/checkpoint-145
Configuration saved in temp/checkpoint-145/config.json


{'eval_loss': 90.89369201660156, 'eval_runtime': 1.4971, 'eval_samples_per_second': 3130.021, 'eval_steps_per_second': 32.73, 'epoch': 0.37}


Model weights saved in temp/checkpoint-145/pytorch_model.bin
 45%|████▍     | 174/391 [00:22<00:12, 17.40it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 100.0546, 'learning_rate': 1.74e-05, 'epoch': 0.45}


                                                 
 45%|████▍     | 174/391 [00:24<00:12, 17.40it/s]Saving model checkpoint to temp/checkpoint-174
Configuration saved in temp/checkpoint-174/config.json


{'eval_loss': 84.29419708251953, 'eval_runtime': 1.4956, 'eval_samples_per_second': 3133.216, 'eval_steps_per_second': 32.763, 'epoch': 0.45}


Model weights saved in temp/checkpoint-174/pytorch_model.bin
 52%|█████▏    | 203/391 [00:26<00:10, 18.13it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 100.24, 'learning_rate': 2.0300000000000002e-05, 'epoch': 0.52}


                                                 
 52%|█████▏    | 203/391 [00:28<00:10, 18.13it/s]Saving model checkpoint to temp/checkpoint-203
Configuration saved in temp/checkpoint-203/config.json


{'eval_loss': 79.95582580566406, 'eval_runtime': 1.4495, 'eval_samples_per_second': 3232.787, 'eval_steps_per_second': 33.804, 'epoch': 0.52}


Model weights saved in temp/checkpoint-203/pytorch_model.bin
 59%|█████▉    | 232/391 [00:30<00:08, 17.92it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 93.6764, 'learning_rate': 2.32e-05, 'epoch': 0.59}


                                                 
 59%|█████▉    | 232/391 [00:31<00:08, 17.92it/s]Saving model checkpoint to temp/checkpoint-232
Configuration saved in temp/checkpoint-232/config.json


{'eval_loss': 69.17322540283203, 'eval_runtime': 1.4684, 'eval_samples_per_second': 3191.215, 'eval_steps_per_second': 33.37, 'epoch': 0.59}


Model weights saved in temp/checkpoint-232/pytorch_model.bin
 67%|██████▋   | 261/391 [00:34<00:07, 18.46it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 85.7793, 'learning_rate': 2.61e-05, 'epoch': 0.67}


                                                 
 67%|██████▋   | 261/391 [00:35<00:07, 18.46it/s]Saving model checkpoint to temp/checkpoint-261
Configuration saved in temp/checkpoint-261/config.json


{'eval_loss': 59.271915435791016, 'eval_runtime': 1.469, 'eval_samples_per_second': 3189.902, 'eval_steps_per_second': 33.356, 'epoch': 0.67}


Model weights saved in temp/checkpoint-261/pytorch_model.bin
 74%|███████▍  | 290/391 [00:38<00:05, 17.72it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 82.5134, 'learning_rate': 2.9e-05, 'epoch': 0.74}


                                                 
 74%|███████▍  | 290/391 [00:39<00:05, 17.72it/s]Saving model checkpoint to temp/checkpoint-290
Configuration saved in temp/checkpoint-290/config.json


{'eval_loss': 61.09089660644531, 'eval_runtime': 1.4536, 'eval_samples_per_second': 3223.618, 'eval_steps_per_second': 33.708, 'epoch': 0.74}


Model weights saved in temp/checkpoint-290/pytorch_model.bin
 82%|████████▏ | 319/391 [00:41<00:03, 18.18it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 75.4733, 'learning_rate': 3.19e-05, 'epoch': 0.82}


                                                 
 82%|████████▏ | 319/391 [00:43<00:03, 18.18it/s]Saving model checkpoint to temp/checkpoint-319
Configuration saved in temp/checkpoint-319/config.json


{'eval_loss': 71.53326416015625, 'eval_runtime': 1.4565, 'eval_samples_per_second': 3217.372, 'eval_steps_per_second': 33.643, 'epoch': 0.82}


Model weights saved in temp/checkpoint-319/pytorch_model.bin
 89%|████████▉ | 348/391 [00:45<00:02, 17.48it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 76.8193, 'learning_rate': 3.48e-05, 'epoch': 0.89}


                                                 
 89%|████████▉ | 348/391 [00:46<00:02, 17.48it/s]Saving model checkpoint to temp/checkpoint-348
Configuration saved in temp/checkpoint-348/config.json


{'eval_loss': 38.70534133911133, 'eval_runtime': 1.4697, 'eval_samples_per_second': 3188.377, 'eval_steps_per_second': 33.34, 'epoch': 0.89}


Model weights saved in temp/checkpoint-348/pytorch_model.bin
 96%|█████████▋| 377/391 [00:49<00:00, 18.17it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 67.2835, 'learning_rate': 3.77e-05, 'epoch': 0.96}


                                                 
 96%|█████████▋| 377/391 [00:50<00:00, 18.17it/s]Saving model checkpoint to temp/checkpoint-377
Configuration saved in temp/checkpoint-377/config.json


{'eval_loss': 47.62928009033203, 'eval_runtime': 1.4554, 'eval_samples_per_second': 3219.826, 'eval_steps_per_second': 33.669, 'epoch': 0.96}


Model weights saved in temp/checkpoint-377/pytorch_model.bin
100%|██████████| 391/391 [00:51<00:00, 10.36it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-348 (score: 38.70534133911133).
100%|██████████| 391/391 [00:51<00:00,  7.52it/s]

{'train_runtime': 51.9996, 'train_samples_per_second': 360.464, 'train_steps_per_second': 7.519, 'train_loss': 92.62740800081922, 'epoch': 1.0}





TrainOutput(global_step=391, training_loss=92.62740800081922, metrics={'train_runtime': 51.9996, 'train_samples_per_second': 360.464, 'train_steps_per_second': 7.519, 'train_loss': 92.62740800081922, 'epoch': 1.0})

## 100 Epoch with 1 epoch model

In [32]:
MODEL_NAME = '/home/abdullah/Code/dl/499A/best_models/1/youtube_sentiment'
model = BertForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=3).to('cuda')
model.manual_seed = 14

for name, param in model.named_parameters():
    if 'bert.encoder.layer.0' in name:
        # print(name, param.requires_grad)
        param.requires_grad = False
# print("\n\n\n")
# for name, param in model.named_parameters():
#     print(name, param.requires_grad)

training_args = TrainingArguments(
    output_dir=f"temp",
    num_train_epochs=100,
    per_device_train_batch_size=192,
    per_device_eval_batch_size=384,
    warmup_steps=500,
    learning_rate=5e-5,
    weight_decay=0.01,
    overwrite_output_dir=True,
    logging_dir=f"temp/logs",
    logging_steps=400,
    save_steps=400,
    load_best_model_at_end=True,
    evaluation_strategy="steps",
    seed=14,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=f1_calculator,

)

trainer.train()

# print(cool.metrics)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,

)

trainer.train()


loading configuration file /home/abdullah/Code/dl/499A/best_models/1/youtube_sentiment/config.json
Model config BertConfig {
  "_name_or_path": "/home/abdullah/Code/dl/499A/best_models/epoch_3_merged_dataset_tinybert",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 14,
  "intermediate_size": 512,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.12.5",
  "type_vocab_size": 2,
  "use_cache": true,
  "voc

{'loss': 78.1838, 'learning_rate': 4e-05, 'epoch': 4.08}



  4%|▍         | 400/9800 [01:14<28:42,  5.46it/s]Saving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json
Model weights saved in temp/checkpoint-400/pytorch_model.bin


{'eval_loss': 28.850675582885742, 'eval_f1': 0.3913811361732819, 'eval_runtime': 1.8945, 'eval_samples_per_second': 2473.44, 'eval_steps_per_second': 6.862, 'epoch': 4.08}


  8%|▊         | 800/9800 [02:29<27:47,  5.40it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 46.4311, 'learning_rate': 4.8387096774193554e-05, 'epoch': 8.16}



  8%|▊         | 800/9800 [02:30<27:47,  5.40it/s]Saving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json
Model weights saved in temp/checkpoint-800/pytorch_model.bin


{'eval_loss': 13.331594467163086, 'eval_f1': 0.4720718542444767, 'eval_runtime': 1.8613, 'eval_samples_per_second': 2517.616, 'eval_steps_per_second': 6.984, 'epoch': 8.16}


 12%|█▏        | 1200/9800 [03:41<23:28,  6.10it/s] ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 39.179, 'learning_rate': 4.6236559139784944e-05, 'epoch': 12.24}



 12%|█▏        | 1200/9800 [03:43<23:28,  6.10it/s]Saving model checkpoint to temp/checkpoint-1200
Configuration saved in temp/checkpoint-1200/config.json
Model weights saved in temp/checkpoint-1200/pytorch_model.bin


{'eval_loss': 14.05126953125, 'eval_f1': 0.3910255373922273, 'eval_runtime': 1.6387, 'eval_samples_per_second': 2859.59, 'eval_steps_per_second': 7.933, 'epoch': 12.24}


 16%|█▋        | 1600/9800 [04:52<24:21,  5.61it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 36.9239, 'learning_rate': 4.408602150537635e-05, 'epoch': 16.33}



 16%|█▋        | 1600/9800 [04:54<24:21,  5.61it/s]Saving model checkpoint to temp/checkpoint-1600
Configuration saved in temp/checkpoint-1600/config.json
Model weights saved in temp/checkpoint-1600/pytorch_model.bin


{'eval_loss': 9.6487398147583, 'eval_f1': 0.5157627974600689, 'eval_runtime': 1.7919, 'eval_samples_per_second': 2615.172, 'eval_steps_per_second': 7.255, 'epoch': 16.33}


 20%|██        | 2000/9800 [06:04<23:59,  5.42it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 35.26, 'learning_rate': 4.1935483870967746e-05, 'epoch': 20.41}



 20%|██        | 2000/9800 [06:06<23:59,  5.42it/s]Saving model checkpoint to temp/checkpoint-2000
Configuration saved in temp/checkpoint-2000/config.json
Model weights saved in temp/checkpoint-2000/pytorch_model.bin


{'eval_loss': 11.981754302978516, 'eval_f1': 0.5131087410993256, 'eval_runtime': 1.8288, 'eval_samples_per_second': 2562.278, 'eval_steps_per_second': 7.108, 'epoch': 20.41}


 24%|██▍       | 2400/9800 [07:21<22:43,  5.43it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 34.813, 'learning_rate': 3.978494623655914e-05, 'epoch': 24.49}



 24%|██▍       | 2400/9800 [07:22<22:43,  5.43it/s]Saving model checkpoint to temp/checkpoint-2400
Configuration saved in temp/checkpoint-2400/config.json


{'eval_loss': 11.936613082885742, 'eval_f1': 0.48922886022043677, 'eval_runtime': 1.8422, 'eval_samples_per_second': 2543.641, 'eval_steps_per_second': 7.057, 'epoch': 24.49}


Model weights saved in temp/checkpoint-2400/pytorch_model.bin
 29%|██▊       | 2800/9800 [08:38<21:46,  5.36it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 34.0415, 'learning_rate': 3.763440860215054e-05, 'epoch': 28.57}



 29%|██▊       | 2800/9800 [08:39<21:46,  5.36it/s]Saving model checkpoint to temp/checkpoint-2800
Configuration saved in temp/checkpoint-2800/config.json
Model weights saved in temp/checkpoint-2800/pytorch_model.bin


{'eval_loss': 11.824414253234863, 'eval_f1': 0.4989040480371101, 'eval_runtime': 1.8594, 'eval_samples_per_second': 2520.232, 'eval_steps_per_second': 6.992, 'epoch': 28.57}


 33%|███▎      | 3200/9800 [09:54<20:17,  5.42it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 33.6749, 'learning_rate': 3.548387096774194e-05, 'epoch': 32.65}



 33%|███▎      | 3200/9800 [09:56<20:17,  5.42it/s]Saving model checkpoint to temp/checkpoint-3200
Configuration saved in temp/checkpoint-3200/config.json
Model weights saved in temp/checkpoint-3200/pytorch_model.bin


{'eval_loss': 13.413186073303223, 'eval_f1': 0.5214574209682881, 'eval_runtime': 1.8393, 'eval_samples_per_second': 2547.724, 'eval_steps_per_second': 7.068, 'epoch': 32.65}


 37%|███▋      | 3600/9800 [11:11<19:04,  5.42it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 33.2048, 'learning_rate': 3.3333333333333335e-05, 'epoch': 36.73}



 37%|███▋      | 3600/9800 [11:13<19:04,  5.42it/s]Saving model checkpoint to temp/checkpoint-3600
Configuration saved in temp/checkpoint-3600/config.json
Model weights saved in temp/checkpoint-3600/pytorch_model.bin


{'eval_loss': 12.871524810791016, 'eval_f1': 0.5427542014848119, 'eval_runtime': 1.8399, 'eval_samples_per_second': 2546.819, 'eval_steps_per_second': 7.065, 'epoch': 36.73}


 41%|████      | 4000/9800 [12:27<18:00,  5.37it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 33.2993, 'learning_rate': 3.118279569892473e-05, 'epoch': 40.82}



 41%|████      | 4000/9800 [12:29<18:00,  5.37it/s]Saving model checkpoint to temp/checkpoint-4000
Configuration saved in temp/checkpoint-4000/config.json
Model weights saved in temp/checkpoint-4000/pytorch_model.bin


{'eval_loss': 12.951295852661133, 'eval_f1': 0.5887166660568344, 'eval_runtime': 1.8491, 'eval_samples_per_second': 2534.187, 'eval_steps_per_second': 7.03, 'epoch': 40.82}


 45%|████▍     | 4400/9800 [13:44<16:41,  5.39it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 32.9969, 'learning_rate': 2.9032258064516133e-05, 'epoch': 44.9}



 45%|████▍     | 4400/9800 [13:46<16:41,  5.39it/s]Saving model checkpoint to temp/checkpoint-4400
Configuration saved in temp/checkpoint-4400/config.json
Model weights saved in temp/checkpoint-4400/pytorch_model.bin


{'eval_loss': 13.1460599899292, 'eval_f1': 0.6030681519674047, 'eval_runtime': 1.8813, 'eval_samples_per_second': 2490.883, 'eval_steps_per_second': 6.91, 'epoch': 44.9}


 49%|████▉     | 4800/9800 [15:00<15:28,  5.39it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 32.6423, 'learning_rate': 2.6881720430107527e-05, 'epoch': 48.98}



 49%|████▉     | 4800/9800 [15:02<15:28,  5.39it/s]Saving model checkpoint to temp/checkpoint-4800
Configuration saved in temp/checkpoint-4800/config.json
Model weights saved in temp/checkpoint-4800/pytorch_model.bin


{'eval_loss': 13.787633895874023, 'eval_f1': 0.6048269315940543, 'eval_runtime': 1.8401, 'eval_samples_per_second': 2546.647, 'eval_steps_per_second': 7.065, 'epoch': 48.98}


 53%|█████▎    | 5200/9800 [16:16<14:07,  5.43it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 32.5843, 'learning_rate': 2.4731182795698928e-05, 'epoch': 53.06}



 53%|█████▎    | 5200/9800 [16:18<14:07,  5.43it/s]Saving model checkpoint to temp/checkpoint-5200
Configuration saved in temp/checkpoint-5200/config.json
Model weights saved in temp/checkpoint-5200/pytorch_model.bin


{'eval_loss': 13.720497131347656, 'eval_f1': 0.5961110950837165, 'eval_runtime': 1.8665, 'eval_samples_per_second': 2510.521, 'eval_steps_per_second': 6.965, 'epoch': 53.06}


 57%|█████▋    | 5600/9800 [17:33<12:53,  5.43it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 32.078, 'learning_rate': 2.258064516129032e-05, 'epoch': 57.14}



 57%|█████▋    | 5600/9800 [17:35<12:53,  5.43it/s]Saving model checkpoint to temp/checkpoint-5600
Configuration saved in temp/checkpoint-5600/config.json


{'eval_loss': 13.808062553405762, 'eval_f1': 0.6047752352918248, 'eval_runtime': 1.8584, 'eval_samples_per_second': 2521.554, 'eval_steps_per_second': 6.995, 'epoch': 57.14}


Model weights saved in temp/checkpoint-5600/pytorch_model.bin
 61%|██████    | 6000/9800 [18:49<11:37,  5.45it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 31.9279, 'learning_rate': 2.0430107526881722e-05, 'epoch': 61.22}



 61%|██████    | 6000/9800 [18:51<11:37,  5.45it/s]Saving model checkpoint to temp/checkpoint-6000
Configuration saved in temp/checkpoint-6000/config.json


{'eval_loss': 12.444465637207031, 'eval_f1': 0.6002800284333977, 'eval_runtime': 1.8332, 'eval_samples_per_second': 2556.222, 'eval_steps_per_second': 7.092, 'epoch': 61.22}


Model weights saved in temp/checkpoint-6000/pytorch_model.bin
 65%|██████▌   | 6400/9800 [20:05<10:22,  5.46it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 32.143, 'learning_rate': 1.827956989247312e-05, 'epoch': 65.31}



 65%|██████▌   | 6400/9800 [20:07<10:22,  5.46it/s]Saving model checkpoint to temp/checkpoint-6400
Configuration saved in temp/checkpoint-6400/config.json
Model weights saved in temp/checkpoint-6400/pytorch_model.bin


{'eval_loss': 11.360507011413574, 'eval_f1': 0.6048675909045106, 'eval_runtime': 1.8103, 'eval_samples_per_second': 2588.559, 'eval_steps_per_second': 7.181, 'epoch': 65.31}


 69%|██████▉   | 6800/9800 [21:21<09:11,  5.44it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 31.7305, 'learning_rate': 1.6129032258064517e-05, 'epoch': 69.39}



 69%|██████▉   | 6800/9800 [21:23<09:11,  5.44it/s]Saving model checkpoint to temp/checkpoint-6800
Configuration saved in temp/checkpoint-6800/config.json


{'eval_loss': 15.06811809539795, 'eval_f1': 0.6090850960512102, 'eval_runtime': 1.856, 'eval_samples_per_second': 2524.837, 'eval_steps_per_second': 7.004, 'epoch': 69.39}


Model weights saved in temp/checkpoint-6800/pytorch_model.bin
 73%|███████▎  | 7200/9800 [22:38<07:58,  5.43it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 31.5004, 'learning_rate': 1.3978494623655914e-05, 'epoch': 73.47}



 73%|███████▎  | 7200/9800 [22:40<07:58,  5.43it/s]Saving model checkpoint to temp/checkpoint-7200
Configuration saved in temp/checkpoint-7200/config.json
Model weights saved in temp/checkpoint-7200/pytorch_model.bin


{'eval_loss': 14.519152641296387, 'eval_f1': 0.6106903664983102, 'eval_runtime': 1.8225, 'eval_samples_per_second': 2571.166, 'eval_steps_per_second': 7.133, 'epoch': 73.47}


 78%|███████▊  | 7600/9800 [23:55<06:46,  5.41it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 31.7608, 'learning_rate': 1.1827956989247313e-05, 'epoch': 77.55}



 78%|███████▊  | 7600/9800 [23:57<06:46,  5.41it/s]Saving model checkpoint to temp/checkpoint-7600
Configuration saved in temp/checkpoint-7600/config.json
Model weights saved in temp/checkpoint-7600/pytorch_model.bin


{'eval_loss': 13.08981704711914, 'eval_f1': 0.6043577922957216, 'eval_runtime': 1.8772, 'eval_samples_per_second': 2496.334, 'eval_steps_per_second': 6.925, 'epoch': 77.55}


 82%|████████▏ | 8000/9800 [25:13<05:33,  5.40it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 31.3773, 'learning_rate': 9.67741935483871e-06, 'epoch': 81.63}



 82%|████████▏ | 8000/9800 [25:14<05:33,  5.40it/s]Saving model checkpoint to temp/checkpoint-8000
Configuration saved in temp/checkpoint-8000/config.json


{'eval_loss': 14.265535354614258, 'eval_f1': 0.6207579028909626, 'eval_runtime': 1.8673, 'eval_samples_per_second': 2509.526, 'eval_steps_per_second': 6.962, 'epoch': 81.63}


Model weights saved in temp/checkpoint-8000/pytorch_model.bin
 86%|████████▌ | 8400/9800 [26:29<04:15,  5.49it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 31.4079, 'learning_rate': 7.526881720430108e-06, 'epoch': 85.71}



 86%|████████▌ | 8400/9800 [26:31<04:15,  5.49it/s]Saving model checkpoint to temp/checkpoint-8400
Configuration saved in temp/checkpoint-8400/config.json


{'eval_loss': 13.873469352722168, 'eval_f1': 0.6111793581320077, 'eval_runtime': 1.8466, 'eval_samples_per_second': 2537.678, 'eval_steps_per_second': 7.04, 'epoch': 85.71}


Model weights saved in temp/checkpoint-8400/pytorch_model.bin
 90%|████████▉ | 8800/9800 [27:45<03:03,  5.46it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 31.3462, 'learning_rate': 5.376344086021506e-06, 'epoch': 89.8}



 90%|████████▉ | 8800/9800 [27:47<03:03,  5.46it/s]Saving model checkpoint to temp/checkpoint-8800
Configuration saved in temp/checkpoint-8800/config.json


{'eval_loss': 14.786992073059082, 'eval_f1': 0.6224650164747335, 'eval_runtime': 2.0494, 'eval_samples_per_second': 2286.573, 'eval_steps_per_second': 6.343, 'epoch': 89.8}


Model weights saved in temp/checkpoint-8800/pytorch_model.bin
 94%|█████████▍| 9200/9800 [29:01<01:49,  5.49it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 31.2787, 'learning_rate': 3.225806451612903e-06, 'epoch': 93.88}



 94%|█████████▍| 9200/9800 [29:03<01:49,  5.49it/s]Saving model checkpoint to temp/checkpoint-9200
Configuration saved in temp/checkpoint-9200/config.json
Model weights saved in temp/checkpoint-9200/pytorch_model.bin


{'eval_loss': 14.492399215698242, 'eval_f1': 0.6240814646945103, 'eval_runtime': 1.8455, 'eval_samples_per_second': 2539.129, 'eval_steps_per_second': 7.044, 'epoch': 93.88}


 98%|█████████▊| 9600/9800 [30:17<00:36,  5.43it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 31.3527, 'learning_rate': 1.0752688172043011e-06, 'epoch': 97.96}



 98%|█████████▊| 9600/9800 [30:19<00:36,  5.43it/s]Saving model checkpoint to temp/checkpoint-9600
Configuration saved in temp/checkpoint-9600/config.json
Model weights saved in temp/checkpoint-9600/pytorch_model.bin


{'eval_loss': 14.406484603881836, 'eval_f1': 0.6233590135480873, 'eval_runtime': 1.8535, 'eval_samples_per_second': 2528.162, 'eval_steps_per_second': 7.014, 'epoch': 97.96}


100%|██████████| 9800/9800 [30:56<00:00,  6.12it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-1600 (score: 9.6487398147583).
100%|██████████| 9800/9800 [30:57<00:00,  5.28it/s]
***** Running training *****
  Num examples = 18744
  Num Epochs = 100
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 9800


{'train_runtime': 1857.0075, 'train_samples_per_second': 1009.366, 'train_steps_per_second': 5.277, 'train_loss': 35.38380689971301, 'epoch': 100.0}


  4%|▍         | 400/9800 [01:12<28:10,  5.56it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 35.0289, 'learning_rate': 4e-05, 'epoch': 4.08}



  4%|▍         | 400/9800 [01:14<28:10,  5.56it/s]Saving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json
Model weights saved in temp/checkpoint-400/pytorch_model.bin


{'eval_loss': 12.421996116638184, 'eval_accuracy': 0.5407597097737943, 'eval_runtime': 1.8064, 'eval_samples_per_second': 2594.049, 'eval_steps_per_second': 7.196, 'epoch': 4.08}


  8%|▊         | 800/9800 [02:26<26:51,  5.58it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 35.2159, 'learning_rate': 4.8387096774193554e-05, 'epoch': 8.16}



  8%|▊         | 800/9800 [02:28<26:51,  5.58it/s]Saving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json


{'eval_loss': 10.05798053741455, 'eval_accuracy': 0.47588561673068713, 'eval_runtime': 1.7793, 'eval_samples_per_second': 2633.553, 'eval_steps_per_second': 7.306, 'epoch': 8.16}


Model weights saved in temp/checkpoint-800/pytorch_model.bin
 12%|█▏        | 1200/9800 [03:43<26:29,  5.41it/s] ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 34.1613, 'learning_rate': 4.6236559139784944e-05, 'epoch': 12.24}



 12%|█▏        | 1200/9800 [03:44<26:29,  5.41it/s]Saving model checkpoint to temp/checkpoint-1200
Configuration saved in temp/checkpoint-1200/config.json


{'eval_loss': 10.40015983581543, 'eval_accuracy': 0.5766111822449851, 'eval_runtime': 1.8437, 'eval_samples_per_second': 2541.613, 'eval_steps_per_second': 7.051, 'epoch': 12.24}


Model weights saved in temp/checkpoint-1200/pytorch_model.bin
 16%|█▋        | 1600/9800 [05:00<25:22,  5.39it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 34.0678, 'learning_rate': 4.408602150537635e-05, 'epoch': 16.33}



 16%|█▋        | 1600/9800 [05:02<25:22,  5.39it/s]Saving model checkpoint to temp/checkpoint-1600
Configuration saved in temp/checkpoint-1600/config.json
Model weights saved in temp/checkpoint-1600/pytorch_model.bin


{'eval_loss': 11.744795799255371, 'eval_accuracy': 0.5744771660264618, 'eval_runtime': 1.8436, 'eval_samples_per_second': 2541.833, 'eval_steps_per_second': 7.052, 'epoch': 16.33}


 20%|██        | 2000/9800 [06:17<24:10,  5.38it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 33.4566, 'learning_rate': 4.1935483870967746e-05, 'epoch': 20.41}



 20%|██        | 2000/9800 [06:19<24:10,  5.38it/s]Saving model checkpoint to temp/checkpoint-2000
Configuration saved in temp/checkpoint-2000/config.json


{'eval_loss': 13.868905067443848, 'eval_accuracy': 0.5783183952198037, 'eval_runtime': 1.851, 'eval_samples_per_second': 2531.641, 'eval_steps_per_second': 7.023, 'epoch': 20.41}


Model weights saved in temp/checkpoint-2000/pytorch_model.bin
 24%|██▍       | 2400/9800 [07:34<23:08,  5.33it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 33.0645, 'learning_rate': 3.978494623655914e-05, 'epoch': 24.49}



 24%|██▍       | 2400/9800 [07:36<23:08,  5.33it/s]Saving model checkpoint to temp/checkpoint-2400
Configuration saved in temp/checkpoint-2400/config.json
Model weights saved in temp/checkpoint-2400/pytorch_model.bin


{'eval_loss': 9.028046607971191, 'eval_accuracy': 0.596884336320956, 'eval_runtime': 1.8488, 'eval_samples_per_second': 2534.67, 'eval_steps_per_second': 7.032, 'epoch': 24.49}


 29%|██▊       | 2800/9800 [08:51<21:41,  5.38it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 32.4548, 'learning_rate': 3.763440860215054e-05, 'epoch': 28.57}



 29%|██▊       | 2800/9800 [08:53<21:41,  5.38it/s]Saving model checkpoint to temp/checkpoint-2800
Configuration saved in temp/checkpoint-2800/config.json


{'eval_loss': 12.775546073913574, 'eval_accuracy': 0.5973111395646606, 'eval_runtime': 1.8645, 'eval_samples_per_second': 2513.261, 'eval_steps_per_second': 6.972, 'epoch': 28.57}


Model weights saved in temp/checkpoint-2800/pytorch_model.bin
 33%|███▎      | 3200/9800 [10:08<20:09,  5.46it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 32.4857, 'learning_rate': 3.548387096774194e-05, 'epoch': 32.65}



 33%|███▎      | 3200/9800 [10:10<20:09,  5.46it/s]Saving model checkpoint to temp/checkpoint-3200
Configuration saved in temp/checkpoint-3200/config.json


{'eval_loss': 12.767621040344238, 'eval_accuracy': 0.6084080239009817, 'eval_runtime': 1.8347, 'eval_samples_per_second': 2554.152, 'eval_steps_per_second': 7.086, 'epoch': 32.65}


Model weights saved in temp/checkpoint-3200/pytorch_model.bin
 37%|███▋      | 3600/9800 [11:24<18:52,  5.47it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 32.1166, 'learning_rate': 3.3333333333333335e-05, 'epoch': 36.73}



 37%|███▋      | 3600/9800 [11:26<18:52,  5.47it/s]Saving model checkpoint to temp/checkpoint-3600
Configuration saved in temp/checkpoint-3600/config.json


{'eval_loss': 13.722142219543457, 'eval_accuracy': 0.6131028595817328, 'eval_runtime': 1.8389, 'eval_samples_per_second': 2548.251, 'eval_steps_per_second': 7.069, 'epoch': 36.73}


Model weights saved in temp/checkpoint-3600/pytorch_model.bin
 41%|████      | 4000/9800 [12:41<17:47,  5.43it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 31.9565, 'learning_rate': 3.118279569892473e-05, 'epoch': 40.82}



 41%|████      | 4000/9800 [12:43<17:47,  5.43it/s]Saving model checkpoint to temp/checkpoint-4000
Configuration saved in temp/checkpoint-4000/config.json
Model weights saved in temp/checkpoint-4000/pytorch_model.bin


{'eval_loss': 12.1445894241333, 'eval_accuracy': 0.5958173282116944, 'eval_runtime': 1.8338, 'eval_samples_per_second': 2555.377, 'eval_steps_per_second': 7.089, 'epoch': 40.82}


 45%|████▍     | 4400/9800 [13:57<16:24,  5.49it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 31.9701, 'learning_rate': 2.9032258064516133e-05, 'epoch': 44.9}



 45%|████▍     | 4400/9800 [13:59<16:24,  5.49it/s]Saving model checkpoint to temp/checkpoint-4400
Configuration saved in temp/checkpoint-4400/config.json


{'eval_loss': 11.009113311767578, 'eval_accuracy': 0.6094750320102432, 'eval_runtime': 1.8408, 'eval_samples_per_second': 2545.654, 'eval_steps_per_second': 7.062, 'epoch': 44.9}


Model weights saved in temp/checkpoint-4400/pytorch_model.bin
 49%|████▉     | 4800/9800 [15:14<16:00,  5.20it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 31.7646, 'learning_rate': 2.6881720430107527e-05, 'epoch': 48.98}



 49%|████▉     | 4800/9800 [15:16<16:00,  5.20it/s]Saving model checkpoint to temp/checkpoint-4800
Configuration saved in temp/checkpoint-4800/config.json


{'eval_loss': 12.813078880310059, 'eval_accuracy': 0.6156636790439607, 'eval_runtime': 2.0743, 'eval_samples_per_second': 2259.125, 'eval_steps_per_second': 6.267, 'epoch': 48.98}


Model weights saved in temp/checkpoint-4800/pytorch_model.bin
 53%|█████▎    | 5200/9800 [16:31<13:59,  5.48it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 31.1546, 'learning_rate': 2.4731182795698928e-05, 'epoch': 53.06}



 53%|█████▎    | 5200/9800 [16:33<13:59,  5.48it/s]Saving model checkpoint to temp/checkpoint-5200
Configuration saved in temp/checkpoint-5200/config.json
Model weights saved in temp/checkpoint-5200/pytorch_model.bin


{'eval_loss': 13.643570899963379, 'eval_accuracy': 0.615877080665813, 'eval_runtime': 1.8497, 'eval_samples_per_second': 2533.316, 'eval_steps_per_second': 7.028, 'epoch': 53.06}


 57%|█████▋    | 5600/9800 [17:48<12:59,  5.39it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 31.234, 'learning_rate': 2.258064516129032e-05, 'epoch': 57.14}



 57%|█████▋    | 5600/9800 [17:49<12:59,  5.39it/s]Saving model checkpoint to temp/checkpoint-5600
Configuration saved in temp/checkpoint-5600/config.json


{'eval_loss': 14.236291885375977, 'eval_accuracy': 0.6182244985061887, 'eval_runtime': 1.85, 'eval_samples_per_second': 2532.917, 'eval_steps_per_second': 7.027, 'epoch': 57.14}


Model weights saved in temp/checkpoint-5600/pytorch_model.bin
 61%|██████    | 6000/9800 [19:05<11:19,  5.59it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 31.1835, 'learning_rate': 2.0430107526881722e-05, 'epoch': 61.22}



 61%|██████    | 6000/9800 [19:06<11:19,  5.59it/s]Saving model checkpoint to temp/checkpoint-6000
Configuration saved in temp/checkpoint-6000/config.json


{'eval_loss': 14.007142066955566, 'eval_accuracy': 0.6278275714895433, 'eval_runtime': 1.7599, 'eval_samples_per_second': 2662.658, 'eval_steps_per_second': 7.387, 'epoch': 61.22}


Model weights saved in temp/checkpoint-6000/pytorch_model.bin
 65%|██████▌   | 6400/9800 [20:20<09:43,  5.82it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 31.3671, 'learning_rate': 1.827956989247312e-05, 'epoch': 65.31}



 65%|██████▌   | 6400/9800 [20:22<09:43,  5.82it/s]Saving model checkpoint to temp/checkpoint-6400
Configuration saved in temp/checkpoint-6400/config.json


{'eval_loss': 13.57508373260498, 'eval_accuracy': 0.6235595390524968, 'eval_runtime': 1.7723, 'eval_samples_per_second': 2644.055, 'eval_steps_per_second': 7.335, 'epoch': 65.31}


Model weights saved in temp/checkpoint-6400/pytorch_model.bin
 69%|██████▉   | 6800/9800 [21:35<08:54,  5.62it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 30.8448, 'learning_rate': 1.6129032258064517e-05, 'epoch': 69.39}



 69%|██████▉   | 6800/9800 [21:37<08:54,  5.62it/s]Saving model checkpoint to temp/checkpoint-6800
Configuration saved in temp/checkpoint-6800/config.json


{'eval_loss': 12.700119018554688, 'eval_accuracy': 0.6173708920187794, 'eval_runtime': 1.8822, 'eval_samples_per_second': 2489.694, 'eval_steps_per_second': 6.907, 'epoch': 69.39}


Model weights saved in temp/checkpoint-6800/pytorch_model.bin
 73%|███████▎  | 7200/9800 [22:52<08:03,  5.38it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 30.6873, 'learning_rate': 1.3978494623655914e-05, 'epoch': 73.47}



 73%|███████▎  | 7200/9800 [22:54<08:03,  5.38it/s]Saving model checkpoint to temp/checkpoint-7200
Configuration saved in temp/checkpoint-7200/config.json


{'eval_loss': 14.525928497314453, 'eval_accuracy': 0.6173708920187794, 'eval_runtime': 1.8622, 'eval_samples_per_second': 2516.354, 'eval_steps_per_second': 6.981, 'epoch': 73.47}


Model weights saved in temp/checkpoint-7200/pytorch_model.bin
 78%|███████▊  | 7600/9800 [24:08<06:43,  5.45it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 30.9381, 'learning_rate': 1.1827956989247313e-05, 'epoch': 77.55}



 78%|███████▊  | 7600/9800 [24:10<06:43,  5.45it/s]Saving model checkpoint to temp/checkpoint-7600
Configuration saved in temp/checkpoint-7600/config.json


{'eval_loss': 13.433034896850586, 'eval_accuracy': 0.6220657276995305, 'eval_runtime': 1.8651, 'eval_samples_per_second': 2512.424, 'eval_steps_per_second': 6.97, 'epoch': 77.55}


Model weights saved in temp/checkpoint-7600/pytorch_model.bin
 82%|████████▏ | 8000/9800 [25:25<05:29,  5.46it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 30.5903, 'learning_rate': 9.67741935483871e-06, 'epoch': 81.63}



 82%|████████▏ | 8000/9800 [25:26<05:29,  5.46it/s]Saving model checkpoint to temp/checkpoint-8000
Configuration saved in temp/checkpoint-8000/config.json
Model weights saved in temp/checkpoint-8000/pytorch_model.bin


{'eval_loss': 13.73576831817627, 'eval_accuracy': 0.6291079812206573, 'eval_runtime': 1.8441, 'eval_samples_per_second': 2541.077, 'eval_steps_per_second': 7.05, 'epoch': 81.63}


 86%|████████▌ | 8400/9800 [26:39<04:01,  5.80it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 30.5707, 'learning_rate': 7.526881720430108e-06, 'epoch': 85.71}



 86%|████████▌ | 8400/9800 [26:41<04:01,  5.80it/s]Saving model checkpoint to temp/checkpoint-8400
Configuration saved in temp/checkpoint-8400/config.json


{'eval_loss': 13.519620895385742, 'eval_accuracy': 0.6297481860862143, 'eval_runtime': 1.7742, 'eval_samples_per_second': 2641.132, 'eval_steps_per_second': 7.327, 'epoch': 85.71}


Model weights saved in temp/checkpoint-8400/pytorch_model.bin
 90%|████████▉ | 8800/9800 [27:56<03:07,  5.32it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 30.7113, 'learning_rate': 5.376344086021506e-06, 'epoch': 89.8}



 90%|████████▉ | 8800/9800 [27:58<03:07,  5.32it/s]Saving model checkpoint to temp/checkpoint-8800
Configuration saved in temp/checkpoint-8800/config.json
Model weights saved in temp/checkpoint-8800/pytorch_model.bin


{'eval_loss': 13.449499130249023, 'eval_accuracy': 0.6288945795988049, 'eval_runtime': 1.8597, 'eval_samples_per_second': 2519.732, 'eval_steps_per_second': 6.99, 'epoch': 89.8}


 94%|█████████▍| 9200/9800 [29:13<01:50,  5.44it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 30.7215, 'learning_rate': 3.225806451612903e-06, 'epoch': 93.88}



 94%|█████████▍| 9200/9800 [29:15<01:50,  5.44it/s]Saving model checkpoint to temp/checkpoint-9200
Configuration saved in temp/checkpoint-9200/config.json


{'eval_loss': 13.819748878479004, 'eval_accuracy': 0.6288945795988049, 'eval_runtime': 1.8693, 'eval_samples_per_second': 2506.868, 'eval_steps_per_second': 6.955, 'epoch': 93.88}


Model weights saved in temp/checkpoint-9200/pytorch_model.bin
 98%|█████████▊| 9600/9800 [30:30<00:36,  5.42it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 30.7453, 'learning_rate': 1.0752688172043011e-06, 'epoch': 97.96}



 98%|█████████▊| 9600/9800 [30:32<00:36,  5.42it/s]Saving model checkpoint to temp/checkpoint-9600
Configuration saved in temp/checkpoint-9600/config.json
Model weights saved in temp/checkpoint-9600/pytorch_model.bin


{'eval_loss': 13.722984313964844, 'eval_accuracy': 0.6297481860862143, 'eval_runtime': 1.849, 'eval_samples_per_second': 2534.333, 'eval_steps_per_second': 7.031, 'epoch': 97.96}


100%|██████████| 9800/9800 [31:09<00:00,  6.01it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-2400 (score: 9.028046607971191).
100%|██████████| 9800/9800 [31:09<00:00,  5.24it/s]

{'train_runtime': 1869.9495, 'train_samples_per_second': 1002.38, 'train_steps_per_second': 5.241, 'train_loss': 31.999580327248086, 'epoch': 100.0}





TrainOutput(global_step=9800, training_loss=31.999580327248086, metrics={'train_runtime': 1869.9495, 'train_samples_per_second': 1002.38, 'train_steps_per_second': 5.241, 'train_loss': 31.999580327248086, 'epoch': 100.0})