In [1]:
import pandas as pd
import torch
import os

from torch.utils.data import Dataset
from transformers import (BertTokenizer, BertForSequenceClassification, Trainer,
                          TrainingArguments, BertPreTrainedModel)
# from simpletransformers.language_modeling import LanguageModelingModel
from sklearn.metrics import accuracy_score, f1_score


In [2]:
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))
print(torch.cuda.get_device_properties(0))

True
NVIDIA GeForce RTX 3060
_CudaDeviceProperties(name='NVIDIA GeForce RTX 3060', major=8, minor=6, total_memory=12050MB, multi_processor_count=28)


In [3]:
MODEL_NAME = '/home/abdullah/Code/dl/lt_bert/best_models/pruned_best_tiny_bert'
TRAIN_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/merged_train.csv'
EVAL_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/merged_test.csv'


In [4]:
def tsv_to_text(tsv_file_loc):
    file_name = tsv_file_loc.split("/")[-1].split(".")[0]
    txt_name = tsv_file_loc.replace(".tsv", ".txt")
    txt_name = txt_name.replace("split_merged", "texts")

    if os.path.exists(txt_name):
        return [txt_name, file_name]

    df = pd.read_csv(tsv_file_loc, sep="\t")

    for txt in df["text"]:
        with open(txt_name, "a", encoding="utf8") as f:
            f.writelines(txt + "\n")
    return [txt_name, file_name]


tsv_to_text(TRAIN_FILE_LOC)


['/home/abdullah/Code/dl/bnlp-resources/sentiment/merged_train.csv',
 'merged_train']

In [5]:
def csv_to_df(csv_file_loc):
    df = pd.read_csv(csv_file_loc)
    # remove id column
    # df = df.drop(columns=['id'])

    # replace neutral with 0, positive with 1, negative with 2 in class_label column
    df.loc[df['label'] == 'Neutral', 'label'] = 0
    df.loc[df['label'] == 'Positive', 'label'] = 1
    df.loc[df['label'] == 'Negative', 'label'] = 2

    return df


train_df = csv_to_df(TRAIN_FILE_LOC)

texts = train_df['text'].tolist()
print(train_df)


                                                    text label
0         @BoyBanglar ভালোই,, তাও ত বাংলার বেকার ছেলে না     1
1      'সুন্নাহের গুরুত্ব ও প্রয়োজনীয়তাhttp:/t.co/Kv9...     0
2      সারা দুনিয়া থেকে যদি বাছাই করে ৫০০ জন ভালো মা...     0
3      ক্রিকেট বোর্ড কে যদি দুদকের সম্মুখীন করা হয় এব...     2
4      সপ্তাহের জন্য তারা একটি দুর্দান্ত কাজ করে এই র...     1
...                                                  ...   ...
18739  RT @arafatul: গত এক দশক ধরে জার্মান রাজনীতি খু...     0
18740  না অাছে কোনো পাওয়ার হিটার, না অাছে একটা লেগস্প...     2
18741  গাজীপুরের শ্রীপুরে ১০০ পিছ ইয়াবা ও গাঁজাসহ ৩ জ...     2
18742   করো উন্নতি হউক বা না হউক মেডামের শরীরের ভালো ...     2
18743  খেলোয়াড়দের কথা বাদ দিলাম। বোর্ড, কোচ, নির্বাচক...     2

[18744 rows x 2 columns]


In [6]:
def compute_metrics(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  # calculate accuracy using sklearn's function
  acc = accuracy_score(labels, preds)
  return {
      'accuracy': acc,
  }


In [7]:
def f1_calculator(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  # calculate accuracy using sklearn's function
  f1 = f1_score(labels, preds, average='weighted')
  return {
      'f1': f1,
  }


In [8]:
train_list = tsv_to_text(TRAIN_FILE_LOC)
eval_list = tsv_to_text(EVAL_FILE_LOC)


In [9]:
os.environ["WANDB_DISABLED"] = "true"

train_df = csv_to_df(TRAIN_FILE_LOC)
eval_df = csv_to_df(EVAL_FILE_LOC)


class TINYDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(value[idx])
                for key, value in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

train_encodings = tokenizer(train_df['text'].tolist(), padding=True, truncation=True, max_length=512)
eval_encodings = tokenizer(eval_df['text'].tolist(), padding=True, truncation=True, max_length=512)

train_dataset = TINYDataset(train_encodings, train_df['label'].tolist())
eval_dataset = TINYDataset(eval_encodings, eval_df['label'].tolist())

model = BertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3).to('cuda')
model.manual_seed = 14


Some weights of the model checkpoint at /home/abdullah/Code/dl/lt_bert/best_models/pruned_best_tiny_bert were not used when initializing BertForSequenceClassification: ['bert.encoder.layer.1.output.dense.weight_mask', 'bert.encoder.layer.0.attention.self.query.weight_mask', 'bert.encoder.layer.1.attention.output.dense.weight_mask', 'cls.predictions.transform.LayerNorm.bias', 'bert.encoder.layer.0.output.dense.weight_orig', 'bert.encoder.layer.0.attention.self.query.weight_orig', 'bert.encoder.layer.1.intermediate.dense.weight_mask', 'bert.encoder.layer.0.attention.output.dense.weight_orig', 'bert.encoder.layer.1.output.dense.weight_orig', 'bert.encoder.layer.0.attention.output.dense.weight_mask', 'bert.encoder.layer.1.attention.self.query.weight_mask', 'cls.predictions.transform.LayerNorm.weight', 'bert.encoder.layer.0.attention.self.value.weight_orig', 'bert.encoder.layer.1.intermediate.dense.weight_orig', 'bert.encoder.layer.0.intermediate.dense.weight_orig', 'bert.encoder.layer.1.at

## 1 Epoch Train

In [10]:
training_args = TrainingArguments(
    output_dir=f"temp",
    num_train_epochs=1,
    per_device_train_batch_size=48,
    per_device_eval_batch_size=96,
    warmup_steps=500,
    learning_rate=5e-5,
    weight_decay=0.01,
    overwrite_output_dir=True,
    logging_dir=f"temp/logs",
    logging_steps=130,
    save_steps=130,
    load_best_model_at_end=True,
    evaluation_strategy="steps",
    seed=14,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset

)

trainer.train()


Using the `WAND_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
***** Running training *****
  Num examples = 18744
  Num Epochs = 1
  Instantaneous batch size per device = 48
  Total train batch size (w. parallel, distributed & accumulation) = 48
  Gradient Accumulation steps = 1
  Total optimization steps = 391
 33%|███▎      | 130/391 [00:07<00:14, 18.62it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 1.0859, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.33}


                                                 
 33%|███▎      | 130/391 [00:08<00:14, 18.62it/s]Saving model checkpoint to temp/checkpoint-130
Configuration saved in temp/checkpoint-130/config.json


{'eval_loss': 1.074790596961975, 'eval_runtime': 1.4909, 'eval_samples_per_second': 3143.163, 'eval_steps_per_second': 32.867, 'epoch': 0.33}


Model weights saved in temp/checkpoint-130/pytorch_model.bin
 66%|██████▋   | 260/391 [00:16<00:06, 19.38it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 1.0667, 'learning_rate': 2.6000000000000002e-05, 'epoch': 0.66}


                                                 
 66%|██████▋   | 260/391 [00:18<00:06, 19.38it/s]Saving model checkpoint to temp/checkpoint-260
Configuration saved in temp/checkpoint-260/config.json


{'eval_loss': 1.0611416101455688, 'eval_runtime': 1.4938, 'eval_samples_per_second': 3136.897, 'eval_steps_per_second': 32.802, 'epoch': 0.66}


Model weights saved in temp/checkpoint-260/pytorch_model.bin
100%|█████████▉| 390/391 [00:26<00:00, 19.35it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 0.9965, 'learning_rate': 3.9000000000000006e-05, 'epoch': 1.0}


                                                 
100%|█████████▉| 390/391 [00:28<00:00, 19.35it/s]Saving model checkpoint to temp/checkpoint-390
Configuration saved in temp/checkpoint-390/config.json


{'eval_loss': 0.9461308717727661, 'eval_runtime': 1.4958, 'eval_samples_per_second': 3132.682, 'eval_steps_per_second': 32.757, 'epoch': 1.0}


Model weights saved in temp/checkpoint-390/pytorch_model.bin
100%|██████████| 391/391 [00:29<00:00,  2.22it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-390 (score: 0.9461308717727661).
100%|██████████| 391/391 [00:29<00:00, 13.18it/s]

{'train_runtime': 29.6652, 'train_samples_per_second': 631.85, 'train_steps_per_second': 13.18, 'train_loss': 1.0494939073577256, 'epoch': 1.0}





TrainOutput(global_step=391, training_loss=1.0494939073577256, metrics={'train_runtime': 29.6652, 'train_samples_per_second': 631.85, 'train_steps_per_second': 13.18, 'train_loss': 1.0494939073577256, 'epoch': 1.0})

## 100 Epoch with 1 epoch model

In [12]:
MODEL_NAME = '/home/abdullah/Code/dl/lt_bert/best_models/1/1_combined_50'
model = BertForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=3).to('cuda')
model.manual_seed = 14

for name, param in model.named_parameters():
    if 'bert.encoder.layer.0' in name:
        # print(name, param.requires_grad)
        param.requires_grad = False
# print("\n\n\n")
# for name, param in model.named_parameters():
#     print(name, param.requires_grad)

training_args = TrainingArguments(
    output_dir=f"temp",
    num_train_epochs=100,
    per_device_train_batch_size=192,
    per_device_eval_batch_size=384,
    warmup_steps=500,
    learning_rate=5e-5,
    weight_decay=0.01,
    overwrite_output_dir=True,
    logging_dir=f"temp/logs",
    logging_steps=400,
    save_steps=400,
    load_best_model_at_end=True,
    evaluation_strategy="steps",
    seed=14,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=f1_calculator,

)

trainer.train()

# print(cool.metrics)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,

)

trainer.train()


loading configuration file /home/abdullah/Code/dl/lt_bert/best_models/1/1_combined_50/config.json
Model config BertConfig {
  "_name_or_path": "/home/abdullah/Code/dl/lt_bert/best_models/pruned_best_tiny_bert",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 512,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.12.5",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_siz

{'loss': 0.932, 'learning_rate': 4e-05, 'epoch': 4.08}


                                                  
  4%|▍         | 400/9800 [01:13<27:42,  5.66it/s]Saving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json


{'eval_loss': 0.9268326163291931, 'eval_f1': 0.4224656966846347, 'eval_runtime': 1.7821, 'eval_samples_per_second': 2629.452, 'eval_steps_per_second': 7.295, 'epoch': 4.08}


Model weights saved in temp/checkpoint-400/pytorch_model.bin
  8%|▊         | 800/9800 [02:26<26:48,  5.60it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.9124, 'learning_rate': 4.8387096774193554e-05, 'epoch': 8.16}


                                                  
  8%|▊         | 800/9800 [02:27<26:48,  5.60it/s]Saving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json


{'eval_loss': 0.8964125514030457, 'eval_f1': 0.4446060169844989, 'eval_runtime': 1.7923, 'eval_samples_per_second': 2614.513, 'eval_steps_per_second': 7.253, 'epoch': 8.16}


Model weights saved in temp/checkpoint-800/pytorch_model.bin
 12%|█▏        | 1200/9800 [03:40<25:52,  5.54it/s] ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.8501, 'learning_rate': 4.6236559139784944e-05, 'epoch': 12.24}


                                                   
 12%|█▏        | 1200/9800 [03:42<25:52,  5.54it/s]Saving model checkpoint to temp/checkpoint-1200
Configuration saved in temp/checkpoint-1200/config.json


{'eval_loss': 0.8312300443649292, 'eval_f1': 0.6217534669756833, 'eval_runtime': 1.79, 'eval_samples_per_second': 2617.855, 'eval_steps_per_second': 7.263, 'epoch': 12.24}


Model weights saved in temp/checkpoint-1200/pytorch_model.bin
 16%|█▋        | 1600/9800 [04:54<24:16,  5.63it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.8131, 'learning_rate': 4.408602150537635e-05, 'epoch': 16.33}


                                                   
 16%|█▋        | 1600/9800 [04:56<24:16,  5.63it/s]Saving model checkpoint to temp/checkpoint-1600
Configuration saved in temp/checkpoint-1600/config.json


{'eval_loss': 0.8229454755783081, 'eval_f1': 0.6303078160918113, 'eval_runtime': 1.7896, 'eval_samples_per_second': 2618.526, 'eval_steps_per_second': 7.264, 'epoch': 16.33}


Model weights saved in temp/checkpoint-1600/pytorch_model.bin
 20%|██        | 2000/9800 [06:08<23:00,  5.65it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.7938, 'learning_rate': 4.1935483870967746e-05, 'epoch': 20.41}


                                                   
 20%|██        | 2000/9800 [06:10<23:00,  5.65it/s]Saving model checkpoint to temp/checkpoint-2000
Configuration saved in temp/checkpoint-2000/config.json


{'eval_loss': 0.8207581639289856, 'eval_f1': 0.6361189965223594, 'eval_runtime': 1.768, 'eval_samples_per_second': 2650.393, 'eval_steps_per_second': 7.353, 'epoch': 20.41}


Model weights saved in temp/checkpoint-2000/pytorch_model.bin
 24%|██▍       | 2400/9800 [07:22<21:51,  5.64it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.7735, 'learning_rate': 3.978494623655914e-05, 'epoch': 24.49}


                                                   
 24%|██▍       | 2400/9800 [07:24<21:51,  5.64it/s]Saving model checkpoint to temp/checkpoint-2400
Configuration saved in temp/checkpoint-2400/config.json


{'eval_loss': 0.8200910687446594, 'eval_f1': 0.6452599890285028, 'eval_runtime': 1.7918, 'eval_samples_per_second': 2615.18, 'eval_steps_per_second': 7.255, 'epoch': 24.49}


Model weights saved in temp/checkpoint-2400/pytorch_model.bin
 29%|██▊       | 2800/9800 [08:36<20:53,  5.58it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.7484, 'learning_rate': 3.763440860215054e-05, 'epoch': 28.57}


                                                   
 29%|██▊       | 2800/9800 [08:38<20:53,  5.58it/s]Saving model checkpoint to temp/checkpoint-2800
Configuration saved in temp/checkpoint-2800/config.json


{'eval_loss': 0.8233359456062317, 'eval_f1': 0.6476206525982856, 'eval_runtime': 1.7743, 'eval_samples_per_second': 2640.993, 'eval_steps_per_second': 7.327, 'epoch': 28.57}


Model weights saved in temp/checkpoint-2800/pytorch_model.bin
 33%|███▎      | 3200/9800 [09:51<19:41,  5.58it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.7295, 'learning_rate': 3.548387096774194e-05, 'epoch': 32.65}


                                                   
 33%|███▎      | 3200/9800 [09:52<19:41,  5.58it/s]Saving model checkpoint to temp/checkpoint-3200
Configuration saved in temp/checkpoint-3200/config.json


{'eval_loss': 0.8262816071510315, 'eval_f1': 0.6453565663825808, 'eval_runtime': 1.7757, 'eval_samples_per_second': 2638.977, 'eval_steps_per_second': 7.321, 'epoch': 32.65}


Model weights saved in temp/checkpoint-3200/pytorch_model.bin
 37%|███▋      | 3600/9800 [11:05<18:26,  5.60it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.7153, 'learning_rate': 3.3333333333333335e-05, 'epoch': 36.73}


                                                   
 37%|███▋      | 3600/9800 [11:07<18:26,  5.60it/s]Saving model checkpoint to temp/checkpoint-3600
Configuration saved in temp/checkpoint-3600/config.json


{'eval_loss': 0.8320893049240112, 'eval_f1': 0.6491470214549394, 'eval_runtime': 1.7775, 'eval_samples_per_second': 2636.335, 'eval_steps_per_second': 7.314, 'epoch': 36.73}


Model weights saved in temp/checkpoint-3600/pytorch_model.bin
 41%|████      | 4000/9800 [12:19<17:08,  5.64it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.7029, 'learning_rate': 3.118279569892473e-05, 'epoch': 40.82}


                                                   
 41%|████      | 4000/9800 [12:21<17:08,  5.64it/s]Saving model checkpoint to temp/checkpoint-4000
Configuration saved in temp/checkpoint-4000/config.json


{'eval_loss': 0.8421012759208679, 'eval_f1': 0.6511020057079464, 'eval_runtime': 1.7771, 'eval_samples_per_second': 2636.855, 'eval_steps_per_second': 7.315, 'epoch': 40.82}


Model weights saved in temp/checkpoint-4000/pytorch_model.bin
 45%|████▍     | 4400/9800 [13:33<15:50,  5.68it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6938, 'learning_rate': 2.9032258064516133e-05, 'epoch': 44.9}


                                                   
 45%|████▍     | 4400/9800 [13:34<15:50,  5.68it/s]Saving model checkpoint to temp/checkpoint-4400
Configuration saved in temp/checkpoint-4400/config.json


{'eval_loss': 0.8491581678390503, 'eval_f1': 0.6514259094708419, 'eval_runtime': 1.7567, 'eval_samples_per_second': 2667.45, 'eval_steps_per_second': 7.4, 'epoch': 44.9}


Model weights saved in temp/checkpoint-4400/pytorch_model.bin
 49%|████▉     | 4800/9800 [14:46<14:54,  5.59it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6857, 'learning_rate': 2.6881720430107527e-05, 'epoch': 48.98}


                                                   
 49%|████▉     | 4800/9800 [14:48<14:54,  5.59it/s]Saving model checkpoint to temp/checkpoint-4800
Configuration saved in temp/checkpoint-4800/config.json


{'eval_loss': 0.8590767979621887, 'eval_f1': 0.6466339467371769, 'eval_runtime': 1.7841, 'eval_samples_per_second': 2626.543, 'eval_steps_per_second': 7.287, 'epoch': 48.98}


Model weights saved in temp/checkpoint-4800/pytorch_model.bin
 53%|█████▎    | 5200/9800 [16:01<13:26,  5.70it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6782, 'learning_rate': 2.4731182795698928e-05, 'epoch': 53.06}


                                                   
 53%|█████▎    | 5200/9800 [16:02<13:26,  5.70it/s]Saving model checkpoint to temp/checkpoint-5200
Configuration saved in temp/checkpoint-5200/config.json


{'eval_loss': 0.8638338446617126, 'eval_f1': 0.6449579607758779, 'eval_runtime': 1.7694, 'eval_samples_per_second': 2648.414, 'eval_steps_per_second': 7.347, 'epoch': 53.06}


Model weights saved in temp/checkpoint-5200/pytorch_model.bin
 57%|█████▋    | 5600/9800 [17:14<12:27,  5.62it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.672, 'learning_rate': 2.258064516129032e-05, 'epoch': 57.14}


                                                   
 57%|█████▋    | 5600/9800 [17:16<12:27,  5.62it/s]Saving model checkpoint to temp/checkpoint-5600
Configuration saved in temp/checkpoint-5600/config.json


{'eval_loss': 0.8725993633270264, 'eval_f1': 0.6429306245083211, 'eval_runtime': 1.7852, 'eval_samples_per_second': 2624.967, 'eval_steps_per_second': 7.282, 'epoch': 57.14}


Model weights saved in temp/checkpoint-5600/pytorch_model.bin
 61%|██████    | 6000/9800 [18:28<11:12,  5.65it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6655, 'learning_rate': 2.0430107526881722e-05, 'epoch': 61.22}


                                                   
 61%|██████    | 6000/9800 [18:30<11:12,  5.65it/s]Saving model checkpoint to temp/checkpoint-6000
Configuration saved in temp/checkpoint-6000/config.json


{'eval_loss': 0.8746367692947388, 'eval_f1': 0.647092830446133, 'eval_runtime': 1.7613, 'eval_samples_per_second': 2660.464, 'eval_steps_per_second': 7.381, 'epoch': 61.22}


Model weights saved in temp/checkpoint-6000/pytorch_model.bin
 65%|██████▌   | 6400/9800 [19:42<10:02,  5.65it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6642, 'learning_rate': 1.827956989247312e-05, 'epoch': 65.31}


                                                   
 65%|██████▌   | 6400/9800 [19:44<10:02,  5.65it/s]Saving model checkpoint to temp/checkpoint-6400
Configuration saved in temp/checkpoint-6400/config.json


{'eval_loss': 0.8770627379417419, 'eval_f1': 0.6460668975469535, 'eval_runtime': 1.7627, 'eval_samples_per_second': 2658.408, 'eval_steps_per_second': 7.375, 'epoch': 65.31}


Model weights saved in temp/checkpoint-6400/pytorch_model.bin
 69%|██████▉   | 6800/9800 [20:57<08:48,  5.68it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6588, 'learning_rate': 1.6129032258064517e-05, 'epoch': 69.39}


                                                   
 69%|██████▉   | 6800/9800 [20:58<08:48,  5.68it/s]Saving model checkpoint to temp/checkpoint-6800
Configuration saved in temp/checkpoint-6800/config.json


{'eval_loss': 0.883075475692749, 'eval_f1': 0.6481030195503154, 'eval_runtime': 1.742, 'eval_samples_per_second': 2690.061, 'eval_steps_per_second': 7.463, 'epoch': 69.39}


Model weights saved in temp/checkpoint-6800/pytorch_model.bin
 73%|███████▎  | 7200/9800 [22:10<07:37,  5.68it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6569, 'learning_rate': 1.3978494623655914e-05, 'epoch': 73.47}


                                                   
 73%|███████▎  | 7200/9800 [22:12<07:37,  5.68it/s]Saving model checkpoint to temp/checkpoint-7200
Configuration saved in temp/checkpoint-7200/config.json


{'eval_loss': 0.8839555978775024, 'eval_f1': 0.6458400874312616, 'eval_runtime': 1.7621, 'eval_samples_per_second': 2659.369, 'eval_steps_per_second': 7.378, 'epoch': 73.47}


Model weights saved in temp/checkpoint-7200/pytorch_model.bin
 78%|███████▊  | 7600/9800 [23:23<06:29,  5.65it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6561, 'learning_rate': 1.1827956989247313e-05, 'epoch': 77.55}


                                                   
 78%|███████▊  | 7600/9800 [23:25<06:29,  5.65it/s]Saving model checkpoint to temp/checkpoint-7600
Configuration saved in temp/checkpoint-7600/config.json


{'eval_loss': 0.8886541724205017, 'eval_f1': 0.6436692793144205, 'eval_runtime': 1.7676, 'eval_samples_per_second': 2651.081, 'eval_steps_per_second': 7.355, 'epoch': 77.55}


Model weights saved in temp/checkpoint-7600/pytorch_model.bin
 82%|████████▏ | 8000/9800 [24:38<05:20,  5.62it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6512, 'learning_rate': 9.67741935483871e-06, 'epoch': 81.63}


                                                   
 82%|████████▏ | 8000/9800 [24:40<05:20,  5.62it/s]Saving model checkpoint to temp/checkpoint-8000
Configuration saved in temp/checkpoint-8000/config.json


{'eval_loss': 0.891326904296875, 'eval_f1': 0.64345817866895, 'eval_runtime': 1.7765, 'eval_samples_per_second': 2637.828, 'eval_steps_per_second': 7.318, 'epoch': 81.63}


Model weights saved in temp/checkpoint-8000/pytorch_model.bin
 86%|████████▌ | 8400/9800 [25:52<04:09,  5.61it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6505, 'learning_rate': 7.526881720430108e-06, 'epoch': 85.71}


                                                   
 86%|████████▌ | 8400/9800 [25:53<04:09,  5.61it/s]Saving model checkpoint to temp/checkpoint-8400
Configuration saved in temp/checkpoint-8400/config.json


{'eval_loss': 0.8938393592834473, 'eval_f1': 0.6439761192210715, 'eval_runtime': 1.7868, 'eval_samples_per_second': 2622.578, 'eval_steps_per_second': 7.276, 'epoch': 85.71}


Model weights saved in temp/checkpoint-8400/pytorch_model.bin
 90%|████████▉ | 8800/9800 [27:05<02:57,  5.64it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6477, 'learning_rate': 5.376344086021506e-06, 'epoch': 89.8}


                                                   
 90%|████████▉ | 8800/9800 [27:07<02:57,  5.64it/s]Saving model checkpoint to temp/checkpoint-8800
Configuration saved in temp/checkpoint-8800/config.json


{'eval_loss': 0.895429790019989, 'eval_f1': 0.6443165636170556, 'eval_runtime': 1.7621, 'eval_samples_per_second': 2659.27, 'eval_steps_per_second': 7.377, 'epoch': 89.8}


Model weights saved in temp/checkpoint-8800/pytorch_model.bin
 94%|█████████▍| 9200/9800 [28:19<01:46,  5.64it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6446, 'learning_rate': 3.225806451612903e-06, 'epoch': 93.88}


                                                   
 94%|█████████▍| 9200/9800 [28:21<01:46,  5.64it/s]Saving model checkpoint to temp/checkpoint-9200
Configuration saved in temp/checkpoint-9200/config.json


{'eval_loss': 0.8963251709938049, 'eval_f1': 0.6432787440829502, 'eval_runtime': 1.7612, 'eval_samples_per_second': 2660.755, 'eval_steps_per_second': 7.382, 'epoch': 93.88}


Model weights saved in temp/checkpoint-9200/pytorch_model.bin
 98%|█████████▊| 9600/9800 [29:33<00:35,  5.58it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6474, 'learning_rate': 1.0752688172043011e-06, 'epoch': 97.96}


                                                   
 98%|█████████▊| 9600/9800 [29:35<00:35,  5.58it/s]Saving model checkpoint to temp/checkpoint-9600
Configuration saved in temp/checkpoint-9600/config.json


{'eval_loss': 0.8975085616111755, 'eval_f1': 0.6430413426041153, 'eval_runtime': 1.7746, 'eval_samples_per_second': 2640.543, 'eval_steps_per_second': 7.325, 'epoch': 97.96}


Model weights saved in temp/checkpoint-9600/pytorch_model.bin
100%|██████████| 9800/9800 [30:12<00:00,  6.27it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-2400 (score: 0.8200910687446594).
100%|██████████| 9800/9800 [30:12<00:00,  5.41it/s]
***** Running training *****
  Num examples = 18744
  Num Epochs = 100
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 9800


{'train_runtime': 1812.2248, 'train_samples_per_second': 1034.309, 'train_steps_per_second': 5.408, 'train_loss': 0.7169937663175622, 'epoch': 100.0}


  4%|▍         | 400/9800 [01:11<27:37,  5.67it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.7532, 'learning_rate': 4e-05, 'epoch': 4.08}



  4%|▍         | 400/9800 [01:13<27:37,  5.67it/s]Saving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json


{'eval_loss': 0.8267119526863098, 'eval_accuracy': 0.6457533077251387, 'eval_runtime': 1.7752, 'eval_samples_per_second': 2639.645, 'eval_steps_per_second': 7.323, 'epoch': 4.08}


Model weights saved in temp/checkpoint-400/pytorch_model.bin
  8%|▊         | 800/9800 [02:25<26:42,  5.62it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.7424, 'learning_rate': 4.8387096774193554e-05, 'epoch': 8.16}



  8%|▊         | 800/9800 [02:27<26:42,  5.62it/s]Saving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json


{'eval_loss': 0.8244216442108154, 'eval_accuracy': 0.6487409304310713, 'eval_runtime': 1.7923, 'eval_samples_per_second': 2614.483, 'eval_steps_per_second': 7.253, 'epoch': 8.16}


Model weights saved in temp/checkpoint-800/pytorch_model.bin
 12%|█▏        | 1200/9800 [03:40<25:36,  5.60it/s] ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.7215, 'learning_rate': 4.6236559139784944e-05, 'epoch': 12.24}



 12%|█▏        | 1200/9800 [03:41<25:36,  5.60it/s]Saving model checkpoint to temp/checkpoint-1200
Configuration saved in temp/checkpoint-1200/config.json


{'eval_loss': 0.835204541683197, 'eval_accuracy': 0.6515151515151515, 'eval_runtime': 1.7934, 'eval_samples_per_second': 2612.908, 'eval_steps_per_second': 7.249, 'epoch': 12.24}


Model weights saved in temp/checkpoint-1200/pytorch_model.bin
 16%|█▋        | 1600/9800 [04:54<24:12,  5.64it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.7078, 'learning_rate': 4.408602150537635e-05, 'epoch': 16.33}



 16%|█▋        | 1600/9800 [04:56<24:12,  5.64it/s]Saving model checkpoint to temp/checkpoint-1600
Configuration saved in temp/checkpoint-1600/config.json


{'eval_loss': 0.8407989740371704, 'eval_accuracy': 0.6498079385403329, 'eval_runtime': 1.7703, 'eval_samples_per_second': 2647.022, 'eval_steps_per_second': 7.343, 'epoch': 16.33}


Model weights saved in temp/checkpoint-1600/pytorch_model.bin
 20%|██        | 2000/9800 [06:07<23:11,  5.61it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6939, 'learning_rate': 4.1935483870967746e-05, 'epoch': 20.41}



 20%|██        | 2000/9800 [06:09<23:11,  5.61it/s]Saving model checkpoint to temp/checkpoint-2000
Configuration saved in temp/checkpoint-2000/config.json


{'eval_loss': 0.8559519648551941, 'eval_accuracy': 0.6489543320529236, 'eval_runtime': 1.7685, 'eval_samples_per_second': 2649.641, 'eval_steps_per_second': 7.351, 'epoch': 20.41}


Model weights saved in temp/checkpoint-2000/pytorch_model.bin
 24%|██▍       | 2400/9800 [07:21<22:02,  5.60it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6832, 'learning_rate': 3.978494623655914e-05, 'epoch': 24.49}



 24%|██▍       | 2400/9800 [07:23<22:02,  5.60it/s]Saving model checkpoint to temp/checkpoint-2400
Configuration saved in temp/checkpoint-2400/config.json


{'eval_loss': 0.865621030330658, 'eval_accuracy': 0.6474605206999573, 'eval_runtime': 1.787, 'eval_samples_per_second': 2622.201, 'eval_steps_per_second': 7.275, 'epoch': 24.49}


Model weights saved in temp/checkpoint-2400/pytorch_model.bin
 29%|██▊       | 2800/9800 [08:35<20:53,  5.59it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.675, 'learning_rate': 3.763440860215054e-05, 'epoch': 28.57}



 29%|██▊       | 2800/9800 [08:37<20:53,  5.59it/s]Saving model checkpoint to temp/checkpoint-2800
Configuration saved in temp/checkpoint-2800/config.json


{'eval_loss': 0.8716239333152771, 'eval_accuracy': 0.6436192915066155, 'eval_runtime': 1.7751, 'eval_samples_per_second': 2639.831, 'eval_steps_per_second': 7.323, 'epoch': 28.57}


Model weights saved in temp/checkpoint-2800/pytorch_model.bin
 33%|███▎      | 3200/9800 [09:50<19:45,  5.57it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.667, 'learning_rate': 3.548387096774194e-05, 'epoch': 32.65}



 33%|███▎      | 3200/9800 [09:51<19:45,  5.57it/s]Saving model checkpoint to temp/checkpoint-3200
Configuration saved in temp/checkpoint-3200/config.json


{'eval_loss': 0.8768733143806458, 'eval_accuracy': 0.6470337174562527, 'eval_runtime': 1.7925, 'eval_samples_per_second': 2614.164, 'eval_steps_per_second': 7.252, 'epoch': 32.65}


Model weights saved in temp/checkpoint-3200/pytorch_model.bin
 37%|███▋      | 3600/9800 [11:04<18:22,  5.62it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6594, 'learning_rate': 3.3333333333333335e-05, 'epoch': 36.73}



 37%|███▋      | 3600/9800 [11:06<18:22,  5.62it/s]Saving model checkpoint to temp/checkpoint-3600
Configuration saved in temp/checkpoint-3600/config.json


{'eval_loss': 0.88669353723526, 'eval_accuracy': 0.6463935125906957, 'eval_runtime': 1.7758, 'eval_samples_per_second': 2638.856, 'eval_steps_per_second': 7.321, 'epoch': 36.73}


Model weights saved in temp/checkpoint-3600/pytorch_model.bin
 41%|████      | 4000/9800 [12:18<16:59,  5.69it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6541, 'learning_rate': 3.118279569892473e-05, 'epoch': 40.82}



 41%|████      | 4000/9800 [12:19<16:59,  5.69it/s]Saving model checkpoint to temp/checkpoint-4000
Configuration saved in temp/checkpoint-4000/config.json


{'eval_loss': 0.8963725566864014, 'eval_accuracy': 0.6416986769099445, 'eval_runtime': 1.7833, 'eval_samples_per_second': 2627.743, 'eval_steps_per_second': 7.29, 'epoch': 40.82}


Model weights saved in temp/checkpoint-4000/pytorch_model.bin
 45%|████▍     | 4400/9800 [13:31<16:00,  5.62it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6495, 'learning_rate': 2.9032258064516133e-05, 'epoch': 44.9}



 45%|████▍     | 4400/9800 [13:33<16:00,  5.62it/s]Saving model checkpoint to temp/checkpoint-4400
Configuration saved in temp/checkpoint-4400/config.json


{'eval_loss': 0.9009112119674683, 'eval_accuracy': 0.6446862996158771, 'eval_runtime': 1.7755, 'eval_samples_per_second': 2639.251, 'eval_steps_per_second': 7.322, 'epoch': 44.9}


Model weights saved in temp/checkpoint-4400/pytorch_model.bin
 49%|████▉     | 4800/9800 [14:46<14:50,  5.61it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.644, 'learning_rate': 2.6881720430107527e-05, 'epoch': 48.98}



 49%|████▉     | 4800/9800 [14:47<14:50,  5.61it/s]Saving model checkpoint to temp/checkpoint-4800
Configuration saved in temp/checkpoint-4800/config.json


{'eval_loss': 0.9102607369422913, 'eval_accuracy': 0.6419120785317969, 'eval_runtime': 1.7732, 'eval_samples_per_second': 2642.678, 'eval_steps_per_second': 7.331, 'epoch': 48.98}


Model weights saved in temp/checkpoint-4800/pytorch_model.bin
 53%|█████▎    | 5200/9800 [16:00<13:26,  5.70it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6403, 'learning_rate': 2.4731182795698928e-05, 'epoch': 53.06}



 53%|█████▎    | 5200/9800 [16:02<13:26,  5.70it/s]Saving model checkpoint to temp/checkpoint-5200
Configuration saved in temp/checkpoint-5200/config.json


{'eval_loss': 0.9116031527519226, 'eval_accuracy': 0.639991463935126, 'eval_runtime': 1.7868, 'eval_samples_per_second': 2622.523, 'eval_steps_per_second': 7.275, 'epoch': 53.06}


Model weights saved in temp/checkpoint-5200/pytorch_model.bin
 57%|█████▋    | 5600/9800 [17:14<12:28,  5.61it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6364, 'learning_rate': 2.258064516129032e-05, 'epoch': 57.14}



 57%|█████▋    | 5600/9800 [17:16<12:28,  5.61it/s]Saving model checkpoint to temp/checkpoint-5600
Configuration saved in temp/checkpoint-5600/config.json


{'eval_loss': 0.9166362285614014, 'eval_accuracy': 0.6389244558258643, 'eval_runtime': 1.7829, 'eval_samples_per_second': 2628.281, 'eval_steps_per_second': 7.291, 'epoch': 57.14}


Model weights saved in temp/checkpoint-5600/pytorch_model.bin
 61%|██████    | 6000/9800 [18:29<11:13,  5.65it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6317, 'learning_rate': 2.0430107526881722e-05, 'epoch': 61.22}



 61%|██████    | 6000/9800 [18:30<11:13,  5.65it/s]Saving model checkpoint to temp/checkpoint-6000
Configuration saved in temp/checkpoint-6000/config.json


{'eval_loss': 0.9203482866287231, 'eval_accuracy': 0.6393512590695689, 'eval_runtime': 1.7793, 'eval_samples_per_second': 2633.679, 'eval_steps_per_second': 7.306, 'epoch': 61.22}


Model weights saved in temp/checkpoint-6000/pytorch_model.bin
 65%|██████▌   | 6400/9800 [19:43<09:58,  5.68it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6316, 'learning_rate': 1.827956989247312e-05, 'epoch': 65.31}



 65%|██████▌   | 6400/9800 [19:44<09:58,  5.68it/s]Saving model checkpoint to temp/checkpoint-6400
Configuration saved in temp/checkpoint-6400/config.json


{'eval_loss': 0.9197334051132202, 'eval_accuracy': 0.639991463935126, 'eval_runtime': 1.7475, 'eval_samples_per_second': 2681.488, 'eval_steps_per_second': 7.439, 'epoch': 65.31}


Model weights saved in temp/checkpoint-6400/pytorch_model.bin
 69%|██████▉   | 6800/9800 [20:56<08:57,  5.58it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6277, 'learning_rate': 1.6129032258064517e-05, 'epoch': 69.39}



 69%|██████▉   | 6800/9800 [20:58<08:57,  5.58it/s]Saving model checkpoint to temp/checkpoint-6800
Configuration saved in temp/checkpoint-6800/config.json


{'eval_loss': 0.9249185919761658, 'eval_accuracy': 0.6410584720443875, 'eval_runtime': 1.807, 'eval_samples_per_second': 2593.222, 'eval_steps_per_second': 7.194, 'epoch': 69.39}


Model weights saved in temp/checkpoint-6800/pytorch_model.bin
 73%|███████▎  | 7200/9800 [22:09<07:53,  5.49it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6268, 'learning_rate': 1.3978494623655914e-05, 'epoch': 73.47}



 73%|███████▎  | 7200/9800 [22:11<07:53,  5.49it/s]Saving model checkpoint to temp/checkpoint-7200
Configuration saved in temp/checkpoint-7200/config.json


{'eval_loss': 0.9248293042182922, 'eval_accuracy': 0.6397780623132736, 'eval_runtime': 1.7442, 'eval_samples_per_second': 2686.663, 'eval_steps_per_second': 7.453, 'epoch': 73.47}


Model weights saved in temp/checkpoint-7200/pytorch_model.bin
 78%|███████▊  | 7600/9800 [23:26<06:52,  5.33it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6258, 'learning_rate': 1.1827956989247313e-05, 'epoch': 77.55}



 78%|███████▊  | 7600/9800 [23:28<06:52,  5.33it/s]Saving model checkpoint to temp/checkpoint-7600
Configuration saved in temp/checkpoint-7600/config.json


{'eval_loss': 0.9303790926933289, 'eval_accuracy': 0.6389244558258643, 'eval_runtime': 1.8623, 'eval_samples_per_second': 2516.227, 'eval_steps_per_second': 6.981, 'epoch': 77.55}


Model weights saved in temp/checkpoint-7600/pytorch_model.bin
 82%|████████▏ | 8000/9800 [24:42<05:12,  5.76it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6212, 'learning_rate': 9.67741935483871e-06, 'epoch': 81.63}



 82%|████████▏ | 8000/9800 [24:43<05:12,  5.76it/s]Saving model checkpoint to temp/checkpoint-8000
Configuration saved in temp/checkpoint-8000/config.json


{'eval_loss': 0.9336408376693726, 'eval_accuracy': 0.6382842509603073, 'eval_runtime': 1.8474, 'eval_samples_per_second': 2536.559, 'eval_steps_per_second': 7.037, 'epoch': 81.63}


Model weights saved in temp/checkpoint-8000/pytorch_model.bin
 86%|████████▌ | 8400/9800 [25:58<04:21,  5.36it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6217, 'learning_rate': 7.526881720430108e-06, 'epoch': 85.71}



 86%|████████▌ | 8400/9800 [26:00<04:21,  5.36it/s]Saving model checkpoint to temp/checkpoint-8400
Configuration saved in temp/checkpoint-8400/config.json


{'eval_loss': 0.9364458918571472, 'eval_accuracy': 0.6384976525821596, 'eval_runtime': 1.8523, 'eval_samples_per_second': 2529.858, 'eval_steps_per_second': 7.018, 'epoch': 85.71}


Model weights saved in temp/checkpoint-8400/pytorch_model.bin
 90%|████████▉ | 8800/9800 [27:13<02:56,  5.68it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6193, 'learning_rate': 5.376344086021506e-06, 'epoch': 89.8}



 90%|████████▉ | 8800/9800 [27:15<02:56,  5.68it/s]Saving model checkpoint to temp/checkpoint-8800
Configuration saved in temp/checkpoint-8800/config.json


{'eval_loss': 0.9373853802680969, 'eval_accuracy': 0.6393512590695689, 'eval_runtime': 1.7599, 'eval_samples_per_second': 2662.718, 'eval_steps_per_second': 7.387, 'epoch': 89.8}


Model weights saved in temp/checkpoint-8800/pytorch_model.bin
 94%|█████████▍| 9200/9800 [28:25<01:41,  5.90it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6164, 'learning_rate': 3.225806451612903e-06, 'epoch': 93.88}



 94%|█████████▍| 9200/9800 [28:27<01:41,  5.90it/s]Saving model checkpoint to temp/checkpoint-9200
Configuration saved in temp/checkpoint-9200/config.json


{'eval_loss': 0.93868488073349, 'eval_accuracy': 0.637430644472898, 'eval_runtime': 1.7109, 'eval_samples_per_second': 2738.905, 'eval_steps_per_second': 7.598, 'epoch': 93.88}


Model weights saved in temp/checkpoint-9200/pytorch_model.bin
 98%|█████████▊| 9600/9800 [29:40<00:36,  5.55it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6196, 'learning_rate': 1.0752688172043011e-06, 'epoch': 97.96}



 98%|█████████▊| 9600/9800 [29:42<00:36,  5.55it/s]Saving model checkpoint to temp/checkpoint-9600
Configuration saved in temp/checkpoint-9600/config.json


{'eval_loss': 0.9393672347068787, 'eval_accuracy': 0.6384976525821596, 'eval_runtime': 1.7724, 'eval_samples_per_second': 2643.923, 'eval_steps_per_second': 7.335, 'epoch': 97.96}


Model weights saved in temp/checkpoint-9600/pytorch_model.bin
100%|██████████| 9800/9800 [30:19<00:00,  6.16it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-800 (score: 0.8244216442108154).
100%|██████████| 9800/9800 [30:19<00:00,  5.39it/s]

{'train_runtime': 1819.5274, 'train_samples_per_second': 1030.158, 'train_steps_per_second': 5.386, 'train_loss': 0.6562452728894292, 'epoch': 100.0}





TrainOutput(global_step=9800, training_loss=0.6562452728894292, metrics={'train_runtime': 1819.5274, 'train_samples_per_second': 1030.158, 'train_steps_per_second': 5.386, 'train_loss': 0.6562452728894292, 'epoch': 100.0})