In [1]:
import pandas as pd
import torch
import os

from torch.utils.data import Dataset
from transformers import (BertTokenizer, BertForSequenceClassification, Trainer,
                          TrainingArguments, BertPreTrainedModel)
# from simpletransformers.language_modeling import LanguageModelingModel
from sklearn.metrics import accuracy_score, f1_score


In [2]:
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))
print(torch.cuda.get_device_properties(0))

True
NVIDIA GeForce RTX 3060
_CudaDeviceProperties(name='NVIDIA GeForce RTX 3060', major=8, minor=6, total_memory=12050MB, multi_processor_count=28)


In [3]:
MODEL_NAME = '/home/abdullah/Code/dl/lt_bert/best_models/best_tiny_bert'
TRAIN_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/merged_train.csv'
EVAL_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/merged_test.csv'


In [4]:
def tsv_to_text(tsv_file_loc):
    file_name = tsv_file_loc.split("/")[-1].split(".")[0]
    txt_name = tsv_file_loc.replace(".tsv", ".txt")
    txt_name = txt_name.replace("split_merged", "texts")

    if os.path.exists(txt_name):
        return [txt_name, file_name]

    df = pd.read_csv(tsv_file_loc, sep="\t")

    for txt in df["text"]:
        with open(txt_name, "a", encoding="utf8") as f:
            f.writelines(txt + "\n")
    return [txt_name, file_name]


tsv_to_text(TRAIN_FILE_LOC)


['/home/abdullah/Code/dl/bnlp-resources/sentiment/merged_train.csv',
 'merged_train']

In [5]:
def csv_to_df(csv_file_loc):
    df = pd.read_csv(csv_file_loc)
    # remove id column
    # df = df.drop(columns=['id'])

    # replace neutral with 0, positive with 1, negative with 2 in class_label column
    df.loc[df['label'] == 'Neutral', 'label'] = 0
    df.loc[df['label'] == 'Positive', 'label'] = 1
    df.loc[df['label'] == 'Negative', 'label'] = 2

    return df


train_df = csv_to_df(TRAIN_FILE_LOC)

texts = train_df['text'].tolist()
print(train_df)


                                                    text label
0         @BoyBanglar ভালোই,, তাও ত বাংলার বেকার ছেলে না     1
1      'সুন্নাহের গুরুত্ব ও প্রয়োজনীয়তাhttp:/t.co/Kv9...     0
2      সারা দুনিয়া থেকে যদি বাছাই করে ৫০০ জন ভালো মা...     0
3      ক্রিকেট বোর্ড কে যদি দুদকের সম্মুখীন করা হয় এব...     2
4      সপ্তাহের জন্য তারা একটি দুর্দান্ত কাজ করে এই র...     1
...                                                  ...   ...
18739  RT @arafatul: গত এক দশক ধরে জার্মান রাজনীতি খু...     0
18740  না অাছে কোনো পাওয়ার হিটার, না অাছে একটা লেগস্প...     2
18741  গাজীপুরের শ্রীপুরে ১০০ পিছ ইয়াবা ও গাঁজাসহ ৩ জ...     2
18742   করো উন্নতি হউক বা না হউক মেডামের শরীরের ভালো ...     2
18743  খেলোয়াড়দের কথা বাদ দিলাম। বোর্ড, কোচ, নির্বাচক...     2

[18744 rows x 2 columns]


In [6]:
def compute_metrics(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  # calculate accuracy using sklearn's function
  acc = accuracy_score(labels, preds)
  return {
      'accuracy': acc,
  }


In [7]:
def f1_calculator(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  # calculate accuracy using sklearn's function
  f1 = f1_score(labels, preds, average='weighted')
  return {
      'f1': f1,
  }


In [8]:
train_list = tsv_to_text(TRAIN_FILE_LOC)
eval_list = tsv_to_text(EVAL_FILE_LOC)


In [9]:
os.environ["WANDB_DISABLED"] = "true"

train_df = csv_to_df(TRAIN_FILE_LOC)
eval_df = csv_to_df(EVAL_FILE_LOC)


class TINYDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(value[idx])
                for key, value in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

train_encodings = tokenizer(train_df['text'].tolist(), padding=True, truncation=True, max_length=512)
eval_encodings = tokenizer(eval_df['text'].tolist(), padding=True, truncation=True, max_length=512)

train_dataset = TINYDataset(train_encodings, train_df['label'].tolist())
eval_dataset = TINYDataset(eval_encodings, eval_df['label'].tolist())

model = BertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3).to('cuda')
model.manual_seed = 14


Some weights of the model checkpoint at /home/abdullah/Code/dl/lt_bert/best_models/best_tiny_bert were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model c

## 1 Epoch Train

In [10]:
# training_args = TrainingArguments(
#     output_dir=f"temp",
#     num_train_epochs=1,
#     per_device_train_batch_size=48,
#     per_device_eval_batch_size=96,
#     warmup_steps=500,
#     learning_rate=5e-5,
#     weight_decay=0.01,
#     overwrite_output_dir=True,
#     logging_dir=f"temp/logs",
#     logging_steps=130,
#     save_steps=130,
#     load_best_model_at_end=True,
#     evaluation_strategy="steps",
#     seed=14,
# )

# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=train_dataset,
#     eval_dataset=eval_dataset

# )

# trainer.train()


Using the `WAND_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
***** Running training *****
  Num examples = 18744
  Num Epochs = 1
  Instantaneous batch size per device = 48
  Total train batch size (w. parallel, distributed & accumulation) = 48
  Gradient Accumulation steps = 1
  Total optimization steps = 391
 33%|███▎      | 130/391 [00:07<00:14, 18.64it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 1.0833, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.33}


                                                 
 33%|███▎      | 130/391 [00:08<00:14, 18.64it/s]Saving model checkpoint to temp/checkpoint-130
Configuration saved in temp/checkpoint-130/config.json


{'eval_loss': 1.0609605312347412, 'eval_runtime': 1.5151, 'eval_samples_per_second': 3092.92, 'eval_steps_per_second': 32.342, 'epoch': 0.33}


Model weights saved in temp/checkpoint-130/pytorch_model.bin
 66%|██████▋   | 260/391 [00:17<00:07, 18.71it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 1.0169, 'learning_rate': 2.6000000000000002e-05, 'epoch': 0.66}


                                                 
 66%|██████▋   | 260/391 [00:19<00:07, 18.71it/s]Saving model checkpoint to temp/checkpoint-260
Configuration saved in temp/checkpoint-260/config.json


{'eval_loss': 0.9470070004463196, 'eval_runtime': 1.5117, 'eval_samples_per_second': 3099.87, 'eval_steps_per_second': 32.414, 'epoch': 0.66}


Model weights saved in temp/checkpoint-260/pytorch_model.bin
100%|█████████▉| 390/391 [00:27<00:00, 18.73it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 96


{'loss': 0.9302, 'learning_rate': 3.9000000000000006e-05, 'epoch': 1.0}


                                                 
100%|█████████▉| 390/391 [00:28<00:00, 18.73it/s]Saving model checkpoint to temp/checkpoint-390
Configuration saved in temp/checkpoint-390/config.json


{'eval_loss': 0.893717885017395, 'eval_runtime': 1.5125, 'eval_samples_per_second': 3098.249, 'eval_steps_per_second': 32.397, 'epoch': 1.0}


Model weights saved in temp/checkpoint-390/pytorch_model.bin
100%|██████████| 391/391 [00:30<00:00,  1.86it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-390 (score: 0.893717885017395).
100%|██████████| 391/391 [00:30<00:00, 12.78it/s]

{'train_runtime': 30.6137, 'train_samples_per_second': 612.276, 'train_steps_per_second': 12.772, 'train_loss': 1.0099699033800598, 'epoch': 1.0}





TrainOutput(global_step=391, training_loss=1.0099699033800598, metrics={'train_runtime': 30.6137, 'train_samples_per_second': 612.276, 'train_steps_per_second': 12.772, 'train_loss': 1.0099699033800598, 'epoch': 1.0})

## 100 Epoch with 1 epoch model

In [12]:
MODEL_NAME = '/home/abdullah/Code/dl/lt_bert/best_models/1/1_combined'
model = BertForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=3).to('cuda')
model.manual_seed = 14

for name, param in model.named_parameters():
    if 'bert.encoder.layer.0' in name:
        # print(name, param.requires_grad)
        param.requires_grad = False
# print("\n\n\n")
# for name, param in model.named_parameters():
#     print(name, param.requires_grad)

training_args = TrainingArguments(
    output_dir=f"temp",
    num_train_epochs=100,
    per_device_train_batch_size=192,
    per_device_eval_batch_size=384,
    warmup_steps=500,
    learning_rate=5e-5,
    weight_decay=0.01,
    overwrite_output_dir=True,
    logging_dir=f"temp/logs",
    logging_steps=400,
    save_steps=400,
    load_best_model_at_end=True,
    evaluation_strategy="steps",
    seed=14,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=f1_calculator,

)

trainer.train()

# print(cool.metrics)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,

)

trainer.train()


loading configuration file /home/abdullah/Code/dl/lt_bert/best_models/1/1_combined/config.json
Model config BertConfig {
  "_name_or_path": "/home/abdullah/Code/dl/lt_bert/best_models/best_tiny_bert",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 512,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.12.5",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522


{'loss': 0.8779, 'learning_rate': 4e-05, 'epoch': 4.08}



  4%|▍         | 400/9800 [01:13<27:49,  5.63it/s]Saving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json


{'eval_loss': 0.8381050229072571, 'eval_f1': 0.6311814528843358, 'eval_runtime': 1.7933, 'eval_samples_per_second': 2613.109, 'eval_steps_per_second': 7.249, 'epoch': 4.08}


Model weights saved in temp/checkpoint-400/pytorch_model.bin
  8%|▊         | 800/9800 [02:23<25:23,  5.91it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.8263, 'learning_rate': 4.8387096774193554e-05, 'epoch': 8.16}



  8%|▊         | 800/9800 [02:24<25:23,  5.91it/s]Saving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json


{'eval_loss': 0.8150410056114197, 'eval_f1': 0.6419399933743292, 'eval_runtime': 1.7037, 'eval_samples_per_second': 2750.555, 'eval_steps_per_second': 7.631, 'epoch': 8.16}


Model weights saved in temp/checkpoint-800/pytorch_model.bin
 12%|█▏        | 1200/9800 [03:34<24:17,  5.90it/s] ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.8003, 'learning_rate': 4.6236559139784944e-05, 'epoch': 12.24}



 12%|█▏        | 1200/9800 [03:36<24:17,  5.90it/s]Saving model checkpoint to temp/checkpoint-1200
Configuration saved in temp/checkpoint-1200/config.json


{'eval_loss': 0.8021912574768066, 'eval_f1': 0.6510371020220614, 'eval_runtime': 1.6975, 'eval_samples_per_second': 2760.505, 'eval_steps_per_second': 7.658, 'epoch': 12.24}


Model weights saved in temp/checkpoint-1200/pytorch_model.bin
 16%|█▋        | 1600/9800 [04:45<23:01,  5.93it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.781, 'learning_rate': 4.408602150537635e-05, 'epoch': 16.33}



 16%|█▋        | 1600/9800 [04:47<23:01,  5.93it/s]Saving model checkpoint to temp/checkpoint-1600
Configuration saved in temp/checkpoint-1600/config.json
Model weights saved in temp/checkpoint-1600/pytorch_model.bin


{'eval_loss': 0.7931427359580994, 'eval_f1': 0.6563714342288443, 'eval_runtime': 1.6992, 'eval_samples_per_second': 2757.82, 'eval_steps_per_second': 7.651, 'epoch': 16.33}


 20%|██        | 2000/9800 [05:58<22:50,  5.69it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.7586, 'learning_rate': 4.1935483870967746e-05, 'epoch': 20.41}


                                                   
 20%|██        | 2000/9800 [06:00<22:50,  5.69it/s]Saving model checkpoint to temp/checkpoint-2000
Configuration saved in temp/checkpoint-2000/config.json


{'eval_loss': 0.7867947816848755, 'eval_f1': 0.6648455624134015, 'eval_runtime': 1.7477, 'eval_samples_per_second': 2681.207, 'eval_steps_per_second': 7.438, 'epoch': 20.41}


Model weights saved in temp/checkpoint-2000/pytorch_model.bin
 24%|██▍       | 2400/9800 [07:11<20:50,  5.92it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.7427, 'learning_rate': 3.978494623655914e-05, 'epoch': 24.49}



 24%|██▍       | 2400/9800 [07:13<20:50,  5.92it/s]Saving model checkpoint to temp/checkpoint-2400
Configuration saved in temp/checkpoint-2400/config.json


{'eval_loss': 0.7799067497253418, 'eval_f1': 0.6714036888875788, 'eval_runtime': 1.7065, 'eval_samples_per_second': 2746.009, 'eval_steps_per_second': 7.618, 'epoch': 24.49}


Model weights saved in temp/checkpoint-2400/pytorch_model.bin
 29%|██▊       | 2800/9800 [08:22<19:44,  5.91it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.7231, 'learning_rate': 3.763440860215054e-05, 'epoch': 28.57}


                                                   
 29%|██▊       | 2800/9800 [08:23<19:44,  5.91it/s]Saving model checkpoint to temp/checkpoint-2800
Configuration saved in temp/checkpoint-2800/config.json


{'eval_loss': 0.7736707925796509, 'eval_f1': 0.678237909471621, 'eval_runtime': 1.6927, 'eval_samples_per_second': 2768.439, 'eval_steps_per_second': 7.68, 'epoch': 28.57}


Model weights saved in temp/checkpoint-2800/pytorch_model.bin
 33%|███▎      | 3200/9800 [09:36<19:42,  5.58it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.7073, 'learning_rate': 3.548387096774194e-05, 'epoch': 32.65}



 33%|███▎      | 3200/9800 [09:37<19:42,  5.58it/s]Saving model checkpoint to temp/checkpoint-3200
Configuration saved in temp/checkpoint-3200/config.json


{'eval_loss': 0.7710894346237183, 'eval_f1': 0.6816925546804526, 'eval_runtime': 1.788, 'eval_samples_per_second': 2620.808, 'eval_steps_per_second': 7.271, 'epoch': 32.65}


Model weights saved in temp/checkpoint-3200/pytorch_model.bin
 37%|███▋      | 3600/9800 [10:47<17:36,  5.87it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6896, 'learning_rate': 3.3333333333333335e-05, 'epoch': 36.73}



 37%|███▋      | 3600/9800 [10:49<17:36,  5.87it/s]Saving model checkpoint to temp/checkpoint-3600
Configuration saved in temp/checkpoint-3600/config.json


{'eval_loss': 0.7675222754478455, 'eval_f1': 0.6858458146092123, 'eval_runtime': 1.7027, 'eval_samples_per_second': 2752.172, 'eval_steps_per_second': 7.635, 'epoch': 36.73}


Model weights saved in temp/checkpoint-3600/pytorch_model.bin
 41%|████      | 4000/9800 [11:58<16:29,  5.86it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6748, 'learning_rate': 3.118279569892473e-05, 'epoch': 40.82}



 41%|████      | 4000/9800 [12:00<16:29,  5.86it/s]Saving model checkpoint to temp/checkpoint-4000
Configuration saved in temp/checkpoint-4000/config.json


{'eval_loss': 0.7658502459526062, 'eval_f1': 0.6902416976418718, 'eval_runtime': 1.7041, 'eval_samples_per_second': 2749.873, 'eval_steps_per_second': 7.629, 'epoch': 40.82}


Model weights saved in temp/checkpoint-4000/pytorch_model.bin
 45%|████▍     | 4400/9800 [13:09<15:15,  5.90it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6593, 'learning_rate': 2.9032258064516133e-05, 'epoch': 44.9}



 45%|████▍     | 4400/9800 [13:11<15:15,  5.90it/s]Saving model checkpoint to temp/checkpoint-4400
Configuration saved in temp/checkpoint-4400/config.json


{'eval_loss': 0.7703796625137329, 'eval_f1': 0.6942485874669178, 'eval_runtime': 1.8863, 'eval_samples_per_second': 2484.172, 'eval_steps_per_second': 6.892, 'epoch': 44.9}


Model weights saved in temp/checkpoint-4400/pytorch_model.bin
 49%|████▉     | 4800/9800 [14:19<14:07,  5.90it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6479, 'learning_rate': 2.6881720430107527e-05, 'epoch': 48.98}



 49%|████▉     | 4800/9800 [14:21<14:07,  5.90it/s]Saving model checkpoint to temp/checkpoint-4800
Configuration saved in temp/checkpoint-4800/config.json


{'eval_loss': 0.7720142602920532, 'eval_f1': 0.692396783856922, 'eval_runtime': 1.706, 'eval_samples_per_second': 2746.711, 'eval_steps_per_second': 7.62, 'epoch': 48.98}


Model weights saved in temp/checkpoint-4800/pytorch_model.bin
 53%|█████▎    | 5200/9800 [15:29<12:45,  6.01it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6358, 'learning_rate': 2.4731182795698928e-05, 'epoch': 53.06}



 53%|█████▎    | 5200/9800 [15:31<12:45,  6.01it/s]Saving model checkpoint to temp/checkpoint-5200
Configuration saved in temp/checkpoint-5200/config.json


{'eval_loss': 0.7680339217185974, 'eval_f1': 0.6987826894365133, 'eval_runtime': 1.7037, 'eval_samples_per_second': 2750.492, 'eval_steps_per_second': 7.63, 'epoch': 53.06}


Model weights saved in temp/checkpoint-5200/pytorch_model.bin
 57%|█████▋    | 5600/9800 [16:41<12:29,  5.60it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6251, 'learning_rate': 2.258064516129032e-05, 'epoch': 57.14}



 57%|█████▋    | 5600/9800 [16:43<12:29,  5.60it/s]Saving model checkpoint to temp/checkpoint-5600
Configuration saved in temp/checkpoint-5600/config.json


{'eval_loss': 0.7723830342292786, 'eval_f1': 0.6984802343624676, 'eval_runtime': 1.7798, 'eval_samples_per_second': 2632.953, 'eval_steps_per_second': 7.304, 'epoch': 57.14}


Model weights saved in temp/checkpoint-5600/pytorch_model.bin
 61%|██████    | 6000/9800 [17:55<11:21,  5.57it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6171, 'learning_rate': 2.0430107526881722e-05, 'epoch': 61.22}



 61%|██████    | 6000/9800 [17:57<11:21,  5.57it/s]Saving model checkpoint to temp/checkpoint-6000
Configuration saved in temp/checkpoint-6000/config.json


{'eval_loss': 0.774113118648529, 'eval_f1': 0.6986385733306825, 'eval_runtime': 1.7823, 'eval_samples_per_second': 2629.213, 'eval_steps_per_second': 7.294, 'epoch': 61.22}


Model weights saved in temp/checkpoint-6000/pytorch_model.bin
 65%|██████▌   | 6400/9800 [19:10<10:13,  5.54it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.612, 'learning_rate': 1.827956989247312e-05, 'epoch': 65.31}


                                                   
 65%|██████▌   | 6400/9800 [19:12<10:13,  5.54it/s]Saving model checkpoint to temp/checkpoint-6400
Configuration saved in temp/checkpoint-6400/config.json


{'eval_loss': 0.7737630605697632, 'eval_f1': 0.7000721010674372, 'eval_runtime': 1.7929, 'eval_samples_per_second': 2613.642, 'eval_steps_per_second': 7.251, 'epoch': 65.31}


Model weights saved in temp/checkpoint-6400/pytorch_model.bin
 69%|██████▉   | 6800/9800 [20:24<08:53,  5.62it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.603, 'learning_rate': 1.6129032258064517e-05, 'epoch': 69.39}



 69%|██████▉   | 6800/9800 [20:26<08:53,  5.62it/s]Saving model checkpoint to temp/checkpoint-6800
Configuration saved in temp/checkpoint-6800/config.json


{'eval_loss': 0.7773008346557617, 'eval_f1': 0.7004112308991688, 'eval_runtime': 1.7888, 'eval_samples_per_second': 2619.597, 'eval_steps_per_second': 7.267, 'epoch': 69.39}


Model weights saved in temp/checkpoint-6800/pytorch_model.bin
 73%|███████▎  | 7200/9800 [21:38<07:40,  5.64it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5974, 'learning_rate': 1.3978494623655914e-05, 'epoch': 73.47}



 73%|███████▎  | 7200/9800 [21:40<07:40,  5.64it/s]Saving model checkpoint to temp/checkpoint-7200
Configuration saved in temp/checkpoint-7200/config.json


{'eval_loss': 0.7786484360694885, 'eval_f1': 0.7046781784040629, 'eval_runtime': 1.7744, 'eval_samples_per_second': 2640.839, 'eval_steps_per_second': 7.326, 'epoch': 73.47}


Model weights saved in temp/checkpoint-7200/pytorch_model.bin
 78%|███████▊  | 7600/9800 [22:49<06:14,  5.88it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5939, 'learning_rate': 1.1827956989247313e-05, 'epoch': 77.55}



 78%|███████▊  | 7600/9800 [22:50<06:14,  5.88it/s]Saving model checkpoint to temp/checkpoint-7600
Configuration saved in temp/checkpoint-7600/config.json


{'eval_loss': 0.7813209295272827, 'eval_f1': 0.7028263789660867, 'eval_runtime': 1.6866, 'eval_samples_per_second': 2778.358, 'eval_steps_per_second': 7.708, 'epoch': 77.55}


Model weights saved in temp/checkpoint-7600/pytorch_model.bin
 82%|████████▏ | 8000/9800 [23:59<05:04,  5.91it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5865, 'learning_rate': 9.67741935483871e-06, 'epoch': 81.63}



 82%|████████▏ | 8000/9800 [24:00<05:04,  5.91it/s]Saving model checkpoint to temp/checkpoint-8000
Configuration saved in temp/checkpoint-8000/config.json


{'eval_loss': 0.7822911739349365, 'eval_f1': 0.7025836074926674, 'eval_runtime': 1.695, 'eval_samples_per_second': 2764.671, 'eval_steps_per_second': 7.67, 'epoch': 81.63}


Model weights saved in temp/checkpoint-8000/pytorch_model.bin
 86%|████████▌ | 8400/9800 [25:09<03:56,  5.92it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5849, 'learning_rate': 7.526881720430108e-06, 'epoch': 85.71}



 86%|████████▌ | 8400/9800 [25:11<03:56,  5.92it/s]Saving model checkpoint to temp/checkpoint-8400
Configuration saved in temp/checkpoint-8400/config.json


{'eval_loss': 0.7838456630706787, 'eval_f1': 0.7037615757410443, 'eval_runtime': 1.6987, 'eval_samples_per_second': 2758.558, 'eval_steps_per_second': 7.653, 'epoch': 85.71}


Model weights saved in temp/checkpoint-8400/pytorch_model.bin
 90%|████████▉ | 8800/9800 [26:19<02:48,  5.94it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5833, 'learning_rate': 5.376344086021506e-06, 'epoch': 89.8}



 90%|████████▉ | 8800/9800 [26:21<02:48,  5.94it/s]Saving model checkpoint to temp/checkpoint-8800
Configuration saved in temp/checkpoint-8800/config.json


{'eval_loss': 0.7842379808425903, 'eval_f1': 0.7052251922665624, 'eval_runtime': 1.6917, 'eval_samples_per_second': 2770.005, 'eval_steps_per_second': 7.685, 'epoch': 89.8}


Model weights saved in temp/checkpoint-8800/pytorch_model.bin
 94%|█████████▍| 9200/9800 [27:29<01:41,  5.92it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5792, 'learning_rate': 3.225806451612903e-06, 'epoch': 93.88}



 94%|█████████▍| 9200/9800 [27:31<01:41,  5.92it/s]Saving model checkpoint to temp/checkpoint-9200
Configuration saved in temp/checkpoint-9200/config.json


{'eval_loss': 0.7839885950088501, 'eval_f1': 0.7047368115447835, 'eval_runtime': 1.7014, 'eval_samples_per_second': 2754.226, 'eval_steps_per_second': 7.641, 'epoch': 93.88}


Model weights saved in temp/checkpoint-9200/pytorch_model.bin
 98%|█████████▊| 9600/9800 [28:40<00:33,  5.92it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5813, 'learning_rate': 1.0752688172043011e-06, 'epoch': 97.96}



 98%|█████████▊| 9600/9800 [28:42<00:33,  5.92it/s]Saving model checkpoint to temp/checkpoint-9600
Configuration saved in temp/checkpoint-9600/config.json
Model weights saved in temp/checkpoint-9600/pytorch_model.bin


{'eval_loss': 0.7850894927978516, 'eval_f1': 0.7054863672269955, 'eval_runtime': 1.6882, 'eval_samples_per_second': 2775.77, 'eval_steps_per_second': 7.701, 'epoch': 97.96}


100%|██████████| 9800/9800 [29:16<00:00,  6.64it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-4000 (score: 0.7658502459526062).
100%|██████████| 9800/9800 [29:16<00:00,  5.58it/s]
***** Running training *****
  Num examples = 18744
  Num Epochs = 100
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 9800


{'train_runtime': 1756.5861, 'train_samples_per_second': 1067.07, 'train_steps_per_second': 5.579, 'train_loss': 0.6684466116768973, 'epoch': 100.0}


  4%|▍         | 400/9800 [01:07<26:18,  5.95it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6607, 'learning_rate': 4e-05, 'epoch': 4.08}


                                                  
  4%|▍         | 400/9800 [01:09<26:18,  5.95it/s]Saving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json
Model weights saved in temp/checkpoint-400/pytorch_model.bin


{'eval_loss': 0.7805559039115906, 'eval_accuracy': 0.6873666239863423, 'eval_runtime': 1.7022, 'eval_samples_per_second': 2752.944, 'eval_steps_per_second': 7.637, 'epoch': 4.08}


  8%|▊         | 800/9800 [02:17<25:38,  5.85it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6486, 'learning_rate': 4.8387096774193554e-05, 'epoch': 8.16}


                                                  
  8%|▊         | 800/9800 [02:19<25:38,  5.85it/s]Saving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json


{'eval_loss': 0.7707378268241882, 'eval_accuracy': 0.6952624839948783, 'eval_runtime': 1.8408, 'eval_samples_per_second': 2545.681, 'eval_steps_per_second': 7.062, 'epoch': 8.16}


Model weights saved in temp/checkpoint-800/pytorch_model.bin
 12%|█▏        | 1200/9800 [03:30<24:37,  5.82it/s] ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.6314, 'learning_rate': 4.6236559139784944e-05, 'epoch': 12.24}


                                                   
 12%|█▏        | 1200/9800 [03:32<24:37,  5.82it/s]Saving model checkpoint to temp/checkpoint-1200
Configuration saved in temp/checkpoint-1200/config.json


{'eval_loss': 0.7781342267990112, 'eval_accuracy': 0.6935552710200598, 'eval_runtime': 1.7317, 'eval_samples_per_second': 2705.968, 'eval_steps_per_second': 7.507, 'epoch': 12.24}


Model weights saved in temp/checkpoint-1200/pytorch_model.bin
 16%|█▋        | 1600/9800 [04:47<24:32,  5.57it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.613, 'learning_rate': 4.408602150537635e-05, 'epoch': 16.33}



 16%|█▋        | 1600/9800 [04:48<24:32,  5.57it/s]Saving model checkpoint to temp/checkpoint-1600
Configuration saved in temp/checkpoint-1600/config.json
Model weights saved in temp/checkpoint-1600/pytorch_model.bin


{'eval_loss': 0.7795121669769287, 'eval_accuracy': 0.6997439180537772, 'eval_runtime': 1.8066, 'eval_samples_per_second': 2593.865, 'eval_steps_per_second': 7.196, 'epoch': 16.33}


 20%|██        | 2000/9800 [06:00<22:29,  5.78it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5956, 'learning_rate': 4.1935483870967746e-05, 'epoch': 20.41}


                                                   
 20%|██        | 2000/9800 [06:02<22:29,  5.78it/s]Saving model checkpoint to temp/checkpoint-2000
Configuration saved in temp/checkpoint-2000/config.json


{'eval_loss': 0.7885563373565674, 'eval_accuracy': 0.6988903115663679, 'eval_runtime': 1.8298, 'eval_samples_per_second': 2560.895, 'eval_steps_per_second': 7.104, 'epoch': 20.41}


Model weights saved in temp/checkpoint-2000/pytorch_model.bin
 24%|██▍       | 2400/9800 [07:17<23:38,  5.22it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5836, 'learning_rate': 3.978494623655914e-05, 'epoch': 24.49}


                                                   
 24%|██▍       | 2400/9800 [07:19<23:38,  5.22it/s]Saving model checkpoint to temp/checkpoint-2400
Configuration saved in temp/checkpoint-2400/config.json


{'eval_loss': 0.7956011295318604, 'eval_accuracy': 0.7040119504908238, 'eval_runtime': 1.846, 'eval_samples_per_second': 2538.439, 'eval_steps_per_second': 7.042, 'epoch': 24.49}


Model weights saved in temp/checkpoint-2400/pytorch_model.bin
 29%|██▊       | 2800/9800 [08:33<21:02,  5.54it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5682, 'learning_rate': 3.763440860215054e-05, 'epoch': 28.57}



 29%|██▊       | 2800/9800 [08:35<21:02,  5.54it/s]Saving model checkpoint to temp/checkpoint-2800
Configuration saved in temp/checkpoint-2800/config.json


{'eval_loss': 0.8016554713249207, 'eval_accuracy': 0.7059325650874947, 'eval_runtime': 1.7479, 'eval_samples_per_second': 2680.864, 'eval_steps_per_second': 7.437, 'epoch': 28.57}


Model weights saved in temp/checkpoint-2800/pytorch_model.bin
 33%|███▎      | 3200/9800 [09:47<19:39,  5.60it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5605, 'learning_rate': 3.548387096774194e-05, 'epoch': 32.65}


                                                   
 33%|███▎      | 3200/9800 [09:49<19:39,  5.60it/s]Saving model checkpoint to temp/checkpoint-3200
Configuration saved in temp/checkpoint-3200/config.json


{'eval_loss': 0.8027281165122986, 'eval_accuracy': 0.7020913358941528, 'eval_runtime': 1.7911, 'eval_samples_per_second': 2616.211, 'eval_steps_per_second': 7.258, 'epoch': 32.65}


Model weights saved in temp/checkpoint-3200/pytorch_model.bin
 37%|███▋      | 3600/9800 [11:01<18:35,  5.56it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5477, 'learning_rate': 3.3333333333333335e-05, 'epoch': 36.73}


                                                   
 37%|███▋      | 3600/9800 [11:03<18:35,  5.56it/s]Saving model checkpoint to temp/checkpoint-3600
Configuration saved in temp/checkpoint-3600/config.json


{'eval_loss': 0.8068245053291321, 'eval_accuracy': 0.7069995731967563, 'eval_runtime': 1.8067, 'eval_samples_per_second': 2593.694, 'eval_steps_per_second': 7.195, 'epoch': 36.73}


Model weights saved in temp/checkpoint-3600/pytorch_model.bin
 41%|████      | 4000/9800 [12:16<17:14,  5.61it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5409, 'learning_rate': 3.118279569892473e-05, 'epoch': 40.82}



 41%|████      | 4000/9800 [12:18<17:14,  5.61it/s]Saving model checkpoint to temp/checkpoint-4000
Configuration saved in temp/checkpoint-4000/config.json


{'eval_loss': 0.815051257610321, 'eval_accuracy': 0.7065727699530516, 'eval_runtime': 1.7827, 'eval_samples_per_second': 2628.614, 'eval_steps_per_second': 7.292, 'epoch': 40.82}


Model weights saved in temp/checkpoint-4000/pytorch_model.bin
 45%|████▍     | 4400/9800 [13:30<15:50,  5.68it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5375, 'learning_rate': 2.9032258064516133e-05, 'epoch': 44.9}



 45%|████▍     | 4400/9800 [13:32<15:50,  5.68it/s]Saving model checkpoint to temp/checkpoint-4400
Configuration saved in temp/checkpoint-4400/config.json


{'eval_loss': 0.8156492114067078, 'eval_accuracy': 0.7097737942808365, 'eval_runtime': 1.7655, 'eval_samples_per_second': 2654.261, 'eval_steps_per_second': 7.364, 'epoch': 44.9}


Model weights saved in temp/checkpoint-4400/pytorch_model.bin
 49%|████▉     | 4800/9800 [14:43<14:44,  5.65it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5295, 'learning_rate': 2.6881720430107527e-05, 'epoch': 48.98}



 49%|████▉     | 4800/9800 [14:45<14:44,  5.65it/s]Saving model checkpoint to temp/checkpoint-4800
Configuration saved in temp/checkpoint-4800/config.json


{'eval_loss': 0.8208633661270142, 'eval_accuracy': 0.7110542040119505, 'eval_runtime': 1.7768, 'eval_samples_per_second': 2637.312, 'eval_steps_per_second': 7.316, 'epoch': 48.98}


Model weights saved in temp/checkpoint-4800/pytorch_model.bin
 53%|█████▎    | 5200/9800 [15:57<13:30,  5.67it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5216, 'learning_rate': 2.4731182795698928e-05, 'epoch': 53.06}



 53%|█████▎    | 5200/9800 [15:59<13:30,  5.67it/s]Saving model checkpoint to temp/checkpoint-5200
Configuration saved in temp/checkpoint-5200/config.json


{'eval_loss': 0.8176801800727844, 'eval_accuracy': 0.7121212121212122, 'eval_runtime': 1.7899, 'eval_samples_per_second': 2617.996, 'eval_steps_per_second': 7.263, 'epoch': 53.06}


Model weights saved in temp/checkpoint-5200/pytorch_model.bin
 57%|█████▋    | 5600/9800 [17:12<12:29,  5.60it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5139, 'learning_rate': 2.258064516129032e-05, 'epoch': 57.14}


                                                   
 57%|█████▋    | 5600/9800 [17:14<12:29,  5.60it/s]Saving model checkpoint to temp/checkpoint-5600
Configuration saved in temp/checkpoint-5600/config.json


{'eval_loss': 0.8255428671836853, 'eval_accuracy': 0.7123346137430644, 'eval_runtime': 1.7948, 'eval_samples_per_second': 2610.924, 'eval_steps_per_second': 7.243, 'epoch': 57.14}


Model weights saved in temp/checkpoint-5600/pytorch_model.bin
 61%|██████    | 6000/9800 [18:26<11:24,  5.55it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5086, 'learning_rate': 2.0430107526881722e-05, 'epoch': 61.22}


                                                   
 61%|██████    | 6000/9800 [18:28<11:24,  5.55it/s]Saving model checkpoint to temp/checkpoint-6000
Configuration saved in temp/checkpoint-6000/config.json


{'eval_loss': 0.8272386193275452, 'eval_accuracy': 0.7116944088775075, 'eval_runtime': 1.8114, 'eval_samples_per_second': 2586.998, 'eval_steps_per_second': 7.177, 'epoch': 61.22}


Model weights saved in temp/checkpoint-6000/pytorch_model.bin
 65%|██████▌   | 6400/9800 [19:41<10:09,  5.58it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.506, 'learning_rate': 1.827956989247312e-05, 'epoch': 65.31}


                                                   
 65%|██████▌   | 6400/9800 [19:43<10:09,  5.58it/s]Saving model checkpoint to temp/checkpoint-6400
Configuration saved in temp/checkpoint-6400/config.json


{'eval_loss': 0.8303500413894653, 'eval_accuracy': 0.7123346137430644, 'eval_runtime': 1.7936, 'eval_samples_per_second': 2612.563, 'eval_steps_per_second': 7.248, 'epoch': 65.31}


Model weights saved in temp/checkpoint-6400/pytorch_model.bin
 69%|██████▉   | 6800/9800 [20:55<08:51,  5.64it/s]  ***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.5004, 'learning_rate': 1.6129032258064517e-05, 'epoch': 69.39}



 69%|██████▉   | 6800/9800 [20:57<08:51,  5.64it/s]Saving model checkpoint to temp/checkpoint-6800
Configuration saved in temp/checkpoint-6800/config.json


{'eval_loss': 0.8357117772102356, 'eval_accuracy': 0.7114810072556551, 'eval_runtime': 1.767, 'eval_samples_per_second': 2651.914, 'eval_steps_per_second': 7.357, 'epoch': 69.39}


Model weights saved in temp/checkpoint-6800/pytorch_model.bin
 73%|███████▎  | 7200/9800 [22:08<07:41,  5.64it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.495, 'learning_rate': 1.3978494623655914e-05, 'epoch': 73.47}



 73%|███████▎  | 7200/9800 [22:10<07:41,  5.64it/s]Saving model checkpoint to temp/checkpoint-7200
Configuration saved in temp/checkpoint-7200/config.json


{'eval_loss': 0.840442955493927, 'eval_accuracy': 0.7131882202304738, 'eval_runtime': 1.9702, 'eval_samples_per_second': 2378.486, 'eval_steps_per_second': 6.598, 'epoch': 73.47}


Model weights saved in temp/checkpoint-7200/pytorch_model.bin
 78%|███████▊  | 7600/9800 [23:22<06:34,  5.58it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.4939, 'learning_rate': 1.1827956989247313e-05, 'epoch': 77.55}



 78%|███████▊  | 7600/9800 [23:24<06:34,  5.58it/s]Saving model checkpoint to temp/checkpoint-7600
Configuration saved in temp/checkpoint-7600/config.json


{'eval_loss': 0.841055154800415, 'eval_accuracy': 0.7116944088775075, 'eval_runtime': 1.7816, 'eval_samples_per_second': 2630.289, 'eval_steps_per_second': 7.297, 'epoch': 77.55}


Model weights saved in temp/checkpoint-7600/pytorch_model.bin
 82%|████████▏ | 8000/9800 [24:36<05:25,  5.53it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.4887, 'learning_rate': 9.67741935483871e-06, 'epoch': 81.63}



 82%|████████▏ | 8000/9800 [24:38<05:25,  5.53it/s]Saving model checkpoint to temp/checkpoint-8000
Configuration saved in temp/checkpoint-8000/config.json


{'eval_loss': 0.8411793112754822, 'eval_accuracy': 0.7142552283397354, 'eval_runtime': 1.7857, 'eval_samples_per_second': 2624.171, 'eval_steps_per_second': 7.28, 'epoch': 81.63}


Model weights saved in temp/checkpoint-8000/pytorch_model.bin
 86%|████████▌ | 8400/9800 [25:51<04:09,  5.61it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.4868, 'learning_rate': 7.526881720430108e-06, 'epoch': 85.71}



 86%|████████▌ | 8400/9800 [25:53<04:09,  5.61it/s]Saving model checkpoint to temp/checkpoint-8400
Configuration saved in temp/checkpoint-8400/config.json


{'eval_loss': 0.8459917306900024, 'eval_accuracy': 0.7121212121212122, 'eval_runtime': 1.7833, 'eval_samples_per_second': 2627.775, 'eval_steps_per_second': 7.29, 'epoch': 85.71}


Model weights saved in temp/checkpoint-8400/pytorch_model.bin
 90%|████████▉ | 8800/9800 [27:05<03:00,  5.53it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.4855, 'learning_rate': 5.376344086021506e-06, 'epoch': 89.8}



 90%|████████▉ | 8800/9800 [27:07<03:00,  5.53it/s]Saving model checkpoint to temp/checkpoint-8800
Configuration saved in temp/checkpoint-8800/config.json


{'eval_loss': 0.8456211090087891, 'eval_accuracy': 0.7140418267178831, 'eval_runtime': 1.7848, 'eval_samples_per_second': 2625.511, 'eval_steps_per_second': 7.284, 'epoch': 89.8}


Model weights saved in temp/checkpoint-8800/pytorch_model.bin
 94%|█████████▍| 9200/9800 [28:19<01:46,  5.64it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.4846, 'learning_rate': 3.225806451612903e-06, 'epoch': 93.88}



 94%|█████████▍| 9200/9800 [28:21<01:46,  5.64it/s]Saving model checkpoint to temp/checkpoint-9200
Configuration saved in temp/checkpoint-9200/config.json


{'eval_loss': 0.845083475112915, 'eval_accuracy': 0.7129748186086214, 'eval_runtime': 1.7706, 'eval_samples_per_second': 2646.55, 'eval_steps_per_second': 7.342, 'epoch': 93.88}


Model weights saved in temp/checkpoint-9200/pytorch_model.bin
 98%|█████████▊| 9600/9800 [29:33<00:36,  5.55it/s]***** Running Evaluation *****
  Num examples = 4686
  Batch size = 384


{'loss': 0.4854, 'learning_rate': 1.0752688172043011e-06, 'epoch': 97.96}



 98%|█████████▊| 9600/9800 [29:35<00:36,  5.55it/s]Saving model checkpoint to temp/checkpoint-9600
Configuration saved in temp/checkpoint-9600/config.json


{'eval_loss': 0.8470042943954468, 'eval_accuracy': 0.7142552283397354, 'eval_runtime': 1.7896, 'eval_samples_per_second': 2618.417, 'eval_steps_per_second': 7.264, 'epoch': 97.96}


Model weights saved in temp/checkpoint-9600/pytorch_model.bin
100%|██████████| 9800/9800 [30:12<00:00,  6.23it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-800 (score: 0.7707378268241882).
100%|██████████| 9800/9800 [30:12<00:00,  5.41it/s]

{'train_runtime': 1812.6928, 'train_samples_per_second': 1034.042, 'train_steps_per_second': 5.406, 'train_loss': 0.5399496273118622, 'epoch': 100.0}





TrainOutput(global_step=9800, training_loss=0.5399496273118622, metrics={'train_runtime': 1812.6928, 'train_samples_per_second': 1034.042, 'train_steps_per_second': 5.406, 'train_loss': 0.5399496273118622, 'epoch': 100.0})