In [1]:
import pandas as pd
import torch
import os

from torch.utils.data import Dataset
from transformers import (BertTokenizer, BertForSequenceClassification, Trainer,
                          TrainingArguments, BertPreTrainedModel)
# from simpletransformers.language_modeling import LanguageModelingModel
from sklearn.metrics import accuracy_score, f1_score


In [2]:
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))
print(torch.cuda.get_device_properties(0))

True
NVIDIA GeForce RTX 3060
_CudaDeviceProperties(name='NVIDIA GeForce RTX 3060', major=8, minor=6, total_memory=12050MB, multi_processor_count=28)


In [3]:
MODEL_NAME = '/home/abdullah/Code/dl/lt_bert/best_models/pruned_best_tiny_bert'
TRAIN_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/SAIL_data/BN_data_train.tsv'
TEST_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/SAIL_data/BN_data_test.tsv'
EVAL_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/SAIL_data/BN_data_dev.tsv'


In [4]:
def tsv_to_text(tsv_file_loc):
    file_name = tsv_file_loc.split("/")[-1].split(".")[0]
    txt_name = tsv_file_loc.replace(".tsv", ".txt")
    txt_name = txt_name.replace("split_merged", "texts")

    if os.path.exists(txt_name):
        return [txt_name, file_name]

    df = pd.read_csv(tsv_file_loc, sep="\t")

    for txt in df["text"]:
        with open(txt_name, "a", encoding="utf8") as f:
            f.writelines(txt + "\n")
    return [txt_name, file_name]


tsv_to_text(TRAIN_FILE_LOC)


['/home/abdullah/Code/dl/bnlp-resources/sentiment/SAIL_data/BN_data_train.txt',
 'BN_data_train']

In [5]:
def tsv_to_df(csv_file_loc):
    df = pd.read_csv(csv_file_loc, sep='\t')
    # remove id column
    df = df.drop(columns=['id'])

    # replace neutral with 0, positive with 1, negative with 2 in class_label column
    df.loc[df['class'] == 'BN_NEU', 'class'] = 0
    df.loc[df['class'] == 'BN_POS', 'class'] = 1
    df.loc[df['class'] == 'BN_NEG', 'class'] = 2

    return df


train_df = tsv_to_df(TEST_FILE_LOC)

texts = train_df['text'].tolist()
print(train_df)


                                                  text class
0    'পকেটে শখানেক টাকা ... আর এই মুহুর্তে দিনব্যাপ...     2
1    'শেষ পর্যায়ে এসে গল্পটা এভাবে ভুল পথে মোড় না...     2
2    'সংসদেও ঘুমালেন সমাজকল্যাণ মন্ত্রী  http://t.c...     2
3    'সীমান্তে নারী ও শিশুসহ আটক ৮ http://t.co/Fz3d...     2
4    'আমিও মানুষ ভালা না, মনে মনে শুয়োরের বাচ্চা বল...     2
..                                                 ...   ...
199  'আবের সঙ্গে সৌজন্য সাক্ষাৎ করেছেন খালেদা জিয়া...     1
200  'ভৈরব নদ সংস্কার ও খননের দাবিতে জনউদ্যোগের মান...     1
201  'আলো টেলিফিল্মটা দেখলাম। মাঝেমধ্যে পর্যাপ্ত পর...     1
202      '@arif_rony2 তারপরে ও দেখতে পেলে খুব মজা পাই'     1
203  'চল কাবার পানে ওহে মুহাজির,পেতে প্রেম শুধা আল্...     1

[204 rows x 2 columns]


In [6]:
def compute_metrics(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  # calculate accuracy using sklearn's function
  acc = accuracy_score(labels, preds)
  return {
      'accuracy': acc,
  }


In [7]:
def f1_calculator(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  # calculate accuracy using sklearn's function
  f1 = f1_score(labels, preds, average='weighted')
  return {
      'f1': f1,
  }


In [8]:
train_list = tsv_to_text(TRAIN_FILE_LOC)
test_list = tsv_to_text(TEST_FILE_LOC)
eval_list = tsv_to_text(EVAL_FILE_LOC)


In [9]:
os.environ["WANDB_DISABLED"] = "true"

train_df = tsv_to_df(TRAIN_FILE_LOC)
test_df = tsv_to_df(TEST_FILE_LOC)
eval_df = tsv_to_df(EVAL_FILE_LOC)


class TINYDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(value[idx])
                for key, value in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

train_encodings = tokenizer(train_df['text'].tolist(
), padding=True, truncation=True, max_length=512)
test_encodings = tokenizer(test_df['text'].tolist(
), padding=True, truncation=True, max_length=512)
eval_encodings = tokenizer(eval_df['text'].tolist(
), padding=True, truncation=True, max_length=512)

train_dataset = TINYDataset(train_encodings, train_df['class'].tolist())
test_dataset = TINYDataset(test_encodings, test_df['class'].tolist())
eval_dataset = TINYDataset(eval_encodings, eval_df['class'].tolist())

model = BertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3).to('cuda')
model.manual_seed = 14


Some weights of the model checkpoint at /home/abdullah/Code/dl/lt_bert/best_models/pruned_best_tiny_bert were not used when initializing BertForSequenceClassification: ['bert.encoder.layer.1.intermediate.dense.weight_mask', 'bert.encoder.layer.1.attention.output.dense.weight_mask', 'bert.encoder.layer.0.intermediate.dense.weight_orig', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'bert.encoder.layer.1.attention.self.query.weight_mask', 'bert.encoder.layer.1.intermediate.dense.weight_orig', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'bert.encoder.layer.0.attention.output.dense.weight_orig', 'bert.encoder.layer.0.attention.output.dense.weight_mask', 'bert.encoder.layer.1.output.dense.weight_orig', 'bert.encoder.layer.0.intermediate.dense.weight_mask', 'bert.encoder.layer.0.attention.self.value.weight_mask', 'bert.encoder.layer.1.attention.self.value.weight_orig', 'bert.encoder.layer.0.attention.self.key.weight_mask', 'bert.encoder.lay

## 1 Epoch Train

In [10]:
training_args = TrainingArguments(
    output_dir=f"temp",
    num_train_epochs=1,
    per_device_train_batch_size=48,
    per_device_eval_batch_size=96,
    warmup_steps=500,
    learning_rate=5e-5,
    weight_decay=0.01,
    overwrite_output_dir=True,
    logging_dir=f"temp/logs",
    logging_steps=15,
    save_steps=15,
    load_best_model_at_end=True,
    evaluation_strategy="steps",
    seed=14,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset

)

trainer.train()


Using the `WAND_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
***** Running training *****
  Num examples = 697
  Num Epochs = 1
  Instantaneous batch size per device = 48
  Total train batch size (w. parallel, distributed & accumulation) = 48
  Gradient Accumulation steps = 1
  Total optimization steps = 15
100%|██████████| 15/15 [00:00<00:00, 33.87it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 1.099, 'learning_rate': 1.5e-06, 'epoch': 1.0}


                                               
100%|██████████| 15/15 [00:00<00:00, 33.87it/s]Saving model checkpoint to temp/checkpoint-15
Configuration saved in temp/checkpoint-15/config.json


{'eval_loss': 1.0983589887619019, 'eval_runtime': 0.0171, 'eval_samples_per_second': 5743.214, 'eval_steps_per_second': 117.208, 'epoch': 1.0}


Model weights saved in temp/checkpoint-15/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-15 (score: 1.0983589887619019).
100%|██████████| 15/15 [00:01<00:00,  8.84it/s]

{'train_runtime': 1.7031, 'train_samples_per_second': 409.253, 'train_steps_per_second': 8.807, 'train_loss': 1.0989625295003256, 'epoch': 1.0}





TrainOutput(global_step=15, training_loss=1.0989625295003256, metrics={'train_runtime': 1.7031, 'train_samples_per_second': 409.253, 'train_steps_per_second': 8.807, 'train_loss': 1.0989625295003256, 'epoch': 1.0})

## 100 Epoch with 1 epoch model

In [12]:
MODEL_NAME = '/home/abdullah/Code/dl/lt_bert/best_models/1/1_sail_sentiment_50'
model = BertForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=3).to('cuda')
model.manual_seed = 14

for name, param in model.named_parameters():
    if 'bert.encoder.layer.0' in name:
        # print(name, param.requires_grad)
        param.requires_grad = False
# print("\n\n\n")
# for name, param in model.named_parameters():
#     print(name, param.requires_grad)

training_args = TrainingArguments(
    output_dir=f"temp",
    num_train_epochs=100,
    per_device_train_batch_size=48,
    per_device_eval_batch_size=96,
    warmup_steps=500,
    learning_rate=5e-5,
    weight_decay=0.01,
    overwrite_output_dir=True,
    logging_dir=f"temp/logs",
    logging_steps=100,
    save_steps=100,
    load_best_model_at_end=True,
    evaluation_strategy="steps",
    seed=14,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=f1_calculator,

)

trainer.train()

# print(cool.metrics)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,

)

trainer.train()


loading configuration file /home/abdullah/Code/dl/lt_bert/best_models/1/1_sail_sentiment_50/config.json
Model config BertConfig {
  "_name_or_path": "/home/abdullah/Code/dl/lt_bert/best_models/pruned_best_tiny_bert",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 512,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.12.5",
  "type_vocab_size": 2,
  "use_cache": true,
  "voc

{'loss': 1.0976, 'learning_rate': 1e-05, 'epoch': 6.67}


                                                  
  7%|▋         | 100/1500 [00:01<00:19, 71.66it/s]Saving model checkpoint to temp/checkpoint-100
Configuration saved in temp/checkpoint-100/config.json


{'eval_loss': 1.0944733619689941, 'eval_f1': 0.1973804447151995, 'eval_runtime': 0.0154, 'eval_samples_per_second': 6367.213, 'eval_steps_per_second': 129.943, 'epoch': 6.67}


Model weights saved in temp/checkpoint-100/pytorch_model.bin
 13%|█▎        | 200/1500 [00:03<00:17, 75.65it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 1.0932, 'learning_rate': 2e-05, 'epoch': 13.33}


                                                  
 13%|█▎        | 200/1500 [00:03<00:17, 75.65it/s]Saving model checkpoint to temp/checkpoint-200
Configuration saved in temp/checkpoint-200/config.json


{'eval_loss': 1.091084361076355, 'eval_f1': 0.1973804447151995, 'eval_runtime': 0.0176, 'eval_samples_per_second': 5552.593, 'eval_steps_per_second': 113.318, 'epoch': 13.33}


Model weights saved in temp/checkpoint-200/pytorch_model.bin
 20%|██        | 300/1500 [00:05<00:16, 71.81it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 1.0908, 'learning_rate': 3e-05, 'epoch': 20.0}


                                                  
 20%|██        | 300/1500 [00:05<00:16, 71.81it/s]Saving model checkpoint to temp/checkpoint-300
Configuration saved in temp/checkpoint-300/config.json


{'eval_loss': 1.0905683040618896, 'eval_f1': 0.1973804447151995, 'eval_runtime': 0.018, 'eval_samples_per_second': 5434.473, 'eval_steps_per_second': 110.908, 'epoch': 20.0}


Model weights saved in temp/checkpoint-300/pytorch_model.bin
 27%|██▋       | 400/1500 [00:08<00:15, 69.25it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 1.0916, 'learning_rate': 4e-05, 'epoch': 26.67}


                                                  
 27%|██▋       | 400/1500 [00:08<00:15, 69.25it/s]Saving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json


{'eval_loss': 1.0902270078659058, 'eval_f1': 0.1973804447151995, 'eval_runtime': 0.014, 'eval_samples_per_second': 6976.506, 'eval_steps_per_second': 142.378, 'epoch': 26.67}


Model weights saved in temp/checkpoint-400/pytorch_model.bin
 33%|███▎      | 500/1500 [00:10<00:13, 74.62it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 1.0905, 'learning_rate': 5e-05, 'epoch': 33.33}


                                                  
 33%|███▎      | 500/1500 [00:10<00:13, 74.62it/s]Saving model checkpoint to temp/checkpoint-500
Configuration saved in temp/checkpoint-500/config.json


{'eval_loss': 1.0892739295959473, 'eval_f1': 0.1973804447151995, 'eval_runtime': 0.0171, 'eval_samples_per_second': 5737.362, 'eval_steps_per_second': 117.089, 'epoch': 33.33}


Model weights saved in temp/checkpoint-500/pytorch_model.bin
 40%|████      | 600/1500 [00:12<00:12, 72.66it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 1.0865, 'learning_rate': 4.5e-05, 'epoch': 40.0}


                                                  
 40%|████      | 600/1500 [00:12<00:12, 72.66it/s]Saving model checkpoint to temp/checkpoint-600
Configuration saved in temp/checkpoint-600/config.json


{'eval_loss': 1.0827983617782593, 'eval_f1': 0.37032616618075803, 'eval_runtime': 0.0174, 'eval_samples_per_second': 5621.238, 'eval_steps_per_second': 114.719, 'epoch': 40.0}


Model weights saved in temp/checkpoint-600/pytorch_model.bin
 47%|████▋     | 700/1500 [00:14<00:10, 75.85it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 1.0236, 'learning_rate': 4e-05, 'epoch': 46.67}


                                                  
 47%|████▋     | 700/1500 [00:14<00:10, 75.85it/s]Saving model checkpoint to temp/checkpoint-700
Configuration saved in temp/checkpoint-700/config.json


{'eval_loss': 1.1270883083343506, 'eval_f1': 0.3337198515769944, 'eval_runtime': 0.0153, 'eval_samples_per_second': 6416.913, 'eval_steps_per_second': 130.957, 'epoch': 46.67}


Model weights saved in temp/checkpoint-700/pytorch_model.bin
 53%|█████▎    | 800/1500 [00:16<00:09, 76.86it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.8628, 'learning_rate': 3.5e-05, 'epoch': 53.33}


                                                  
 53%|█████▎    | 800/1500 [00:16<00:09, 76.86it/s]Saving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json


{'eval_loss': 1.227048397064209, 'eval_f1': 0.3660187334696188, 'eval_runtime': 0.016, 'eval_samples_per_second': 6125.261, 'eval_steps_per_second': 125.005, 'epoch': 53.33}


Model weights saved in temp/checkpoint-800/pytorch_model.bin
 60%|██████    | 900/1500 [00:19<00:07, 75.64it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.7635, 'learning_rate': 3e-05, 'epoch': 60.0}


                                                  
 60%|██████    | 900/1500 [00:19<00:07, 75.64it/s]Saving model checkpoint to temp/checkpoint-900
Configuration saved in temp/checkpoint-900/config.json


{'eval_loss': 1.2936452627182007, 'eval_f1': 0.3845804988662131, 'eval_runtime': 0.0156, 'eval_samples_per_second': 6275.735, 'eval_steps_per_second': 128.076, 'epoch': 60.0}


Model weights saved in temp/checkpoint-900/pytorch_model.bin
 67%|██████▋   | 1000/1500 [00:21<00:06, 75.80it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.72, 'learning_rate': 2.5e-05, 'epoch': 66.67}


                                                   
 67%|██████▋   | 1000/1500 [00:21<00:06, 75.80it/s]Saving model checkpoint to temp/checkpoint-1000
Configuration saved in temp/checkpoint-1000/config.json


{'eval_loss': 1.3415125608444214, 'eval_f1': 0.3548979591836735, 'eval_runtime': 0.0179, 'eval_samples_per_second': 5478.731, 'eval_steps_per_second': 111.811, 'epoch': 66.67}


Model weights saved in temp/checkpoint-1000/pytorch_model.bin
 73%|███████▎  | 1100/1500 [00:23<00:05, 76.47it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.6807, 'learning_rate': 2e-05, 'epoch': 73.33}


                                                   
 73%|███████▎  | 1100/1500 [00:23<00:05, 76.47it/s]Saving model checkpoint to temp/checkpoint-1100
Configuration saved in temp/checkpoint-1100/config.json
Model weights saved in temp/checkpoint-1100/pytorch_model.bin


{'eval_loss': 1.3823120594024658, 'eval_f1': 0.3780133292896717, 'eval_runtime': 0.0148, 'eval_samples_per_second': 6623.51, 'eval_steps_per_second': 135.174, 'epoch': 73.33}


 80%|████████  | 1200/1500 [00:25<00:03, 77.12it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.6538, 'learning_rate': 1.5e-05, 'epoch': 80.0}


                                                   
 80%|████████  | 1200/1500 [00:25<00:03, 77.12it/s]Saving model checkpoint to temp/checkpoint-1200
Configuration saved in temp/checkpoint-1200/config.json


{'eval_loss': 1.4156309366226196, 'eval_f1': 0.39971313200604924, 'eval_runtime': 0.0171, 'eval_samples_per_second': 5747.47, 'eval_steps_per_second': 117.295, 'epoch': 80.0}


Model weights saved in temp/checkpoint-1200/pytorch_model.bin
 87%|████████▋ | 1300/1500 [00:27<00:02, 76.07it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.6506, 'learning_rate': 1e-05, 'epoch': 86.67}


                                                   
 87%|████████▋ | 1300/1500 [00:27<00:02, 76.07it/s]Saving model checkpoint to temp/checkpoint-1300
Configuration saved in temp/checkpoint-1300/config.json


{'eval_loss': 1.4331254959106445, 'eval_f1': 0.4178302900107411, 'eval_runtime': 0.017, 'eval_samples_per_second': 5757.21, 'eval_steps_per_second': 117.494, 'epoch': 86.67}


Model weights saved in temp/checkpoint-1300/pytorch_model.bin
 93%|█████████▎| 1400/1500 [00:29<00:01, 77.65it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.6408, 'learning_rate': 5e-06, 'epoch': 93.33}


                                                   
 93%|█████████▎| 1400/1500 [00:29<00:01, 77.65it/s]Saving model checkpoint to temp/checkpoint-1400
Configuration saved in temp/checkpoint-1400/config.json
Model weights saved in temp/checkpoint-1400/pytorch_model.bin


{'eval_loss': 1.4417887926101685, 'eval_f1': 0.4008878876225815, 'eval_runtime': 0.0143, 'eval_samples_per_second': 6865.228, 'eval_steps_per_second': 140.107, 'epoch': 93.33}


100%|██████████| 1500/1500 [00:32<00:00, 72.44it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.6376, 'learning_rate': 0.0, 'epoch': 100.0}


                                                   
100%|██████████| 1500/1500 [00:32<00:00, 72.44it/s]Saving model checkpoint to temp/checkpoint-1500
Configuration saved in temp/checkpoint-1500/config.json


{'eval_loss': 1.447922706604004, 'eval_f1': 0.4008878876225815, 'eval_runtime': 0.0144, 'eval_samples_per_second': 6789.366, 'eval_steps_per_second': 138.558, 'epoch': 100.0}


Model weights saved in temp/checkpoint-1500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-600 (score: 1.0827983617782593).
100%|██████████| 1500/1500 [00:32<00:00, 45.80it/s]
***** Running training *****
  Num examples = 697
  Num Epochs = 100
  Instantaneous batch size per device = 48
  Total train batch size (w. parallel, distributed & accumulation) = 48
  Gradient Accumulation steps = 1
  Total optimization steps = 1500


{'train_runtime': 32.7551, 'train_samples_per_second': 2127.914, 'train_steps_per_second': 45.794, 'train_loss': 0.8789064636230469, 'epoch': 100.0}


  7%|▋         | 100/1500 [00:01<00:17, 77.81it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 1.076, 'learning_rate': 1e-05, 'epoch': 6.67}



  7%|▋         | 100/1500 [00:01<00:17, 77.81it/s]Saving model checkpoint to temp/checkpoint-100
Configuration saved in temp/checkpoint-100/config.json


{'eval_loss': 1.0815467834472656, 'eval_accuracy': 0.4489795918367347, 'eval_runtime': 0.0152, 'eval_samples_per_second': 6449.333, 'eval_steps_per_second': 131.619, 'epoch': 6.67}


Model weights saved in temp/checkpoint-100/pytorch_model.bin
 13%|█▎        | 200/1500 [00:03<00:17, 74.79it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 1.0707, 'learning_rate': 2e-05, 'epoch': 13.33}



 13%|█▎        | 200/1500 [00:03<00:17, 74.79it/s]Saving model checkpoint to temp/checkpoint-200
Configuration saved in temp/checkpoint-200/config.json


{'eval_loss': 1.0770145654678345, 'eval_accuracy': 0.41836734693877553, 'eval_runtime': 0.0166, 'eval_samples_per_second': 5892.481, 'eval_steps_per_second': 120.255, 'epoch': 13.33}


Model weights saved in temp/checkpoint-200/pytorch_model.bin
 20%|██        | 300/1500 [00:05<00:15, 76.47it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 1.0462, 'learning_rate': 3e-05, 'epoch': 20.0}



 20%|██        | 300/1500 [00:05<00:15, 76.47it/s]Saving model checkpoint to temp/checkpoint-300
Configuration saved in temp/checkpoint-300/config.json
Model weights saved in temp/checkpoint-300/pytorch_model.bin


{'eval_loss': 1.0742554664611816, 'eval_accuracy': 0.4489795918367347, 'eval_runtime': 0.0156, 'eval_samples_per_second': 6282.353, 'eval_steps_per_second': 128.211, 'epoch': 20.0}


 27%|██▋       | 400/1500 [00:07<00:14, 74.29it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.9686, 'learning_rate': 4e-05, 'epoch': 26.67}



 27%|██▋       | 400/1500 [00:07<00:14, 74.29it/s]Saving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json


{'eval_loss': 1.1234623193740845, 'eval_accuracy': 0.3673469387755102, 'eval_runtime': 0.0144, 'eval_samples_per_second': 6805.553, 'eval_steps_per_second': 138.889, 'epoch': 26.67}


Model weights saved in temp/checkpoint-400/pytorch_model.bin
 33%|███▎      | 500/1500 [00:10<00:13, 72.38it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.8617, 'learning_rate': 5e-05, 'epoch': 33.33}



 33%|███▎      | 500/1500 [00:10<00:13, 72.38it/s]Saving model checkpoint to temp/checkpoint-500
Configuration saved in temp/checkpoint-500/config.json


{'eval_loss': 1.2079501152038574, 'eval_accuracy': 0.4489795918367347, 'eval_runtime': 0.0148, 'eval_samples_per_second': 6638.701, 'eval_steps_per_second': 135.484, 'epoch': 33.33}


Model weights saved in temp/checkpoint-500/pytorch_model.bin
 40%|████      | 600/1500 [00:12<00:11, 75.54it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.7642, 'learning_rate': 4.5e-05, 'epoch': 40.0}



 40%|████      | 600/1500 [00:12<00:11, 75.54it/s]Saving model checkpoint to temp/checkpoint-600
Configuration saved in temp/checkpoint-600/config.json
Model weights saved in temp/checkpoint-600/pytorch_model.bin


{'eval_loss': 1.284836769104004, 'eval_accuracy': 0.4387755102040816, 'eval_runtime': 0.0128, 'eval_samples_per_second': 7682.593, 'eval_steps_per_second': 156.788, 'epoch': 40.0}


 47%|████▋     | 700/1500 [00:14<00:10, 75.86it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.7059, 'learning_rate': 4e-05, 'epoch': 46.67}



 47%|████▋     | 700/1500 [00:14<00:10, 75.86it/s]Saving model checkpoint to temp/checkpoint-700
Configuration saved in temp/checkpoint-700/config.json
Model weights saved in temp/checkpoint-700/pytorch_model.bin


{'eval_loss': 1.3347644805908203, 'eval_accuracy': 0.45918367346938777, 'eval_runtime': 0.0147, 'eval_samples_per_second': 6644.925, 'eval_steps_per_second': 135.611, 'epoch': 46.67}


 53%|█████▎    | 800/1500 [00:16<00:09, 72.67it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.6544, 'learning_rate': 3.5e-05, 'epoch': 53.33}



 53%|█████▎    | 800/1500 [00:16<00:09, 72.67it/s]Saving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json
Model weights saved in temp/checkpoint-800/pytorch_model.bin


{'eval_loss': 1.425398349761963, 'eval_accuracy': 0.47959183673469385, 'eval_runtime': 0.0142, 'eval_samples_per_second': 6912.099, 'eval_steps_per_second': 141.063, 'epoch': 53.33}


 60%|██████    | 900/1500 [00:18<00:08, 71.78it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.6276, 'learning_rate': 3e-05, 'epoch': 60.0}



 60%|██████    | 900/1500 [00:18<00:08, 71.78it/s]Saving model checkpoint to temp/checkpoint-900
Configuration saved in temp/checkpoint-900/config.json


{'eval_loss': 1.4655227661132812, 'eval_accuracy': 0.5, 'eval_runtime': 0.0129, 'eval_samples_per_second': 7575.55, 'eval_steps_per_second': 154.603, 'epoch': 60.0}


Model weights saved in temp/checkpoint-900/pytorch_model.bin
 67%|██████▋   | 1000/1500 [00:20<00:06, 75.70it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.6, 'learning_rate': 2.5e-05, 'epoch': 66.67}



 67%|██████▋   | 1000/1500 [00:20<00:06, 75.70it/s]Saving model checkpoint to temp/checkpoint-1000
Configuration saved in temp/checkpoint-1000/config.json


{'eval_loss': 1.512174367904663, 'eval_accuracy': 0.47959183673469385, 'eval_runtime': 0.018, 'eval_samples_per_second': 5440.659, 'eval_steps_per_second': 111.034, 'epoch': 66.67}


Model weights saved in temp/checkpoint-1000/pytorch_model.bin
 73%|███████▎  | 1100/1500 [00:22<00:05, 75.45it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.5728, 'learning_rate': 2e-05, 'epoch': 73.33}



 73%|███████▎  | 1100/1500 [00:22<00:05, 75.45it/s]Saving model checkpoint to temp/checkpoint-1100
Configuration saved in temp/checkpoint-1100/config.json


{'eval_loss': 1.5403228998184204, 'eval_accuracy': 0.46938775510204084, 'eval_runtime': 0.0162, 'eval_samples_per_second': 6050.694, 'eval_steps_per_second': 123.484, 'epoch': 73.33}


Model weights saved in temp/checkpoint-1100/pytorch_model.bin
 80%|████████  | 1200/1500 [00:24<00:03, 76.13it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.5525, 'learning_rate': 1.5e-05, 'epoch': 80.0}



 80%|████████  | 1200/1500 [00:24<00:03, 76.13it/s]Saving model checkpoint to temp/checkpoint-1200
Configuration saved in temp/checkpoint-1200/config.json


{'eval_loss': 1.58753502368927, 'eval_accuracy': 0.46938775510204084, 'eval_runtime': 0.0136, 'eval_samples_per_second': 7224.822, 'eval_steps_per_second': 147.445, 'epoch': 80.0}


Model weights saved in temp/checkpoint-1200/pytorch_model.bin
 87%|████████▋ | 1300/1500 [00:26<00:02, 76.43it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.5459, 'learning_rate': 1e-05, 'epoch': 86.67}



 87%|████████▋ | 1300/1500 [00:26<00:02, 76.43it/s]Saving model checkpoint to temp/checkpoint-1300
Configuration saved in temp/checkpoint-1300/config.json


{'eval_loss': 1.6077765226364136, 'eval_accuracy': 0.46938775510204084, 'eval_runtime': 0.0151, 'eval_samples_per_second': 6478.711, 'eval_steps_per_second': 132.219, 'epoch': 86.67}


Model weights saved in temp/checkpoint-1300/pytorch_model.bin
 93%|█████████▎| 1400/1500 [00:28<00:01, 75.59it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.526, 'learning_rate': 5e-06, 'epoch': 93.33}



 93%|█████████▎| 1400/1500 [00:28<00:01, 75.59it/s]Saving model checkpoint to temp/checkpoint-1400
Configuration saved in temp/checkpoint-1400/config.json


{'eval_loss': 1.6077141761779785, 'eval_accuracy': 0.47959183673469385, 'eval_runtime': 0.014, 'eval_samples_per_second': 7024.674, 'eval_steps_per_second': 143.361, 'epoch': 93.33}


Model weights saved in temp/checkpoint-1400/pytorch_model.bin
100%|██████████| 1500/1500 [00:30<00:00, 75.02it/s]***** Running Evaluation *****
  Num examples = 98
  Batch size = 96


{'loss': 0.5256, 'learning_rate': 0.0, 'epoch': 100.0}



100%|██████████| 1500/1500 [00:30<00:00, 75.02it/s]Saving model checkpoint to temp/checkpoint-1500
Configuration saved in temp/checkpoint-1500/config.json


{'eval_loss': 1.6162593364715576, 'eval_accuracy': 0.47959183673469385, 'eval_runtime': 0.0139, 'eval_samples_per_second': 7033.208, 'eval_steps_per_second': 143.535, 'epoch': 100.0}


Model weights saved in temp/checkpoint-1500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-300 (score: 1.0742554664611816).
100%|██████████| 1500/1500 [00:31<00:00, 47.55it/s]

{'train_runtime': 31.5273, 'train_samples_per_second': 2210.785, 'train_steps_per_second': 47.578, 'train_loss': 0.7398870595296224, 'epoch': 100.0}





TrainOutput(global_step=1500, training_loss=0.7398870595296224, metrics={'train_runtime': 31.5273, 'train_samples_per_second': 2210.785, 'train_steps_per_second': 47.578, 'train_loss': 0.7398870595296224, 'epoch': 100.0})