In [1]:
import pandas as pd
import torch
import os

from torch.utils.data import Dataset
from transformers import (BertTokenizer, BertForSequenceClassification, Trainer,
                          TrainingArguments, BertPreTrainedModel)
# from simpletransformers.language_modeling import LanguageModelingModel
from sklearn.metrics import accuracy_score, f1_score


In [2]:
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))
print(torch.cuda.get_device_properties(0))

True
NVIDIA GeForce RTX 3060
_CudaDeviceProperties(name='NVIDIA GeForce RTX 3060', major=8, minor=6, total_memory=12050MB, multi_processor_count=28)


In [3]:
MODEL_NAME = '/home/abdullah/Code/dl/499A/best_models/epoch_3_merged_dataset_tinybert'
TRAIN_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/ABSA_datasets/BASA_cricket_train.tsv'
TEST_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/ABSA_datasets/BASA_cricket_test.tsv'
EVAL_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/ABSA_datasets/BASA_cricket_dev.tsv'


In [4]:
def tsv_to_text(tsv_file_loc):
    file_name = tsv_file_loc.split("/")[-1].split(".")[0]
    txt_name = tsv_file_loc.replace(".tsv", ".txt")
    txt_name = txt_name.replace("split_merged", "texts")

    if os.path.exists(txt_name):
        return [txt_name, file_name]

    df = pd.read_csv(tsv_file_loc, sep="\t")

    for txt in df["text"]:
        with open(txt_name, "a", encoding="utf8") as f:
            f.writelines(txt + "\n")
    return [txt_name, file_name]


tsv_to_text("/home/abdullah/Code/dl/bnlp-resources/sentiment/ABSA_datasets/BASA_cricket_dev.tsv")


['/home/abdullah/Code/dl/bnlp-resources/sentiment/ABSA_datasets/BASA_cricket_dev.txt',
 'BASA_cricket_dev']

In [5]:
def tsv_to_df(csv_file_loc):
    df = pd.read_csv(csv_file_loc, sep='\t')
    # remove id column
    df = df.drop(columns=['id'])

    # replace neutral with 0, positive with 1, negative with 2 in class_label column
    df.loc[df['class_label'] == 'neutral', 'class_label'] = 0
    df.loc[df['class_label'] == 'positive', 'class_label'] = 1
    df.loc[df['class_label'] == 'negative', 'class_label'] = 2

    return df


train_df = tsv_to_df(
    "/home/abdullah/Code/dl/bnlp-resources/sentiment/ABSA_datasets/BASA_restaurant_train.tsv")

texts = train_df['text'].tolist()
print(train_df)


                                                   text class_label
0     তাই, ভাল খাবারের জন্য আমিসুপারিশ করতাম, কিন্তু...           1
1     খাঁটি খাবার এবং তাদের কাছে সত্যিকারের সেবা আছে...           1
2     আমি  পাচ বছরেরও বেশি সময় ধরে উপরের হার্টল্যান...           1
3                                 এটা সুপারিশ আমি করেছি           1
4     জানিনা পূর্ববর্তী সমালোচক কে ছিলেন, লং, ডাইন্ড...           1
...                                                 ...         ...
1360  কিন্তু একরকম একটু অপেক্ষা করতে পারতাম,আমরা উদ্...           1
1361              গ্রেট bagels পুরানো-ভাবে তৈরির উপায় ।           1
1362     অ্যাট্রাকিয়াস থাকার কারনে খাবারটি ভয়াবহ ছিল।           2
1363  ওয়েটার আমাদের উপেক্ষা করেছিলেন তা আমরা আমাদের...           2
1364                            এই জায়গাটি ভীশন খারাপ।           2

[1365 rows x 2 columns]


In [6]:
def compute_metrics(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  # calculate accuracy using sklearn's function
  acc = accuracy_score(labels, preds)
  return {
      'accuracy': acc,
  }


In [7]:
def f1_calculator(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  # calculate accuracy using sklearn's function
  f1 = f1_score(labels, preds, average='weighted')
  return {
      'f1': f1,
  }


In [8]:
train_list = tsv_to_text(TRAIN_FILE_LOC)
test_list = tsv_to_text(TEST_FILE_LOC)
eval_list = tsv_to_text(EVAL_FILE_LOC)


In [9]:
os.environ["WANDB_DISABLED"] = "true"

train_df = tsv_to_df(TRAIN_FILE_LOC)
test_df = tsv_to_df(TEST_FILE_LOC)
eval_df = tsv_to_df(EVAL_FILE_LOC)


class TINYDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(value[idx])
                for key, value in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

train_encodings = tokenizer(train_df['text'].tolist(
), padding=True, truncation=True, max_length=512)
test_encodings = tokenizer(test_df['text'].tolist(
), padding=True, truncation=True, max_length=512)
eval_encodings = tokenizer(eval_df['text'].tolist(
), padding=True, truncation=True, max_length=512)

train_dataset = TINYDataset(train_encodings, train_df['class_label'].tolist())
test_dataset = TINYDataset(test_encodings, test_df['class_label'].tolist())
eval_dataset = TINYDataset(eval_encodings, eval_df['class_label'].tolist())

model = BertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3).to('cuda')
model.manual_seed = 14


Some weights of the model checkpoint at /home/abdullah/Code/dl/499A/best_models/epoch_3_merged_dataset_tinybert were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized fr

## 1 Epoch Train

In [10]:
# training_args = TrainingArguments(
#     output_dir=f"temp",
#     num_train_epochs=1,
#     per_device_train_batch_size=48,
#     per_device_eval_batch_size=96,
#     warmup_steps=500,
#     learning_rate=5e-5,
#     weight_decay=0.01,
#     overwrite_output_dir=True,
#     logging_dir=f"temp/logs",
#     logging_steps=29,
#     save_steps=29,
#     load_best_model_at_end=True,
#     evaluation_strategy="steps",
#     seed=14,
# )

# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=train_dataset,
#     eval_dataset=eval_dataset

# )

# trainer.train()


## 100 Epoch with 1 epoch model

In [11]:
MODEL_NAME = '/home/abdullah/Code/dl/499A/best_models/1/cricket_sentiment'
model = BertForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=3).to('cuda')
model.manual_seed = 14

for name, param in model.named_parameters():
    if 'bert.encoder.layer.0' in name:
        # print(name, param.requires_grad)
        param.requires_grad = False
# print("\n\n\n")
# for name, param in model.named_parameters():
#     print(name, param.requires_grad)

training_args = TrainingArguments(
    output_dir=f"temp",
    num_train_epochs=100,
    per_device_train_batch_size=192,
    per_device_eval_batch_size=384,
    warmup_steps=500,
    learning_rate=5e-5,
    weight_decay=0.01,
    overwrite_output_dir=True,
    logging_dir=f"temp/logs",
    logging_steps=100,
    save_steps=100,
    load_best_model_at_end=True,
    evaluation_strategy="steps",
    seed=14,
    label_smoothing_factor=0.5,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=f1_calculator,

)

trainer.train()

# print(cool.metrics)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,

)

trainer.train()


Using the `WAND_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
***** Running training *****
  Num examples = 2085
  Num Epochs = 100
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 1100
  9%|▉         | 100/1100 [00:05<00:47, 21.09it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 130.8786, 'learning_rate': 1e-05, 'epoch': 9.09}


                                                  
  9%|▉         | 100/1100 [00:05<00:47, 21.09it/s]Saving model checkpoint to temp/checkpoint-100
Configuration saved in temp/checkpoint-100/config.json


{'eval_loss': 91.90149688720703, 'eval_f1': 0.6083111233648868, 'eval_runtime': 0.0447, 'eval_samples_per_second': 8326.784, 'eval_steps_per_second': 22.384, 'epoch': 9.09}


Model weights saved in temp/checkpoint-100/pytorch_model.bin
 18%|█▊        | 200/1100 [00:10<00:43, 20.77it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 88.2368, 'learning_rate': 2e-05, 'epoch': 18.18}


                                                  
 18%|█▊        | 200/1100 [00:10<00:43, 20.77it/s]Saving model checkpoint to temp/checkpoint-200
Configuration saved in temp/checkpoint-200/config.json


{'eval_loss': 48.29976272583008, 'eval_f1': 0.5047865603210774, 'eval_runtime': 0.0438, 'eval_samples_per_second': 8487.261, 'eval_steps_per_second': 22.815, 'epoch': 18.18}


Model weights saved in temp/checkpoint-200/pytorch_model.bin
 27%|██▋       | 300/1100 [00:16<00:37, 21.26it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 75.8283, 'learning_rate': 3e-05, 'epoch': 27.27}


                                                  
 27%|██▋       | 300/1100 [00:16<00:37, 21.26it/s]Saving model checkpoint to temp/checkpoint-300
Configuration saved in temp/checkpoint-300/config.json
Model weights saved in temp/checkpoint-300/pytorch_model.bin


{'eval_loss': 46.43975067138672, 'eval_f1': 0.5784316661779951, 'eval_runtime': 0.0449, 'eval_samples_per_second': 8286.409, 'eval_steps_per_second': 22.275, 'epoch': 27.27}


 36%|███▋      | 400/1100 [00:21<00:33, 21.17it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 67.1917, 'learning_rate': 4e-05, 'epoch': 36.36}


                                                  
 36%|███▋      | 400/1100 [00:21<00:33, 21.17it/s]Saving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json


{'eval_loss': 41.16146469116211, 'eval_f1': 0.6171162023597626, 'eval_runtime': 0.042, 'eval_samples_per_second': 8850.951, 'eval_steps_per_second': 23.793, 'epoch': 36.36}


Model weights saved in temp/checkpoint-400/pytorch_model.bin
 45%|████▌     | 500/1100 [00:27<00:28, 20.95it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 60.7212, 'learning_rate': 5e-05, 'epoch': 45.45}


                                                  
 45%|████▌     | 500/1100 [00:27<00:28, 20.95it/s]Saving model checkpoint to temp/checkpoint-500
Configuration saved in temp/checkpoint-500/config.json


{'eval_loss': 36.625518798828125, 'eval_f1': 0.6266134657186047, 'eval_runtime': 0.0436, 'eval_samples_per_second': 8541.576, 'eval_steps_per_second': 22.961, 'epoch': 45.45}


Model weights saved in temp/checkpoint-500/pytorch_model.bin
 55%|█████▍    | 600/1100 [00:32<00:22, 22.37it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 54.678, 'learning_rate': 4.166666666666667e-05, 'epoch': 54.55}


                                                  
 55%|█████▍    | 600/1100 [00:32<00:22, 22.37it/s]Saving model checkpoint to temp/checkpoint-600
Configuration saved in temp/checkpoint-600/config.json
Model weights saved in temp/checkpoint-600/pytorch_model.bin


{'eval_loss': 24.201570510864258, 'eval_f1': 0.4879341828300805, 'eval_runtime': 0.0394, 'eval_samples_per_second': 9431.67, 'eval_steps_per_second': 25.354, 'epoch': 54.55}


 64%|██████▎   | 700/1100 [00:38<00:18, 21.38it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 50.8865, 'learning_rate': 3.3333333333333335e-05, 'epoch': 63.64}


                                                  
 64%|██████▎   | 700/1100 [00:38<00:18, 21.38it/s]Saving model checkpoint to temp/checkpoint-700
Configuration saved in temp/checkpoint-700/config.json


{'eval_loss': 20.12291145324707, 'eval_f1': 0.6129443877728311, 'eval_runtime': 0.043, 'eval_samples_per_second': 8642.636, 'eval_steps_per_second': 23.233, 'epoch': 63.64}


Model weights saved in temp/checkpoint-700/pytorch_model.bin
 73%|███████▎  | 800/1100 [00:43<00:13, 22.12it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 49.2842, 'learning_rate': 2.5e-05, 'epoch': 72.73}


                                                  
 73%|███████▎  | 800/1100 [00:43<00:13, 22.12it/s]Saving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json


{'eval_loss': 21.029022216796875, 'eval_f1': 0.6200078464485859, 'eval_runtime': 0.0397, 'eval_samples_per_second': 9370.664, 'eval_steps_per_second': 25.19, 'epoch': 72.73}


Model weights saved in temp/checkpoint-800/pytorch_model.bin
 82%|████████▏ | 900/1100 [00:48<00:09, 21.18it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 47.6578, 'learning_rate': 1.6666666666666667e-05, 'epoch': 81.82}


                                                  
 82%|████████▏ | 900/1100 [00:49<00:09, 21.18it/s]Saving model checkpoint to temp/checkpoint-900
Configuration saved in temp/checkpoint-900/config.json


{'eval_loss': 19.012510299682617, 'eval_f1': 0.6450407439697763, 'eval_runtime': 0.0432, 'eval_samples_per_second': 8619.433, 'eval_steps_per_second': 23.171, 'epoch': 81.82}


Model weights saved in temp/checkpoint-900/pytorch_model.bin
 91%|█████████ | 1000/1100 [00:54<00:04, 20.91it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 46.699, 'learning_rate': 8.333333333333334e-06, 'epoch': 90.91}


                                                   
 91%|█████████ | 1000/1100 [00:54<00:04, 20.91it/s]Saving model checkpoint to temp/checkpoint-1000
Configuration saved in temp/checkpoint-1000/config.json
Model weights saved in temp/checkpoint-1000/pytorch_model.bin


{'eval_loss': 19.56389045715332, 'eval_f1': 0.638535733464161, 'eval_runtime': 0.0435, 'eval_samples_per_second': 8544.195, 'eval_steps_per_second': 22.968, 'epoch': 90.91}


100%|██████████| 1100/1100 [00:59<00:00, 21.18it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 46.1549, 'learning_rate': 0.0, 'epoch': 100.0}


                                                   
100%|██████████| 1100/1100 [00:59<00:00, 21.18it/s]Saving model checkpoint to temp/checkpoint-1100
Configuration saved in temp/checkpoint-1100/config.json
Model weights saved in temp/checkpoint-1100/pytorch_model.bin


{'eval_loss': 19.52021598815918, 'eval_f1': 0.6348567368073713, 'eval_runtime': 0.0424, 'eval_samples_per_second': 8764.049, 'eval_steps_per_second': 23.559, 'epoch': 100.0}




Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-900 (score: 19.012510299682617).
100%|██████████| 1100/1100 [01:00<00:00, 18.32it/s]
***** Running training *****
  Num examples = 2085
  Num Epochs = 100
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 1100


{'train_runtime': 60.0603, 'train_samples_per_second': 3471.514, 'train_steps_per_second': 18.315, 'train_loss': 65.2924658203125, 'epoch': 100.0}


  9%|▉         | 100/1100 [00:04<00:46, 21.48it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 46.7299, 'learning_rate': 1e-05, 'epoch': 9.09}



  9%|▉         | 100/1100 [00:04<00:46, 21.48it/s]Saving model checkpoint to temp/checkpoint-100
Configuration saved in temp/checkpoint-100/config.json
Model weights saved in temp/checkpoint-100/pytorch_model.bin


{'eval_loss': 18.507688522338867, 'eval_accuracy': 0.717741935483871, 'eval_runtime': 0.0438, 'eval_samples_per_second': 8494.7, 'eval_steps_per_second': 22.835, 'epoch': 9.09}


 18%|█▊        | 200/1100 [00:09<00:42, 21.16it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 46.108, 'learning_rate': 2e-05, 'epoch': 18.18}



 18%|█▊        | 200/1100 [00:09<00:42, 21.16it/s]Saving model checkpoint to temp/checkpoint-200
Configuration saved in temp/checkpoint-200/config.json
Model weights saved in temp/checkpoint-200/pytorch_model.bin


{'eval_loss': 18.638593673706055, 'eval_accuracy': 0.739247311827957, 'eval_runtime': 0.0419, 'eval_samples_per_second': 8880.118, 'eval_steps_per_second': 23.871, 'epoch': 18.18}


 27%|██▋       | 300/1100 [00:15<00:37, 21.23it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 45.3699, 'learning_rate': 3e-05, 'epoch': 27.27}



 27%|██▋       | 300/1100 [00:15<00:37, 21.23it/s]Saving model checkpoint to temp/checkpoint-300
Configuration saved in temp/checkpoint-300/config.json


{'eval_loss': 18.09552001953125, 'eval_accuracy': 0.7311827956989247, 'eval_runtime': 0.0417, 'eval_samples_per_second': 8923.847, 'eval_steps_per_second': 23.989, 'epoch': 27.27}


Model weights saved in temp/checkpoint-300/pytorch_model.bin
 36%|███▋      | 400/1100 [00:20<00:33, 20.79it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 45.2348, 'learning_rate': 4e-05, 'epoch': 36.36}



 36%|███▋      | 400/1100 [00:20<00:33, 20.79it/s]Saving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json


{'eval_loss': 18.409032821655273, 'eval_accuracy': 0.7365591397849462, 'eval_runtime': 0.0436, 'eval_samples_per_second': 8530.088, 'eval_steps_per_second': 22.93, 'epoch': 36.36}


Model weights saved in temp/checkpoint-400/pytorch_model.bin
 45%|████▌     | 500/1100 [00:26<00:28, 21.27it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 44.2393, 'learning_rate': 5e-05, 'epoch': 45.45}



 45%|████▌     | 500/1100 [00:26<00:28, 21.27it/s]Saving model checkpoint to temp/checkpoint-500
Configuration saved in temp/checkpoint-500/config.json
Model weights saved in temp/checkpoint-500/pytorch_model.bin


{'eval_loss': 18.75053596496582, 'eval_accuracy': 0.7338709677419355, 'eval_runtime': 0.0403, 'eval_samples_per_second': 9223.591, 'eval_steps_per_second': 24.795, 'epoch': 45.45}


 55%|█████▍    | 600/1100 [00:31<00:23, 21.14it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 44.3362, 'learning_rate': 4.166666666666667e-05, 'epoch': 54.55}



 55%|█████▍    | 600/1100 [00:31<00:23, 21.14it/s]Saving model checkpoint to temp/checkpoint-600
Configuration saved in temp/checkpoint-600/config.json


{'eval_loss': 19.313886642456055, 'eval_accuracy': 0.7365591397849462, 'eval_runtime': 0.0447, 'eval_samples_per_second': 8322.698, 'eval_steps_per_second': 22.373, 'epoch': 54.55}


Model weights saved in temp/checkpoint-600/pytorch_model.bin
 64%|██████▎   | 700/1100 [00:36<00:18, 21.41it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 43.5695, 'learning_rate': 3.3333333333333335e-05, 'epoch': 63.64}



 64%|██████▎   | 700/1100 [00:36<00:18, 21.41it/s]Saving model checkpoint to temp/checkpoint-700
Configuration saved in temp/checkpoint-700/config.json
Model weights saved in temp/checkpoint-700/pytorch_model.bin


{'eval_loss': 12.41193962097168, 'eval_accuracy': 0.739247311827957, 'eval_runtime': 0.0455, 'eval_samples_per_second': 8167.6, 'eval_steps_per_second': 21.956, 'epoch': 63.64}


 73%|███████▎  | 800/1100 [00:42<00:14, 21.14it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 43.4564, 'learning_rate': 2.5e-05, 'epoch': 72.73}



 73%|███████▎  | 800/1100 [00:42<00:14, 21.14it/s]Saving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json
Model weights saved in temp/checkpoint-800/pytorch_model.bin


{'eval_loss': 15.014369010925293, 'eval_accuracy': 0.7365591397849462, 'eval_runtime': 0.0431, 'eval_samples_per_second': 8628.823, 'eval_steps_per_second': 23.196, 'epoch': 72.73}


 82%|████████▏ | 900/1100 [00:47<00:09, 21.46it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 43.2245, 'learning_rate': 1.6666666666666667e-05, 'epoch': 81.82}



 82%|████████▏ | 900/1100 [00:47<00:09, 21.46it/s]Saving model checkpoint to temp/checkpoint-900
Configuration saved in temp/checkpoint-900/config.json


{'eval_loss': 14.689544677734375, 'eval_accuracy': 0.7365591397849462, 'eval_runtime': 0.0414, 'eval_samples_per_second': 8976.006, 'eval_steps_per_second': 24.129, 'epoch': 81.82}


Model weights saved in temp/checkpoint-900/pytorch_model.bin
 91%|█████████ | 1000/1100 [00:53<00:04, 21.44it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 42.9088, 'learning_rate': 8.333333333333334e-06, 'epoch': 90.91}



 91%|█████████ | 1000/1100 [00:53<00:04, 21.44it/s]Saving model checkpoint to temp/checkpoint-1000
Configuration saved in temp/checkpoint-1000/config.json
Model weights saved in temp/checkpoint-1000/pytorch_model.bin


{'eval_loss': 13.37684440612793, 'eval_accuracy': 0.7365591397849462, 'eval_runtime': 0.0435, 'eval_samples_per_second': 8549.485, 'eval_steps_per_second': 22.982, 'epoch': 90.91}


100%|██████████| 1100/1100 [00:58<00:00, 21.11it/s]***** Running Evaluation *****
  Num examples = 372
  Batch size = 384


{'loss': 42.6756, 'learning_rate': 0.0, 'epoch': 100.0}



100%|██████████| 1100/1100 [00:58<00:00, 21.11it/s]Saving model checkpoint to temp/checkpoint-1100
Configuration saved in temp/checkpoint-1100/config.json
Model weights saved in temp/checkpoint-1100/pytorch_model.bin


{'eval_loss': 13.086823463439941, 'eval_accuracy': 0.7365591397849462, 'eval_runtime': 0.0418, 'eval_samples_per_second': 8894.646, 'eval_steps_per_second': 23.91, 'epoch': 100.0}




Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-700 (score: 12.41193962097168).
100%|██████████| 1100/1100 [00:58<00:00, 18.73it/s]

{'train_runtime': 58.731, 'train_samples_per_second': 3550.086, 'train_steps_per_second': 18.729, 'train_loss': 44.350263671875, 'epoch': 100.0}





TrainOutput(global_step=1100, training_loss=44.350263671875, metrics={'train_runtime': 58.731, 'train_samples_per_second': 3550.086, 'train_steps_per_second': 18.729, 'train_loss': 44.350263671875, 'epoch': 100.0})