## 1. Load packages

In [23]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [24]:
import torch
import numpy as np
from scipy.spatial.distance import cosine
from transformers import AutoModel, AutoTokenizer, Trainer, AutoModelForSequenceClassification, BertForSequenceClassification, BertConfig,BertModel
from datasets import load_dataset, ClassLabel, Value, load_metric

## 2.Prepare dataset for finetuning

In [25]:
# load the tokenizer pretrained on bert-base
tokenizer = AutoTokenizer.from_pretrained("princeton-nlp/sup-simcse-bert-base-uncased")

loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at C:\Users\52673/.cache\huggingface\transformers\886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  

In [26]:
# sampling the dataset for fine-tuning
train = load_dataset("quora", split = 'train[:35%]')     # classic 7/3 split
validation = load_dataset("quora", split = 'train[35%:50%]')

Using custom data configuration default
Reusing dataset quora (C:\Users\52673\.cache\huggingface\datasets\quora\default\0.0.0\36ba4cd42107f051a158016f1bea6ae3f4685c5df843529108a54e42d86c1e04)
Using custom data configuration default
Reusing dataset quora (C:\Users\52673\.cache\huggingface\datasets\quora\default\0.0.0\36ba4cd42107f051a158016f1bea6ae3f4685c5df843529108a54e42d86c1e04)


In [27]:
# convert the dtype of 'is_duplicate' to int
new_features = train.features.copy()
new_features["is_duplicate"] = Value('int32')
train = train.cast(new_features)

new_features = validation.features.copy()
new_features["is_duplicate"] = Value('int32')
validation = validation.cast(new_features)

Loading cached processed dataset at C:\Users\52673\.cache\huggingface\datasets\quora\default\0.0.0\36ba4cd42107f051a158016f1bea6ae3f4685c5df843529108a54e42d86c1e04\cache-0474a84a76505170.arrow
Loading cached processed dataset at C:\Users\52673\.cache\huggingface\datasets\quora\default\0.0.0\36ba4cd42107f051a158016f1bea6ae3f4685c5df843529108a54e42d86c1e04\cache-387810a1332fde55.arrow


In [28]:
# encode the training dataset in the form of sentences pair
# truncate at length=64 for a balance of time consuming and information coverage
encoded_train = train.map(lambda batch: tokenizer(batch['questions']['text'][0], batch['questions']['text'][1], 
                                                  padding='max_length', truncation=True, max_length=64))
encoded_train.rename_column_("is_duplicate", "labels")

Loading cached processed dataset at C:\Users\52673\.cache\huggingface\datasets\quora\default\0.0.0\36ba4cd42107f051a158016f1bea6ae3f4685c5df843529108a54e42d86c1e04\cache-94540c3e4c186fdb.arrow


In [29]:
# encode the validation dataset in the form of sentences pair
encoded_validation = validation.map(lambda batch: tokenizer(batch['questions']['text'][0], batch['questions']['text'][1], 
                                                            padding='max_length', truncation=True, max_length=64))
encoded_validation.rename_column_("is_duplicate", "labels")

100%|██████████| 60643/60643 [00:31<00:00, 1950.24ex/s]


In [30]:
encoded_train.set_format("torch", columns=["input_ids", "token_type_ids", "attention_mask", "labels"])
encoded_validation.set_format("torch", columns=["input_ids", "token_type_ids", "attention_mask", "labels"])

In [31]:
# check if the format is valid, and if each tensor have the same size
print({key: val.shape for key, val in encoded_validation[0].items()})
print({key: val.shape for key, val in encoded_validation[1].items()})

{'labels': torch.Size([]), 'input_ids': torch.Size([64]), 'token_type_ids': torch.Size([64]), 'attention_mask': torch.Size([64])}
{'labels': torch.Size([]), 'input_ids': torch.Size([64]), 'token_type_ids': torch.Size([64]), 'attention_mask': torch.Size([64])}


In [32]:
# take a look at an encoded sample
encoded_train[1]

{'labels': tensor(0),
 'input_ids': tensor([  101,  2054,  2003,  1996,  2466,  1997, 12849, 10606, 16506,  1006,
         12849,  2232,  1011,  1045,  1011,  2053,  2953,  1007,  6323,  1029,
           102,  2054,  2052,  4148,  2065,  1996,  2796,  2231, 10312,  1996,
         12849, 10606, 16506,  1006, 12849,  2232,  1011,  1045,  1011,  2053,
          2953,  1007,  6323,  2067,  1029,   102,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0]),
 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 

## 3. Fine-tune

In [33]:
# another way to load pretrained weights
output_config_file = "./config.json"
output_model_file = "./sup-simcse.bin"

config = BertConfig.from_json_file(output_config_file)
model = BertModel(config = config)
state_dict = torch.load(output_model_file)
model.load_state_dict(state_dict)
pass

In [34]:
# load pretrained weights to initialize Bert
model = BertForSequenceClassification.from_pretrained("princeton-nlp/sup-simcse-bert-base-uncased")

loading configuration file https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased/resolve/main/config.json from cache at C:\Users\52673/.cache\huggingface\transformers\886dba277a27c6ab50ab3d0bfd8839d354cfeed717289623026415c62b687338.1b14bcddba43d86a607eedb4b638b87d30aa00c839358953dbd36f2cd3317c83
Model config BertConfig {
  "_name_or_path": "result/bert-base-uncased-cls_before_pooler-sym_mlp-mlp_bert-bs64-gpu8-gs1-lr5e-5-m=stsb-norm0.05-l32-contra",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  

In [13]:
# clean up gpu cache before training
import gc

gc.collect()

torch.cuda.empty_cache()

In [14]:
# train
trainer = Trainer(model=model, train_dataset=encoded_train, eval_dataset=encoded_validation)

trainer.train()    
#trainer.train(resume_from_checkpoint=True) # True if already trained, to save time by continuing on a checkpoint

The following columns in the training set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: questions.
***** Running training *****
  Num examples = 141502
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 53064
  1%|          | 500/53064 [01:42<2:57:48,  4.93it/s]Saving model checkpoint to tmp_trainer\checkpoint-500
Configuration saved in tmp_trainer\checkpoint-500\config.json


{'loss': 0.6375, 'learning_rate': 4.9528870797527514e-05, 'epoch': 0.03}


Model weights saved in tmp_trainer\checkpoint-500\pytorch_model.bin
  2%|▏         | 1000/53064 [03:26<2:54:51,  4.96it/s]Saving model checkpoint to tmp_trainer\checkpoint-1000
Configuration saved in tmp_trainer\checkpoint-1000\config.json


{'loss': 0.6511, 'learning_rate': 4.9057741595055026e-05, 'epoch': 0.06}


Model weights saved in tmp_trainer\checkpoint-1000\pytorch_model.bin
  3%|▎         | 1500/53064 [05:06<2:43:17,  5.26it/s]Saving model checkpoint to tmp_trainer\checkpoint-1500
Configuration saved in tmp_trainer\checkpoint-1500\config.json


{'loss': 0.661, 'learning_rate': 4.8586612392582545e-05, 'epoch': 0.08}


Model weights saved in tmp_trainer\checkpoint-1500\pytorch_model.bin
  4%|▍         | 2000/53064 [06:46<2:44:57,  5.16it/s]Saving model checkpoint to tmp_trainer\checkpoint-2000
Configuration saved in tmp_trainer\checkpoint-2000\config.json


{'loss': 0.654, 'learning_rate': 4.811548319011006e-05, 'epoch': 0.11}


Model weights saved in tmp_trainer\checkpoint-2000\pytorch_model.bin
  5%|▍         | 2500/53064 [08:28<2:51:12,  4.92it/s]Saving model checkpoint to tmp_trainer\checkpoint-2500
Configuration saved in tmp_trainer\checkpoint-2500\config.json


{'loss': 0.6699, 'learning_rate': 4.7644353987637576e-05, 'epoch': 0.14}


Model weights saved in tmp_trainer\checkpoint-2500\pytorch_model.bin
  6%|▌         | 3000/53064 [10:11<2:42:39,  5.13it/s]Saving model checkpoint to tmp_trainer\checkpoint-3000
Configuration saved in tmp_trainer\checkpoint-3000\config.json


{'loss': 0.6653, 'learning_rate': 4.717322478516509e-05, 'epoch': 0.17}


Model weights saved in tmp_trainer\checkpoint-3000\pytorch_model.bin
  7%|▋         | 3500/53064 [11:53<2:41:16,  5.12it/s]Saving model checkpoint to tmp_trainer\checkpoint-3500
Configuration saved in tmp_trainer\checkpoint-3500\config.json


{'loss': 0.6694, 'learning_rate': 4.67020955826926e-05, 'epoch': 0.2}


Model weights saved in tmp_trainer\checkpoint-3500\pytorch_model.bin
  8%|▊         | 4000/53064 [13:33<2:37:18,  5.20it/s]Saving model checkpoint to tmp_trainer\checkpoint-4000
Configuration saved in tmp_trainer\checkpoint-4000\config.json


{'loss': 0.6673, 'learning_rate': 4.623096638022011e-05, 'epoch': 0.23}


Model weights saved in tmp_trainer\checkpoint-4000\pytorch_model.bin
  8%|▊         | 4500/53064 [15:16<2:41:45,  5.00it/s]Saving model checkpoint to tmp_trainer\checkpoint-4500
Configuration saved in tmp_trainer\checkpoint-4500\config.json


{'loss': 0.6658, 'learning_rate': 4.575983717774763e-05, 'epoch': 0.25}


Model weights saved in tmp_trainer\checkpoint-4500\pytorch_model.bin
  9%|▉         | 5000/53064 [16:56<2:32:35,  5.25it/s]Saving model checkpoint to tmp_trainer\checkpoint-5000
Configuration saved in tmp_trainer\checkpoint-5000\config.json


{'loss': 0.6683, 'learning_rate': 4.528870797527514e-05, 'epoch': 0.28}


Model weights saved in tmp_trainer\checkpoint-5000\pytorch_model.bin
 10%|█         | 5500/53064 [19:01<3:49:12,  3.46it/s]Saving model checkpoint to tmp_trainer\checkpoint-5500
Configuration saved in tmp_trainer\checkpoint-5500\config.json


{'loss': 0.6669, 'learning_rate': 4.4817578772802655e-05, 'epoch': 0.31}


Model weights saved in tmp_trainer\checkpoint-5500\pytorch_model.bin
 11%|█▏        | 6000/53064 [21:46<4:51:09,  2.69it/s]Saving model checkpoint to tmp_trainer\checkpoint-6000
Configuration saved in tmp_trainer\checkpoint-6000\config.json


{'loss': 0.669, 'learning_rate': 4.434644957033017e-05, 'epoch': 0.34}


Model weights saved in tmp_trainer\checkpoint-6000\pytorch_model.bin
 12%|█▏        | 6500/53064 [25:01<4:43:48,  2.73it/s]Saving model checkpoint to tmp_trainer\checkpoint-6500
Configuration saved in tmp_trainer\checkpoint-6500\config.json


{'loss': 0.6687, 'learning_rate': 4.387532036785768e-05, 'epoch': 0.37}


Model weights saved in tmp_trainer\checkpoint-6500\pytorch_model.bin
 13%|█▎        | 7000/53064 [28:19<5:06:26,  2.51it/s]Saving model checkpoint to tmp_trainer\checkpoint-7000
Configuration saved in tmp_trainer\checkpoint-7000\config.json


{'loss': 0.6638, 'learning_rate': 4.34041911653852e-05, 'epoch': 0.4}


Model weights saved in tmp_trainer\checkpoint-7000\pytorch_model.bin
 14%|█▍        | 7500/53064 [31:35<4:48:19,  2.63it/s]Saving model checkpoint to tmp_trainer\checkpoint-7500
Configuration saved in tmp_trainer\checkpoint-7500\config.json


{'loss': 0.6694, 'learning_rate': 4.293306196291271e-05, 'epoch': 0.42}


Model weights saved in tmp_trainer\checkpoint-7500\pytorch_model.bin
 15%|█▌        | 8000/53064 [34:52<4:53:49,  2.56it/s]Saving model checkpoint to tmp_trainer\checkpoint-8000
Configuration saved in tmp_trainer\checkpoint-8000\config.json


{'loss': 0.6709, 'learning_rate': 4.246193276044023e-05, 'epoch': 0.45}


Model weights saved in tmp_trainer\checkpoint-8000\pytorch_model.bin
 16%|█▌        | 8500/53064 [37:08<2:24:55,  5.13it/s]Saving model checkpoint to tmp_trainer\checkpoint-8500
Configuration saved in tmp_trainer\checkpoint-8500\config.json


{'loss': 0.6737, 'learning_rate': 4.199080355796774e-05, 'epoch': 0.48}


Model weights saved in tmp_trainer\checkpoint-8500\pytorch_model.bin
 17%|█▋        | 9000/53064 [38:55<2:39:12,  4.61it/s]Saving model checkpoint to tmp_trainer\checkpoint-9000
Configuration saved in tmp_trainer\checkpoint-9000\config.json


{'loss': 0.6643, 'learning_rate': 4.151967435549525e-05, 'epoch': 0.51}


Model weights saved in tmp_trainer\checkpoint-9000\pytorch_model.bin
 18%|█▊        | 9500/53064 [41:20<2:47:49,  4.33it/s]Saving model checkpoint to tmp_trainer\checkpoint-9500
Configuration saved in tmp_trainer\checkpoint-9500\config.json


{'loss': 0.6699, 'learning_rate': 4.1048545153022765e-05, 'epoch': 0.54}


Model weights saved in tmp_trainer\checkpoint-9500\pytorch_model.bin
 19%|█▉        | 10000/53064 [44:23<4:53:41,  2.44it/s]Saving model checkpoint to tmp_trainer\checkpoint-10000
Configuration saved in tmp_trainer\checkpoint-10000\config.json


{'loss': 0.6588, 'learning_rate': 4.057741595055028e-05, 'epoch': 0.57}


Model weights saved in tmp_trainer\checkpoint-10000\pytorch_model.bin
 20%|█▉        | 10500/53064 [47:43<4:17:00,  2.76it/s]Saving model checkpoint to tmp_trainer\checkpoint-10500
Configuration saved in tmp_trainer\checkpoint-10500\config.json


{'loss': 0.6651, 'learning_rate': 4.0106286748077796e-05, 'epoch': 0.59}


Model weights saved in tmp_trainer\checkpoint-10500\pytorch_model.bin
 21%|██        | 11000/53064 [51:02<4:24:43,  2.65it/s]Saving model checkpoint to tmp_trainer\checkpoint-11000
Configuration saved in tmp_trainer\checkpoint-11000\config.json


{'loss': 0.6675, 'learning_rate': 3.9635157545605314e-05, 'epoch': 0.62}


Model weights saved in tmp_trainer\checkpoint-11000\pytorch_model.bin
 22%|██▏       | 11500/53064 [54:25<4:26:46,  2.60it/s]Saving model checkpoint to tmp_trainer\checkpoint-11500
Configuration saved in tmp_trainer\checkpoint-11500\config.json


{'loss': 0.6679, 'learning_rate': 3.9164028343132826e-05, 'epoch': 0.65}


Model weights saved in tmp_trainer\checkpoint-11500\pytorch_model.bin
 23%|██▎       | 12000/53064 [57:43<4:26:14,  2.57it/s]Saving model checkpoint to tmp_trainer\checkpoint-12000
Configuration saved in tmp_trainer\checkpoint-12000\config.json


{'loss': 0.6672, 'learning_rate': 3.869289914066034e-05, 'epoch': 0.68}


Model weights saved in tmp_trainer\checkpoint-12000\pytorch_model.bin
 24%|██▎       | 12500/53064 [1:01:03<4:23:45,  2.56it/s]Saving model checkpoint to tmp_trainer\checkpoint-12500
Configuration saved in tmp_trainer\checkpoint-12500\config.json


{'loss': 0.6579, 'learning_rate': 3.822176993818785e-05, 'epoch': 0.71}


Model weights saved in tmp_trainer\checkpoint-12500\pytorch_model.bin
 24%|██▍       | 13000/53064 [1:04:17<4:08:06,  2.69it/s]Saving model checkpoint to tmp_trainer\checkpoint-13000
Configuration saved in tmp_trainer\checkpoint-13000\config.json


{'loss': 0.66, 'learning_rate': 3.775064073571536e-05, 'epoch': 0.73}


Model weights saved in tmp_trainer\checkpoint-13000\pytorch_model.bin
 25%|██▌       | 13500/53064 [1:07:33<4:15:01,  2.59it/s]Saving model checkpoint to tmp_trainer\checkpoint-13500
Configuration saved in tmp_trainer\checkpoint-13500\config.json


{'loss': 0.6622, 'learning_rate': 3.727951153324288e-05, 'epoch': 0.76}


Model weights saved in tmp_trainer\checkpoint-13500\pytorch_model.bin
 26%|██▋       | 14000/53064 [1:10:05<2:20:47,  4.62it/s]Saving model checkpoint to tmp_trainer\checkpoint-14000
Configuration saved in tmp_trainer\checkpoint-14000\config.json


{'loss': 0.6544, 'learning_rate': 3.680838233077039e-05, 'epoch': 0.79}


Model weights saved in tmp_trainer\checkpoint-14000\pytorch_model.bin
 27%|██▋       | 14500/53064 [1:11:59<5:07:08,  2.09it/s]Saving model checkpoint to tmp_trainer\checkpoint-14500
Configuration saved in tmp_trainer\checkpoint-14500\config.json


{'loss': 0.6617, 'learning_rate': 3.6337253128297905e-05, 'epoch': 0.82}


Model weights saved in tmp_trainer\checkpoint-14500\pytorch_model.bin
 28%|██▊       | 15000/53064 [1:15:21<3:55:50,  2.69it/s]Saving model checkpoint to tmp_trainer\checkpoint-15000
Configuration saved in tmp_trainer\checkpoint-15000\config.json


{'loss': 0.6602, 'learning_rate': 3.586612392582542e-05, 'epoch': 0.85}


Model weights saved in tmp_trainer\checkpoint-15000\pytorch_model.bin
 29%|██▉       | 15500/53064 [1:18:36<3:49:28,  2.73it/s]Saving model checkpoint to tmp_trainer\checkpoint-15500
Configuration saved in tmp_trainer\checkpoint-15500\config.json


{'loss': 0.662, 'learning_rate': 3.5394994723352936e-05, 'epoch': 0.88}


Model weights saved in tmp_trainer\checkpoint-15500\pytorch_model.bin
 30%|███       | 16000/53064 [1:21:55<3:53:36,  2.64it/s]Saving model checkpoint to tmp_trainer\checkpoint-16000
Configuration saved in tmp_trainer\checkpoint-16000\config.json


{'loss': 0.6596, 'learning_rate': 3.492386552088045e-05, 'epoch': 0.9}


Model weights saved in tmp_trainer\checkpoint-16000\pytorch_model.bin
 31%|███       | 16500/53064 [1:25:11<3:57:26,  2.57it/s]Saving model checkpoint to tmp_trainer\checkpoint-16500
Configuration saved in tmp_trainer\checkpoint-16500\config.json


{'loss': 0.666, 'learning_rate': 3.445273631840796e-05, 'epoch': 0.93}


Model weights saved in tmp_trainer\checkpoint-16500\pytorch_model.bin
 32%|███▏      | 17000/53064 [1:28:27<2:48:33,  3.57it/s]Saving model checkpoint to tmp_trainer\checkpoint-17000
Configuration saved in tmp_trainer\checkpoint-17000\config.json


{'loss': 0.6657, 'learning_rate': 3.398160711593548e-05, 'epoch': 0.96}


Model weights saved in tmp_trainer\checkpoint-17000\pytorch_model.bin
 33%|███▎      | 17500/53064 [1:30:43<2:04:37,  4.76it/s]Saving model checkpoint to tmp_trainer\checkpoint-17500
Configuration saved in tmp_trainer\checkpoint-17500\config.json


{'loss': 0.6571, 'learning_rate': 3.351047791346299e-05, 'epoch': 0.99}


Model weights saved in tmp_trainer\checkpoint-17500\pytorch_model.bin
 34%|███▍      | 18000/53064 [1:33:28<3:36:15,  2.70it/s]Saving model checkpoint to tmp_trainer\checkpoint-18000
Configuration saved in tmp_trainer\checkpoint-18000\config.json


{'loss': 0.6626, 'learning_rate': 3.30393487109905e-05, 'epoch': 1.02}


Model weights saved in tmp_trainer\checkpoint-18000\pytorch_model.bin
 35%|███▍      | 18500/53064 [1:36:44<3:57:54,  2.42it/s]Saving model checkpoint to tmp_trainer\checkpoint-18500
Configuration saved in tmp_trainer\checkpoint-18500\config.json


{'loss': 0.6586, 'learning_rate': 3.2568219508518015e-05, 'epoch': 1.05}


Model weights saved in tmp_trainer\checkpoint-18500\pytorch_model.bin
 36%|███▌      | 19000/53064 [1:39:57<3:36:15,  2.63it/s]Saving model checkpoint to tmp_trainer\checkpoint-19000
Configuration saved in tmp_trainer\checkpoint-19000\config.json


{'loss': 0.6582, 'learning_rate': 3.209709030604553e-05, 'epoch': 1.07}


Model weights saved in tmp_trainer\checkpoint-19000\pytorch_model.bin
 37%|███▋      | 19500/53064 [1:43:08<3:39:20,  2.55it/s]Saving model checkpoint to tmp_trainer\checkpoint-19500
Configuration saved in tmp_trainer\checkpoint-19500\config.json


{'loss': 0.6604, 'learning_rate': 3.1625961103573046e-05, 'epoch': 1.1}


Model weights saved in tmp_trainer\checkpoint-19500\pytorch_model.bin
 38%|███▊      | 20000/53064 [1:45:33<2:57:13,  3.11it/s]Saving model checkpoint to tmp_trainer\checkpoint-20000
Configuration saved in tmp_trainer\checkpoint-20000\config.json


{'loss': 0.6602, 'learning_rate': 3.1154831901100565e-05, 'epoch': 1.13}


Model weights saved in tmp_trainer\checkpoint-20000\pytorch_model.bin
 39%|███▊      | 20500/53064 [1:48:30<3:35:34,  2.52it/s]Saving model checkpoint to tmp_trainer\checkpoint-20500
Configuration saved in tmp_trainer\checkpoint-20500\config.json


{'loss': 0.6603, 'learning_rate': 3.068370269862808e-05, 'epoch': 1.16}


Model weights saved in tmp_trainer\checkpoint-20500\pytorch_model.bin
 40%|███▉      | 21000/53064 [1:51:49<3:17:27,  2.71it/s]Saving model checkpoint to tmp_trainer\checkpoint-21000
Configuration saved in tmp_trainer\checkpoint-21000\config.json


{'loss': 0.661, 'learning_rate': 3.021257349615559e-05, 'epoch': 1.19}


Model weights saved in tmp_trainer\checkpoint-21000\pytorch_model.bin
 41%|████      | 21500/53064 [1:55:10<3:28:28,  2.52it/s]Saving model checkpoint to tmp_trainer\checkpoint-21500
Configuration saved in tmp_trainer\checkpoint-21500\config.json


{'loss': 0.6669, 'learning_rate': 2.97414442936831e-05, 'epoch': 1.22}


Model weights saved in tmp_trainer\checkpoint-21500\pytorch_model.bin
 41%|████▏     | 22000/53064 [1:58:33<3:43:55,  2.31it/s]Saving model checkpoint to tmp_trainer\checkpoint-22000
Configuration saved in tmp_trainer\checkpoint-22000\config.json


{'loss': 0.6657, 'learning_rate': 2.9270315091210616e-05, 'epoch': 1.24}


Model weights saved in tmp_trainer\checkpoint-22000\pytorch_model.bin
 42%|████▏     | 22500/53064 [2:01:56<3:11:43,  2.66it/s]Saving model checkpoint to tmp_trainer\checkpoint-22500
Configuration saved in tmp_trainer\checkpoint-22500\config.json


{'loss': 0.6652, 'learning_rate': 2.8799185888738128e-05, 'epoch': 1.27}


Model weights saved in tmp_trainer\checkpoint-22500\pytorch_model.bin
 43%|████▎     | 23000/53064 [2:05:15<2:24:49,  3.46it/s]Saving model checkpoint to tmp_trainer\checkpoint-23000
Configuration saved in tmp_trainer\checkpoint-23000\config.json


{'loss': 0.6585, 'learning_rate': 2.832805668626564e-05, 'epoch': 1.3}


Model weights saved in tmp_trainer\checkpoint-23000\pytorch_model.bin
 44%|████▍     | 23500/53064 [2:07:41<3:18:27,  2.48it/s]Saving model checkpoint to tmp_trainer\checkpoint-23500
Configuration saved in tmp_trainer\checkpoint-23500\config.json


{'loss': 0.6617, 'learning_rate': 2.7856927483793156e-05, 'epoch': 1.33}


Model weights saved in tmp_trainer\checkpoint-23500\pytorch_model.bin
 45%|████▌     | 24000/53064 [2:10:36<2:35:54,  3.11it/s]Saving model checkpoint to tmp_trainer\checkpoint-24000
Configuration saved in tmp_trainer\checkpoint-24000\config.json


{'loss': 0.6519, 'learning_rate': 2.7385798281320668e-05, 'epoch': 1.36}


Model weights saved in tmp_trainer\checkpoint-24000\pytorch_model.bin
 46%|████▌     | 24500/53064 [2:13:24<2:30:45,  3.16it/s]Saving model checkpoint to tmp_trainer\checkpoint-24500
Configuration saved in tmp_trainer\checkpoint-24500\config.json


{'loss': 0.6645, 'learning_rate': 2.6914669078848186e-05, 'epoch': 1.39}


Model weights saved in tmp_trainer\checkpoint-24500\pytorch_model.bin
 47%|████▋     | 25000/53064 [2:16:12<2:37:05,  2.98it/s]Saving model checkpoint to tmp_trainer\checkpoint-25000
Configuration saved in tmp_trainer\checkpoint-25000\config.json


{'loss': 0.6666, 'learning_rate': 2.6443539876375702e-05, 'epoch': 1.41}


Model weights saved in tmp_trainer\checkpoint-25000\pytorch_model.bin
 48%|████▊     | 25500/53064 [2:19:03<2:31:24,  3.03it/s]Saving model checkpoint to tmp_trainer\checkpoint-25500
Configuration saved in tmp_trainer\checkpoint-25500\config.json


{'loss': 0.6547, 'learning_rate': 2.5972410673903214e-05, 'epoch': 1.44}


Model weights saved in tmp_trainer\checkpoint-25500\pytorch_model.bin
 49%|████▉     | 26000/53064 [2:21:53<2:35:20,  2.90it/s]Saving model checkpoint to tmp_trainer\checkpoint-26000
Configuration saved in tmp_trainer\checkpoint-26000\config.json


{'loss': 0.6687, 'learning_rate': 2.5501281471430726e-05, 'epoch': 1.47}


Model weights saved in tmp_trainer\checkpoint-26000\pytorch_model.bin
 50%|████▉     | 26500/53064 [2:24:39<2:18:26,  3.20it/s]Saving model checkpoint to tmp_trainer\checkpoint-26500
Configuration saved in tmp_trainer\checkpoint-26500\config.json


{'loss': 0.6641, 'learning_rate': 2.503015226895824e-05, 'epoch': 1.5}


Model weights saved in tmp_trainer\checkpoint-26500\pytorch_model.bin
 51%|█████     | 27000/53064 [2:26:50<1:29:42,  4.84it/s]Saving model checkpoint to tmp_trainer\checkpoint-27000
Configuration saved in tmp_trainer\checkpoint-27000\config.json


{'loss': 0.6528, 'learning_rate': 2.4559023066485753e-05, 'epoch': 1.53}


Model weights saved in tmp_trainer\checkpoint-27000\pytorch_model.bin
 52%|█████▏    | 27500/53064 [2:28:41<1:27:41,  4.86it/s]Saving model checkpoint to tmp_trainer\checkpoint-27500
Configuration saved in tmp_trainer\checkpoint-27500\config.json


{'loss': 0.663, 'learning_rate': 2.408789386401327e-05, 'epoch': 1.55}


Model weights saved in tmp_trainer\checkpoint-27500\pytorch_model.bin
 53%|█████▎    | 28000/53064 [2:30:32<1:59:30,  3.50it/s]Saving model checkpoint to tmp_trainer\checkpoint-28000
Configuration saved in tmp_trainer\checkpoint-28000\config.json


{'loss': 0.6621, 'learning_rate': 2.3616764661540784e-05, 'epoch': 1.58}


Model weights saved in tmp_trainer\checkpoint-28000\pytorch_model.bin
 54%|█████▎    | 28500/53064 [2:32:34<1:21:23,  5.03it/s]Saving model checkpoint to tmp_trainer\checkpoint-28500
Configuration saved in tmp_trainer\checkpoint-28500\config.json


{'loss': 0.6584, 'learning_rate': 2.3145635459068296e-05, 'epoch': 1.61}


Model weights saved in tmp_trainer\checkpoint-28500\pytorch_model.bin
 55%|█████▍    | 29000/53064 [2:34:29<1:22:44,  4.85it/s]Saving model checkpoint to tmp_trainer\checkpoint-29000
Configuration saved in tmp_trainer\checkpoint-29000\config.json


{'loss': 0.6591, 'learning_rate': 2.2674506256595808e-05, 'epoch': 1.64}


Model weights saved in tmp_trainer\checkpoint-29000\pytorch_model.bin
 56%|█████▌    | 29500/53064 [2:36:18<1:22:21,  4.77it/s]Saving model checkpoint to tmp_trainer\checkpoint-29500
Configuration saved in tmp_trainer\checkpoint-29500\config.json


{'loss': 0.6525, 'learning_rate': 2.2203377054123324e-05, 'epoch': 1.67}


Model weights saved in tmp_trainer\checkpoint-29500\pytorch_model.bin
 57%|█████▋    | 30000/53064 [2:38:12<1:45:45,  3.63it/s]Saving model checkpoint to tmp_trainer\checkpoint-30000
Configuration saved in tmp_trainer\checkpoint-30000\config.json


{'loss': 0.6508, 'learning_rate': 2.173224785165084e-05, 'epoch': 1.7}


Model weights saved in tmp_trainer\checkpoint-30000\pytorch_model.bin
 57%|█████▋    | 30500/53064 [2:41:01<2:20:28,  2.68it/s]Saving model checkpoint to tmp_trainer\checkpoint-30500
Configuration saved in tmp_trainer\checkpoint-30500\config.json


{'loss': 0.6614, 'learning_rate': 2.126111864917835e-05, 'epoch': 1.72}


Model weights saved in tmp_trainer\checkpoint-30500\pytorch_model.bin
 58%|█████▊    | 31000/53064 [2:44:12<2:18:11,  2.66it/s]Saving model checkpoint to tmp_trainer\checkpoint-31000
Configuration saved in tmp_trainer\checkpoint-31000\config.json


{'loss': 0.6573, 'learning_rate': 2.0789989446705866e-05, 'epoch': 1.75}


Model weights saved in tmp_trainer\checkpoint-31000\pytorch_model.bin
 59%|█████▉    | 31500/53064 [2:47:26<2:20:38,  2.56it/s]Saving model checkpoint to tmp_trainer\checkpoint-31500
Configuration saved in tmp_trainer\checkpoint-31500\config.json


{'loss': 0.6624, 'learning_rate': 2.031886024423338e-05, 'epoch': 1.78}


Model weights saved in tmp_trainer\checkpoint-31500\pytorch_model.bin
 60%|██████    | 32000/53064 [2:50:43<2:15:09,  2.60it/s]Saving model checkpoint to tmp_trainer\checkpoint-32000
Configuration saved in tmp_trainer\checkpoint-32000\config.json


{'loss': 0.6634, 'learning_rate': 1.9847731041760894e-05, 'epoch': 1.81}


Model weights saved in tmp_trainer\checkpoint-32000\pytorch_model.bin
 61%|██████    | 32500/53064 [2:53:58<2:12:22,  2.59it/s]Saving model checkpoint to tmp_trainer\checkpoint-32500
Configuration saved in tmp_trainer\checkpoint-32500\config.json


{'loss': 0.6593, 'learning_rate': 1.937660183928841e-05, 'epoch': 1.84}


Model weights saved in tmp_trainer\checkpoint-32500\pytorch_model.bin
 62%|██████▏   | 33000/53064 [2:57:14<2:14:48,  2.48it/s]Saving model checkpoint to tmp_trainer\checkpoint-33000
Configuration saved in tmp_trainer\checkpoint-33000\config.json


{'loss': 0.6594, 'learning_rate': 1.890547263681592e-05, 'epoch': 1.87}


Model weights saved in tmp_trainer\checkpoint-33000\pytorch_model.bin
 63%|██████▎   | 33500/53064 [2:59:40<1:32:40,  3.52it/s]Saving model checkpoint to tmp_trainer\checkpoint-33500
Configuration saved in tmp_trainer\checkpoint-33500\config.json


{'loss': 0.6687, 'learning_rate': 1.8434343434343433e-05, 'epoch': 1.89}


Model weights saved in tmp_trainer\checkpoint-33500\pytorch_model.bin
 64%|██████▍   | 34000/53064 [3:01:39<1:04:57,  4.89it/s]Saving model checkpoint to tmp_trainer\checkpoint-34000
Configuration saved in tmp_trainer\checkpoint-34000\config.json


{'loss': 0.6558, 'learning_rate': 1.796321423187095e-05, 'epoch': 1.92}


Model weights saved in tmp_trainer\checkpoint-34000\pytorch_model.bin
 65%|██████▌   | 34500/53064 [3:03:57<1:36:44,  3.20it/s]Saving model checkpoint to tmp_trainer\checkpoint-34500
Configuration saved in tmp_trainer\checkpoint-34500\config.json


{'loss': 0.6589, 'learning_rate': 1.7492085029398464e-05, 'epoch': 1.95}


Model weights saved in tmp_trainer\checkpoint-34500\pytorch_model.bin
 66%|██████▌   | 35000/53064 [3:06:27<1:19:10,  3.80it/s]Saving model checkpoint to tmp_trainer\checkpoint-35000
Configuration saved in tmp_trainer\checkpoint-35000\config.json


{'loss': 0.6647, 'learning_rate': 1.7020955826925976e-05, 'epoch': 1.98}


Model weights saved in tmp_trainer\checkpoint-35000\pytorch_model.bin
 67%|██████▋   | 35500/53064 [3:08:55<1:41:41,  2.88it/s]Saving model checkpoint to tmp_trainer\checkpoint-35500
Configuration saved in tmp_trainer\checkpoint-35500\config.json


{'loss': 0.6546, 'learning_rate': 1.654982662445349e-05, 'epoch': 2.01}


Model weights saved in tmp_trainer\checkpoint-35500\pytorch_model.bin
 68%|██████▊   | 36000/53064 [3:11:40<1:27:44,  3.24it/s]Saving model checkpoint to tmp_trainer\checkpoint-36000
Configuration saved in tmp_trainer\checkpoint-36000\config.json


{'loss': 0.6509, 'learning_rate': 1.6078697421981004e-05, 'epoch': 2.04}


Model weights saved in tmp_trainer\checkpoint-36000\pytorch_model.bin
 69%|██████▉   | 36500/53064 [3:14:26<1:33:32,  2.95it/s]Saving model checkpoint to tmp_trainer\checkpoint-36500
Configuration saved in tmp_trainer\checkpoint-36500\config.json


{'loss': 0.6605, 'learning_rate': 1.560756821950852e-05, 'epoch': 2.06}


Model weights saved in tmp_trainer\checkpoint-36500\pytorch_model.bin
 70%|██████▉   | 37000/53064 [3:17:12<1:27:20,  3.07it/s]Saving model checkpoint to tmp_trainer\checkpoint-37000
Configuration saved in tmp_trainer\checkpoint-37000\config.json


{'loss': 0.6606, 'learning_rate': 1.5136439017036033e-05, 'epoch': 2.09}


Model weights saved in tmp_trainer\checkpoint-37000\pytorch_model.bin
 71%|███████   | 37500/53064 [3:20:03<1:25:43,  3.03it/s]Saving model checkpoint to tmp_trainer\checkpoint-37500
Configuration saved in tmp_trainer\checkpoint-37500\config.json


{'loss': 0.6567, 'learning_rate': 1.4665309814563547e-05, 'epoch': 2.12}


Model weights saved in tmp_trainer\checkpoint-37500\pytorch_model.bin
 72%|███████▏  | 38000/53064 [3:22:50<1:19:41,  3.15it/s]Saving model checkpoint to tmp_trainer\checkpoint-38000
Configuration saved in tmp_trainer\checkpoint-38000\config.json


{'loss': 0.6643, 'learning_rate': 1.419418061209106e-05, 'epoch': 2.15}


Model weights saved in tmp_trainer\checkpoint-38000\pytorch_model.bin
 73%|███████▎  | 38500/53064 [3:25:39<1:19:55,  3.04it/s]Saving model checkpoint to tmp_trainer\checkpoint-38500
Configuration saved in tmp_trainer\checkpoint-38500\config.json


{'loss': 0.6559, 'learning_rate': 1.3723051409618576e-05, 'epoch': 2.18}


Model weights saved in tmp_trainer\checkpoint-38500\pytorch_model.bin
 73%|███████▎  | 39000/53064 [3:28:31<1:18:28,  2.99it/s]Saving model checkpoint to tmp_trainer\checkpoint-39000
Configuration saved in tmp_trainer\checkpoint-39000\config.json


{'loss': 0.6589, 'learning_rate': 1.325192220714609e-05, 'epoch': 2.2}


Model weights saved in tmp_trainer\checkpoint-39000\pytorch_model.bin
 74%|███████▍  | 39500/53064 [3:31:23<1:16:02,  2.97it/s]Saving model checkpoint to tmp_trainer\checkpoint-39500
Configuration saved in tmp_trainer\checkpoint-39500\config.json


{'loss': 0.6552, 'learning_rate': 1.2780793004673603e-05, 'epoch': 2.23}


Model weights saved in tmp_trainer\checkpoint-39500\pytorch_model.bin
 75%|███████▌  | 40000/53064 [3:33:45<1:05:14,  3.34it/s]Saving model checkpoint to tmp_trainer\checkpoint-40000
Configuration saved in tmp_trainer\checkpoint-40000\config.json


{'loss': 0.6692, 'learning_rate': 1.2309663802201115e-05, 'epoch': 2.26}


Model weights saved in tmp_trainer\checkpoint-40000\pytorch_model.bin
 76%|███████▋  | 40500/53064 [3:36:16<1:06:30,  3.15it/s]Saving model checkpoint to tmp_trainer\checkpoint-40500
Configuration saved in tmp_trainer\checkpoint-40500\config.json


{'loss': 0.6573, 'learning_rate': 1.183853459972863e-05, 'epoch': 2.29}


Model weights saved in tmp_trainer\checkpoint-40500\pytorch_model.bin
 77%|███████▋  | 41000/53064 [3:38:46<1:16:50,  2.62it/s]Saving model checkpoint to tmp_trainer\checkpoint-41000
Configuration saved in tmp_trainer\checkpoint-41000\config.json


{'loss': 0.6573, 'learning_rate': 1.1367405397256144e-05, 'epoch': 2.32}


Model weights saved in tmp_trainer\checkpoint-41000\pytorch_model.bin
 78%|███████▊  | 41500/53064 [3:41:36<1:03:45,  3.02it/s]Saving model checkpoint to tmp_trainer\checkpoint-41500
Configuration saved in tmp_trainer\checkpoint-41500\config.json


{'loss': 0.6524, 'learning_rate': 1.0896276194783658e-05, 'epoch': 2.35}


Model weights saved in tmp_trainer\checkpoint-41500\pytorch_model.bin
 79%|███████▉  | 42000/53064 [3:44:23<1:01:11,  3.01it/s]Saving model checkpoint to tmp_trainer\checkpoint-42000
Configuration saved in tmp_trainer\checkpoint-42000\config.json


{'loss': 0.6635, 'learning_rate': 1.0425146992311172e-05, 'epoch': 2.37}


Model weights saved in tmp_trainer\checkpoint-42000\pytorch_model.bin
 80%|████████  | 42500/53064 [3:47:08<56:28,  3.12it/s]Saving model checkpoint to tmp_trainer\checkpoint-42500
Configuration saved in tmp_trainer\checkpoint-42500\config.json


{'loss': 0.6561, 'learning_rate': 9.954017789838687e-06, 'epoch': 2.4}


Model weights saved in tmp_trainer\checkpoint-42500\pytorch_model.bin
 81%|████████  | 43000/53064 [3:49:57<56:00,  2.99it/s]Saving model checkpoint to tmp_trainer\checkpoint-43000
Configuration saved in tmp_trainer\checkpoint-43000\config.json


{'loss': 0.6644, 'learning_rate': 9.482888587366199e-06, 'epoch': 2.43}


Model weights saved in tmp_trainer\checkpoint-43000\pytorch_model.bin
 82%|████████▏ | 43500/53064 [3:52:45<49:15,  3.24it/s]Saving model checkpoint to tmp_trainer\checkpoint-43500
Configuration saved in tmp_trainer\checkpoint-43500\config.json


{'loss': 0.6556, 'learning_rate': 9.011759384893715e-06, 'epoch': 2.46}


Model weights saved in tmp_trainer\checkpoint-43500\pytorch_model.bin
 83%|████████▎ | 44000/53064 [3:55:34<50:13,  3.01it/s]Saving model checkpoint to tmp_trainer\checkpoint-44000
Configuration saved in tmp_trainer\checkpoint-44000\config.json


{'loss': 0.6541, 'learning_rate': 8.540630182421228e-06, 'epoch': 2.49}


Model weights saved in tmp_trainer\checkpoint-44000\pytorch_model.bin
 84%|████████▍ | 44500/53064 [3:58:25<46:56,  3.04it/s]Saving model checkpoint to tmp_trainer\checkpoint-44500
Configuration saved in tmp_trainer\checkpoint-44500\config.json


{'loss': 0.6527, 'learning_rate': 8.06950097994874e-06, 'epoch': 2.52}


Model weights saved in tmp_trainer\checkpoint-44500\pytorch_model.bin
 85%|████████▍ | 45000/53064 [4:01:13<44:41,  3.01it/s]Saving model checkpoint to tmp_trainer\checkpoint-45000
Configuration saved in tmp_trainer\checkpoint-45000\config.json


{'loss': 0.6578, 'learning_rate': 7.598371777476256e-06, 'epoch': 2.54}


Model weights saved in tmp_trainer\checkpoint-45000\pytorch_model.bin
 86%|████████▌ | 45500/53064 [4:04:01<44:14,  2.85it/s]Saving model checkpoint to tmp_trainer\checkpoint-45500
Configuration saved in tmp_trainer\checkpoint-45500\config.json


{'loss': 0.6583, 'learning_rate': 7.1272425750037685e-06, 'epoch': 2.57}


Model weights saved in tmp_trainer\checkpoint-45500\pytorch_model.bin
 87%|████████▋ | 46000/53064 [4:06:51<38:54,  3.03it/s]Saving model checkpoint to tmp_trainer\checkpoint-46000
Configuration saved in tmp_trainer\checkpoint-46000\config.json


{'loss': 0.6598, 'learning_rate': 6.656113372531283e-06, 'epoch': 2.6}


Model weights saved in tmp_trainer\checkpoint-46000\pytorch_model.bin
 88%|████████▊ | 46500/53064 [4:09:39<34:16,  3.19it/s]Saving model checkpoint to tmp_trainer\checkpoint-46500
Configuration saved in tmp_trainer\checkpoint-46500\config.json


{'loss': 0.6548, 'learning_rate': 6.184984170058798e-06, 'epoch': 2.63}


Model weights saved in tmp_trainer\checkpoint-46500\pytorch_model.bin
 89%|████████▊ | 47000/53064 [4:12:31<32:43,  3.09it/s]Saving model checkpoint to tmp_trainer\checkpoint-47000
Configuration saved in tmp_trainer\checkpoint-47000\config.json


{'loss': 0.652, 'learning_rate': 5.7138549675863105e-06, 'epoch': 2.66}


Model weights saved in tmp_trainer\checkpoint-47000\pytorch_model.bin
 90%|████████▉ | 47500/53064 [4:14:54<20:05,  4.62it/s]Saving model checkpoint to tmp_trainer\checkpoint-47500
Configuration saved in tmp_trainer\checkpoint-47500\config.json


{'loss': 0.656, 'learning_rate': 5.242725765113825e-06, 'epoch': 2.69}


Model weights saved in tmp_trainer\checkpoint-47500\pytorch_model.bin
 90%|█████████ | 48000/53064 [4:16:42<17:53,  4.72it/s]Saving model checkpoint to tmp_trainer\checkpoint-48000
Configuration saved in tmp_trainer\checkpoint-48000\config.json


{'loss': 0.6606, 'learning_rate': 4.771596562641339e-06, 'epoch': 2.71}


Model weights saved in tmp_trainer\checkpoint-48000\pytorch_model.bin
 91%|█████████▏| 48500/53064 [4:18:35<15:55,  4.78it/s]Saving model checkpoint to tmp_trainer\checkpoint-48500
Configuration saved in tmp_trainer\checkpoint-48500\config.json


{'loss': 0.663, 'learning_rate': 4.3004673601688526e-06, 'epoch': 2.74}


Model weights saved in tmp_trainer\checkpoint-48500\pytorch_model.bin
 92%|█████████▏| 49000/53064 [4:20:26<14:33,  4.65it/s]Saving model checkpoint to tmp_trainer\checkpoint-49000
Configuration saved in tmp_trainer\checkpoint-49000\config.json


{'loss': 0.6584, 'learning_rate': 3.829338157696367e-06, 'epoch': 2.77}


Model weights saved in tmp_trainer\checkpoint-49000\pytorch_model.bin
 93%|█████████▎| 49500/53064 [4:22:40<17:43,  3.35it/s]Saving model checkpoint to tmp_trainer\checkpoint-49500
Configuration saved in tmp_trainer\checkpoint-49500\config.json


{'loss': 0.6565, 'learning_rate': 3.358208955223881e-06, 'epoch': 2.8}


Model weights saved in tmp_trainer\checkpoint-49500\pytorch_model.bin
 94%|█████████▍| 50000/53064 [4:25:00<11:05,  4.61it/s]Saving model checkpoint to tmp_trainer\checkpoint-50000
Configuration saved in tmp_trainer\checkpoint-50000\config.json


{'loss': 0.6559, 'learning_rate': 2.8870797527513946e-06, 'epoch': 2.83}


Model weights saved in tmp_trainer\checkpoint-50000\pytorch_model.bin
 95%|█████████▌| 50500/53064 [4:28:03<13:46,  3.10it/s]Saving model checkpoint to tmp_trainer\checkpoint-50500
Configuration saved in tmp_trainer\checkpoint-50500\config.json


{'loss': 0.6504, 'learning_rate': 2.4159505502789087e-06, 'epoch': 2.86}


Model weights saved in tmp_trainer\checkpoint-50500\pytorch_model.bin
 96%|█████████▌| 51000/53064 [4:30:56<11:24,  3.01it/s]Saving model checkpoint to tmp_trainer\checkpoint-51000
Configuration saved in tmp_trainer\checkpoint-51000\config.json


{'loss': 0.6569, 'learning_rate': 1.944821347806423e-06, 'epoch': 2.88}


Model weights saved in tmp_trainer\checkpoint-51000\pytorch_model.bin
 97%|█████████▋| 51500/53064 [4:33:46<08:48,  2.96it/s]Saving model checkpoint to tmp_trainer\checkpoint-51500
Configuration saved in tmp_trainer\checkpoint-51500\config.json


{'loss': 0.6594, 'learning_rate': 1.4736921453339366e-06, 'epoch': 2.91}


Model weights saved in tmp_trainer\checkpoint-51500\pytorch_model.bin
 98%|█████████▊| 52000/53064 [4:36:36<06:00,  2.95it/s]Saving model checkpoint to tmp_trainer\checkpoint-52000
Configuration saved in tmp_trainer\checkpoint-52000\config.json


{'loss': 0.6546, 'learning_rate': 1.0025629428614503e-06, 'epoch': 2.94}


Model weights saved in tmp_trainer\checkpoint-52000\pytorch_model.bin
 99%|█████████▉| 52500/53064 [4:39:29<03:11,  2.95it/s]Saving model checkpoint to tmp_trainer\checkpoint-52500
Configuration saved in tmp_trainer\checkpoint-52500\config.json


{'loss': 0.6574, 'learning_rate': 5.314337403889643e-07, 'epoch': 2.97}


Model weights saved in tmp_trainer\checkpoint-52500\pytorch_model.bin
100%|█████████▉| 53000/53064 [4:42:22<00:21,  2.98it/s]Saving model checkpoint to tmp_trainer\checkpoint-53000
Configuration saved in tmp_trainer\checkpoint-53000\config.json


{'loss': 0.6524, 'learning_rate': 6.030453791647821e-08, 'epoch': 3.0}


Model weights saved in tmp_trainer\checkpoint-53000\pytorch_model.bin
100%|██████████| 53064/53064 [4:42:47<00:00,  2.98it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 53064/53064 [4:42:47<00:00,  3.13it/s]

{'train_runtime': 16967.8019, 'train_samples_per_second': 25.018, 'train_steps_per_second': 3.127, 'train_loss': 0.6604022006364533, 'epoch': 3.0}





TrainOutput(global_step=53064, training_loss=0.6604022006364533, metrics={'train_runtime': 16967.8019, 'train_samples_per_second': 25.018, 'train_steps_per_second': 3.127, 'train_loss': 0.6604022006364533, 'epoch': 3.0})

In [15]:
# save the fine_tuned model
model.save_pretrained("simcse_qqp")

Configuration saved in simcse_qqp\config.json
Model weights saved in simcse_qqp\pytorch_model.bin


# Evaluation

In [13]:
# sampling the evaluation dataset from QQP
train_eval = load_dataset("quora", split = 'train[50%:85%]')
validation_eval = load_dataset("quora", split = 'train[85%:100%]')

# repeat all the preprocessing steps above
new_features = train_eval.features.copy()
new_features["is_duplicate"] = Value('int32')
train_eval = train_eval.cast(new_features)

new_features = validation_eval.features.copy()
new_features["is_duplicate"] = Value('int32')
validation_eval = validation_eval.cast(new_features)

tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')


# encode the training dataset in the form of sentences pair
encoded_train_eval = train_eval.map(lambda batch: tokenizer(batch['questions']['text'][0], batch['questions']['text'][1], 
                                                  padding='max_length', truncation=True, max_length=64))
encoded_train_eval.rename_column_("is_duplicate", "labels")

# encode the validation dataset in the form of sentences pair
encoded_validation_eval = validation_eval.map(lambda batch: tokenizer(batch['questions']['text'][0], batch['questions']['text'][1], 
                                                            padding='max_length', truncation=True, max_length=64))
encoded_validation_eval.rename_column_("is_duplicate", "labels")

Using custom data configuration default
Reusing dataset quora (C:\Users\52673\.cache\huggingface\datasets\quora\default\0.0.0\36ba4cd42107f051a158016f1bea6ae3f4685c5df843529108a54e42d86c1e04)
Using custom data configuration default
Reusing dataset quora (C:\Users\52673\.cache\huggingface\datasets\quora\default\0.0.0\36ba4cd42107f051a158016f1bea6ae3f4685c5df843529108a54e42d86c1e04)
Loading cached processed dataset at C:\Users\52673\.cache\huggingface\datasets\quora\default\0.0.0\36ba4cd42107f051a158016f1bea6ae3f4685c5df843529108a54e42d86c1e04\cache-58ce8a2d3d13bc51.arrow
Loading cached processed dataset at C:\Users\52673\.cache\huggingface\datasets\quora\default\0.0.0\36ba4cd42107f051a158016f1bea6ae3f4685c5df843529108a54e42d86c1e04\cache-077de9dbe62a5b12.arrow
Loading cached processed dataset at C:\Users\52673\.cache\huggingface\datasets\quora\default\0.0.0\36ba4cd42107f051a158016f1bea6ae3f4685c5df843529108a54e42d86c1e04\cache-aec8ebd6b027502c.arrow
Loading cached processed dataset at C

In [16]:
# load the fine-tuned CL-BERT model
simcse_qqp = AutoModelForSequenceClassification.from_pretrained("simcse_qqp")

In [17]:
# load the metric module to compute accuracy
accuracy = load_metric("accuracy")

def compute_accuracy(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return accuracy.compute(predictions=predictions, references=labels)

In [18]:
# load the metric module to compute f1 score
f1 = load_metric("f1")

def compute_f1(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return f1.compute(predictions=predictions, references=labels)

## Accuracy

In [19]:
trainer_qqp = Trainer(
    model=simcse_qqp,    #use the fine-tuned model saved on
    train_dataset=encoded_train_eval,
    eval_dataset=encoded_validation_eval,
    compute_metrics=compute_accuracy,
)

trainer_qqp.evaluate()

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: questions.
***** Running Evaluation *****
  Num examples = 60644
  Batch size = 8
100%|██████████| 7581/7581 [08:48<00:00, 14.33it/s]


{'eval_loss': 0.6635891199111938,
 'eval_accuracy': 0.6436745597256117,
 'eval_runtime': 531.4313,
 'eval_samples_per_second': 114.114,
 'eval_steps_per_second': 14.265}

## F1 score

In [20]:
trainer_qqp = Trainer(
    model=simcse_qqp,    #use the fine-tuned model saved on
    train_dataset=encoded_train_eval,
    eval_dataset=encoded_validation_eval,
    compute_metrics=compute_f1,
)

trainer_qqp.evaluate() 

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: questions.
***** Running Evaluation *****
  Num examples = 60644
  Batch size = 8
100%|██████████| 7581/7581 [12:49<00:00,  9.85it/s]


{'eval_loss': 0.6635891199111938,
 'eval_f1': 0.0,
 'eval_runtime': 769.8367,
 'eval_samples_per_second': 78.775,
 'eval_steps_per_second': 9.848}