In [1]:
# Training script for fine-tuning BERT on unaltered GPT-labelled data
# and manually labelled data.
# For better performance/generalization, use the augmented dataset instead.
# See README.md for comments and details.

In [2]:
# Topic labels for the classifier. The length of this list sets the size of
# the classification head (num_labels) when the model is created.
# NOTE(review): presumably the position of each name is the class id encoded
# by item["dist"] in the pickled data -- confirm before reordering.

classes = [
    "banking",
    "valuation",
    "household",
    "real estate",
    "corporate",
    "external",
    "sovereign",
    "technology",
    "climate",
    "energy",
    "health",
    "eu",
]


In [3]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
import torch
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support,top_k_accuracy_score
import math
import pickle
from datasets import Dataset

In [4]:
# Checkpoints: the classifier is created from plain BERT with a
# len(classes)-way head; FinBERT is loaded separately and also supplies
# the tokenizer.
BASE_CHECKPOINT = 'bert-base-uncased'
FINBERT_CHECKPOINT = 'ProsusAI/finbert'

model = AutoModelForSequenceClassification.from_pretrained(
    BASE_CHECKPOINT,
    num_labels=len(classes),
)
finbert = AutoModelForSequenceClassification.from_pretrained(FINBERT_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(FINBERT_CHECKPOINT, use_fast=True)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [5]:
# Transplant FinBERT weights into `model`, keeping the pooler and the
# classification head that `model` was created with. Every other tensor in
# the state dict that gets loaded comes from FinBERT; FinBERT's own pooler
# and classifier tensors are discarded.
BASE_MODEL_KEYS = (
    "bert.pooler.dense.weight",
    "bert.pooler.dense.bias",
    "classifier.weight",
    "classifier.bias",
)

finbert_weights = finbert.state_dict()
model_weights = model.state_dict()
for key in BASE_MODEL_KEYS:
    # Replace the FinBERT entry with the tensor currently held by `model`.
    finbert_weights[key] = model_weights[key]

# Left as the last expression so the key-matching report is displayed.
model.load_state_dict(finbert_weights)


<All keys matched successfully>

In [6]:
# Load the three pickled label sets. The two GPT-labelled files hold nested
# lists, which are flattened and then concatenated into `mixed`; the manually
# labelled set is loaded into `train` as stored.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.

def _read_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)

# manual labelled
train = _read_pickle('train_data.pickle')

# gpt labelled p1 (one level of nesting removed)
gpt = [item for sublist in _read_pickle('gpt.pickle') for item in sublist]

# gpt labelled p2 (one level of nesting removed)
gpt2 = [item for sublist in _read_pickle('gpt_p2.pickle') for item in sublist]

mixed = gpt + gpt2

In [7]:
# Number of GPT-labelled examples after flattening and concatenation.
print(len(mixed))

2458


In [8]:
# Turn every labelled item into a text and a hard label, where the hard label
# is the argmax of the item's "dist" vector.
# `sample` is the number of times each list is repeated back to back; with the
# value 1 every item appears exactly once. To treat the GPT-3 "dist" values as
# probabilities, replace the argmax with a sampling method and raise `sample`.
sample = 1

text_max = [entry["text"] for entry in train] * sample
label_max = [np.argmax(entry["dist"]) for entry in train] * sample

text_max_mixed = [entry["text"] for entry in mixed] * sample
label_max_mixed = [np.argmax(entry["dist"]) for entry in mixed] * sample

# Only the GPT-labelled items feed the combined set.
# NOTE(review): text_max / label_max (manual labels) are computed but not
# used here -- confirm that leaving them out is intended.
comb_text = text_max_mixed
comb_label = label_max_mixed

In [9]:
# Every text must have exactly one label before shuffling and splitting.
assert len(comb_text) == len(comb_label)

In [10]:
# Shuffle texts and labels in lockstep before the train/test split.
# A dedicated, seeded generator makes the resulting order (and therefore the
# split) identical after Restart & Run All, without touching the global
# `random` state.
# NOTE: re-running only this cell shuffles the already shuffled data again;
# re-run the cell that builds comb_text/comb_label first to get the same order.
import random

SHUFFLE_SEED = 42
temp = list(zip(comb_text, comb_label))
random.Random(SHUFFLE_SEED).shuffle(temp)
comb_text, comb_label = zip(*temp)

In [11]:
# Sanity check: shuffling must not change the number of examples.
print(len(comb_text))

2458


In [12]:
# 90/10 train/test split; the cut index is rounded up, so the training part
# gets the extra example when the size is not a multiple of ten.
split_idx = math.ceil(len(comb_text) * 0.9)

train_text, test_text = comb_text[:split_idx], comb_text[split_idx:]
train_label, test_label = comb_label[:split_idx], comb_label[split_idx:]

In [13]:
# Wrap each split in a Dataset with a "text" column and a "label" column.
train_dataset, test_dataset = (
    Dataset.from_dict({"text": texts, "label": labels})
    for texts, labels in ((train_text, train_label), (test_text, test_label))
)

In [14]:
def tokenize(batch):
    """Tokenize the 'text' column of a batch, padding within the batch and truncating."""
    return tokenizer(batch['text'], padding=True, truncation=True)


def _tokenize_split(dataset):
    """Tokenize a whole split as one batch and expose the model columns as torch tensors."""
    # batch_size equals the split size, so the split is tokenized as one batch
    # and padding=True pads within that single batch.
    encoded = dataset.map(tokenize, batched=True, batch_size=len(dataset))
    encoded.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
    return encoded


train_dataset = _tokenize_split(train_dataset)
test_dataset = _tokenize_split(test_dataset)

100%|██████████| 1/1 [00:00<00:00,  7.37ba/s]
100%|██████████| 1/1 [00:00<00:00, 90.89ba/s]


In [15]:
# Peek at the token-id tensor to confirm tokenization and the torch format took effect.
train_dataset["input_ids"]

tensor([[  101,  1037,  2488,  ...,     0,     0,     0],
        [  101,  9944,  2181,  ...,     0,     0,     0],
        [  101,  2045,  2323,  ...,     0,     0,     0],
        ...,
        [  101,  4826,  1010,  ...,     0,     0,     0],
        [  101,  2714, 10069,  ...,     0,     0,     0],
        [  101,  4613,  2011,  ...,     0,     0,     0]])

In [16]:
def compute_metrics(pred):
    """Compute classification metrics for one evaluation pass.

    Args:
        pred: object exposing ``predictions`` (per-class scores, one column per
            class in the last dimension) and ``label_ids`` (integer class ids).
            Presumably the prediction object the Trainer passes to its
            ``compute_metrics`` callback -- confirm.

    Returns:
        dict with 'accuracy', macro-averaged 'f1' / 'precision' / 'recall',
        and top-3 / top-2 accuracy under 'top3' / 'top2'.
    """
    labels = pred.label_ids
    scores = pred.predictions
    preds = scores.argmax(-1)

    # One id per score column. Passing the full label set keeps the top-k
    # metrics from raising when the evaluation split happens to lack a class.
    all_labels = np.arange(scores.shape[-1])

    # zero_division=0 yields the same values as the default but without the
    # warning for classes that are never predicted.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='macro', zero_division=0
    )
    acc = accuracy_score(labels, preds)
    top3 = top_k_accuracy_score(labels, scores, k=3, labels=all_labels)
    top2 = top_k_accuracy_score(labels, scores, k=2, labels=all_labels)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'top3': top3,
        'top2': top2,
    }

training_args = TrainingArguments(
    # Output locations.
    output_dir='./results',
    logging_dir='./logs',
    # Optimisation schedule.
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_steps=500,
    num_train_epochs=20,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    # Evaluate and checkpoint once per epoch; reload the checkpoint with the
    # best eval accuracy when training finishes.
    evaluation_strategy='epoch',
    save_strategy="epoch",
    metric_for_best_model="accuracy",
    load_best_model_at_end=True,
    # Limit on retained checkpoints; older ones are deleted as new ones are saved.
    save_total_limit=1,
)

# NOTE(review): test_dataset is the per-epoch evaluation set, so it also
# drives best-checkpoint selection; scores on it are likely optimistic --
# consider a separate held-out test split.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

In [17]:
# Fine-tune the model; evaluation and checkpointing follow training_args.
trainer.train()

The following columns in the training set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running training *****
  Num examples = 2213
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 2780
  5%|▍         | 138/2780 [00:22<06:56,  6.34it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

  5%|▌         | 139/2780 [00:23<06:56,  6.34it/s]Saving model checkpoint to ./results\checkpoint-139
Configuration saved in ./results\checkpoint-139\config.json


{'eval_loss': 2.2275032997131348, 'eval_accuracy': 0.2653061224489796, 'eval_f1': 0.05849293465392536, 'eval_precision': 0.06293809700372548, 'eval_recall': 0.08994708994708994, 'eval_top3': 0.5959183673469388, 'eval_top2': 0.46938775510204084, 'eval_runtime': 0.3134, 'eval_samples_per_second': 781.818, 'eval_steps_per_second': 12.764, 'epoch': 1.0}


Model weights saved in ./results\checkpoint-139\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-2618] due to args.save_total_limit
 10%|▉         | 277/2780 [00:46<06:13,  6.70it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 10%|█         | 278/2780 [00:46<06:13,  6.70it/s]Saving model checkpoint to ./results\checkpoint-278
Configuration saved in ./results\checkpoint-278\config.json


{'eval_loss': 1.8715399503707886, 'eval_accuracy': 0.4163265306122449, 'eval_f1': 0.12697716513505986, 'eval_precision': 0.10321704578594341, 'eval_recall': 0.16820987654320987, 'eval_top3': 0.6408163265306123, 'eval_top2': 0.5306122448979592, 'eval_runtime': 0.2999, 'eval_samples_per_second': 816.823, 'eval_steps_per_second': 13.336, 'epoch': 2.0}


Model weights saved in ./results\checkpoint-278\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-3080] due to args.save_total_limit
 15%|█▍        | 416/2780 [01:08<05:49,  6.76it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 15%|█▌        | 417/2780 [01:08<05:49,  6.76it/s]Saving model checkpoint to ./results\checkpoint-417
Configuration saved in ./results\checkpoint-417\config.json


{'eval_loss': 1.5200684070587158, 'eval_accuracy': 0.5469387755102041, 'eval_f1': 0.2503530311402597, 'eval_precision': 0.26100223686930696, 'eval_recall': 0.2699123855037833, 'eval_top3': 0.7918367346938775, 'eval_top2': 0.7020408163265306, 'eval_runtime': 0.3013, 'eval_samples_per_second': 813.081, 'eval_steps_per_second': 13.275, 'epoch': 3.0}


Model weights saved in ./results\checkpoint-417\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-139] due to args.save_total_limit
 18%|█▊        | 501/2780 [01:22<05:14,  7.24it/s]

{'loss': 1.8874, 'learning_rate': 2e-05, 'epoch': 3.6}


 20%|█▉        | 555/2780 [01:30<05:30,  6.73it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 20%|██        | 556/2780 [01:31<05:30,  6.73it/s]Saving model checkpoint to ./results\checkpoint-556
Configuration saved in ./results\checkpoint-556\config.json


{'eval_loss': 1.3819433450698853, 'eval_accuracy': 0.5551020408163265, 'eval_f1': 0.3188806483463684, 'eval_precision': 0.39156318827371456, 'eval_recall': 0.3177545585139671, 'eval_top3': 0.8204081632653061, 'eval_top2': 0.726530612244898, 'eval_runtime': 0.3181, 'eval_samples_per_second': 770.109, 'eval_steps_per_second': 12.573, 'epoch': 4.0}


Model weights saved in ./results\checkpoint-556\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-278] due to args.save_total_limit
 25%|██▍       | 694/2780 [01:53<05:05,  6.83it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 25%|██▌       | 695/2780 [01:53<05:05,  6.83it/s]Saving model checkpoint to ./results\checkpoint-695
Configuration saved in ./results\checkpoint-695\config.json


{'eval_loss': 1.4581735134124756, 'eval_accuracy': 0.5755102040816327, 'eval_f1': 0.3599842954966915, 'eval_precision': 0.42091934298323297, 'eval_recall': 0.36072011172817625, 'eval_top3': 0.7755102040816326, 'eval_top2': 0.7061224489795919, 'eval_runtime': 0.2986, 'eval_samples_per_second': 820.372, 'eval_steps_per_second': 13.394, 'epoch': 5.0}


Model weights saved in ./results\checkpoint-695\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-417] due to args.save_total_limit
 30%|██▉       | 833/2780 [02:16<04:28,  7.25it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 30%|███       | 834/2780 [02:16<04:28,  7.25it/s]Saving model checkpoint to ./results\checkpoint-834
Configuration saved in ./results\checkpoint-834\config.json


{'eval_loss': 1.540336012840271, 'eval_accuracy': 0.5591836734693878, 'eval_f1': 0.3557663612617173, 'eval_precision': 0.4424158570897701, 'eval_recall': 0.3416300694526501, 'eval_top3': 0.8244897959183674, 'eval_top2': 0.7142857142857143, 'eval_runtime': 0.2942, 'eval_samples_per_second': 832.893, 'eval_steps_per_second': 13.598, 'epoch': 6.0}


Model weights saved in ./results\checkpoint-834\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-556] due to args.save_total_limit
 35%|███▍      | 972/2780 [02:38<04:29,  6.70it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 35%|███▌      | 973/2780 [02:38<04:29,  6.70it/s]Saving model checkpoint to ./results\checkpoint-973
Configuration saved in ./results\checkpoint-973\config.json


{'eval_loss': 1.7193952798843384, 'eval_accuracy': 0.5469387755102041, 'eval_f1': 0.3401250429089562, 'eval_precision': 0.35141594516594515, 'eval_recall': 0.3512200165425972, 'eval_top3': 0.7673469387755102, 'eval_top2': 0.6653061224489796, 'eval_runtime': 0.2993, 'eval_samples_per_second': 818.683, 'eval_steps_per_second': 13.366, 'epoch': 7.0}


Model weights saved in ./results\checkpoint-973\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-834] due to args.save_total_limit
 36%|███▌      | 1001/2780 [02:44<04:14,  7.00it/s]

{'loss': 0.6868, 'learning_rate': 1.56140350877193e-05, 'epoch': 7.19}


 40%|███▉      | 1111/2780 [03:01<04:14,  6.57it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 40%|████      | 1112/2780 [03:01<04:13,  6.57it/s]Saving model checkpoint to ./results\checkpoint-1112
Configuration saved in ./results\checkpoint-1112\config.json


{'eval_loss': 1.8059898614883423, 'eval_accuracy': 0.5714285714285714, 'eval_f1': 0.3483436226971616, 'eval_precision': 0.37123497658616267, 'eval_recall': 0.35008749961706953, 'eval_top3': 0.7918367346938775, 'eval_top2': 0.7061224489795919, 'eval_runtime': 0.307, 'eval_samples_per_second': 797.935, 'eval_steps_per_second': 13.028, 'epoch': 8.0}


Model weights saved in ./results\checkpoint-1112\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-973] due to args.save_total_limit
 45%|████▍     | 1250/2780 [03:23<03:44,  6.81it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64

 45%|████▌     | 1251/2780 [03:23<03:44,  6.81it/s]Saving model checkpoint to ./results\checkpoint-1251
Configuration saved in ./results\checkpoint-1251\config.json


{'eval_loss': 1.9232038259506226, 'eval_accuracy': 0.5551020408163265, 'eval_f1': 0.35279981873331284, 'eval_precision': 0.36122939560439565, 'eval_recall': 0.3605815045667196, 'eval_top3': 0.7877551020408163, 'eval_top2': 0.7183673469387755, 'eval_runtime': 0.284, 'eval_samples_per_second': 862.632, 'eval_steps_per_second': 14.084, 'epoch': 9.0}


Model weights saved in ./results\checkpoint-1251\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-1112] due to args.save_total_limit
 50%|████▉     | 1389/2780 [03:45<03:15,  7.11it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 50%|█████     | 1390/2780 [03:46<03:15,  7.11it/s]Saving model checkpoint to ./results\checkpoint-1390
Configuration saved in ./results\checkpoint-1390\config.json


{'eval_loss': 2.0355265140533447, 'eval_accuracy': 0.5755102040816327, 'eval_f1': 0.37504393779734574, 'eval_precision': 0.39033024634444513, 'eval_recall': 0.3730854159281578, 'eval_top3': 0.7673469387755102, 'eval_top2': 0.7142857142857143, 'eval_runtime': 0.2885, 'eval_samples_per_second': 849.237, 'eval_steps_per_second': 13.865, 'epoch': 10.0}


Model weights saved in ./results\checkpoint-1390\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-1251] due to args.save_total_limit
 54%|█████▍    | 1501/2780 [04:04<03:01,  7.07it/s]

{'loss': 0.1463, 'learning_rate': 1.1228070175438597e-05, 'epoch': 10.79}


 55%|█████▍    | 1528/2780 [04:08<02:59,  6.97it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 55%|█████▌    | 1529/2780 [04:08<02:59,  6.97it/s]Saving model checkpoint to ./results\checkpoint-1529
Configuration saved in ./results\checkpoint-1529\config.json


{'eval_loss': 2.1347901821136475, 'eval_accuracy': 0.5795918367346938, 'eval_f1': 0.370981538194653, 'eval_precision': 0.3877801120448179, 'eval_recall': 0.3759495308554448, 'eval_top3': 0.7836734693877551, 'eval_top2': 0.726530612244898, 'eval_runtime': 0.2961, 'eval_samples_per_second': 827.487, 'eval_steps_per_second': 13.51, 'epoch': 11.0}


Model weights saved in ./results\checkpoint-1529\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-695] due to args.save_total_limit
 60%|█████▉    | 1667/2780 [04:30<02:38,  7.03it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64

 60%|██████    | 1668/2780 [04:30<02:38,  7.03it/s]Saving model checkpoint to ./results\checkpoint-1668
Configuration saved in ./results\checkpoint-1668\config.json


{'eval_loss': 2.2617690563201904, 'eval_accuracy': 0.5755102040816327, 'eval_f1': 0.38435621327470854, 'eval_precision': 0.39310233156135227, 'eval_recall': 0.3863850021444107, 'eval_top3': 0.7836734693877551, 'eval_top2': 0.6979591836734694, 'eval_runtime': 0.2887, 'eval_samples_per_second': 848.637, 'eval_steps_per_second': 13.855, 'epoch': 12.0}


Model weights saved in ./results\checkpoint-1668\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-1390] due to args.save_total_limit
 65%|██████▍   | 1806/2780 [04:52<02:21,  6.89it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64

 65%|██████▌   | 1807/2780 [04:52<02:21,  6.89it/s]Saving model checkpoint to ./results\checkpoint-1807
Configuration saved in ./results\checkpoint-1807\config.json


{'eval_loss': 2.381368637084961, 'eval_accuracy': 0.5551020408163265, 'eval_f1': 0.3353155116212118, 'eval_precision': 0.36285986079641885, 'eval_recall': 0.3279865295994328, 'eval_top3': 0.7795918367346939, 'eval_top2': 0.7061224489795919, 'eval_runtime': 0.3012, 'eval_samples_per_second': 813.444, 'eval_steps_per_second': 13.281, 'epoch': 13.0}


Model weights saved in ./results\checkpoint-1807\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-1668] due to args.save_total_limit
 70%|██████▉   | 1945/2780 [05:14<01:56,  7.16it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64

 70%|███████   | 1946/2780 [05:14<01:56,  7.16it/s]Saving model checkpoint to ./results\checkpoint-1946
Configuration saved in ./results\checkpoint-1946\config.json


{'eval_loss': 2.413862705230713, 'eval_accuracy': 0.5673469387755102, 'eval_f1': 0.37501217997045977, 'eval_precision': 0.3875987554112554, 'eval_recall': 0.3741174819803852, 'eval_top3': 0.7836734693877551, 'eval_top2': 0.689795918367347, 'eval_runtime': 0.2887, 'eval_samples_per_second': 848.571, 'eval_steps_per_second': 13.854, 'epoch': 14.0}


Model weights saved in ./results\checkpoint-1946\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-1807] due to args.save_total_limit
 72%|███████▏  | 2001/2780 [05:24<01:52,  6.94it/s]

{'loss': 0.0235, 'learning_rate': 6.842105263157896e-06, 'epoch': 14.39}


 75%|███████▍  | 2084/2780 [05:36<01:36,  7.18it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64

 75%|███████▌  | 2085/2780 [05:36<01:36,  7.18it/s]Saving model checkpoint to ./results\checkpoint-2085
Configuration saved in ./results\checkpoint-2085\config.json


{'eval_loss': 2.438964366912842, 'eval_accuracy': 0.563265306122449, 'eval_f1': 0.3579034804331658, 'eval_precision': 0.3580599575935397, 'eval_recall': 0.36332930435753014, 'eval_top3': 0.7836734693877551, 'eval_top2': 0.7020408163265306, 'eval_runtime': 0.288, 'eval_samples_per_second': 850.794, 'eval_steps_per_second': 13.891, 'epoch': 15.0}


Model weights saved in ./results\checkpoint-2085\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-1946] due to args.save_total_limit
 80%|███████▉  | 2223/2780 [05:57<01:18,  7.14it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64

 80%|████████  | 2224/2780 [05:58<01:17,  7.14it/s]Saving model checkpoint to ./results\checkpoint-2224
Configuration saved in ./results\checkpoint-2224\config.json


{'eval_loss': 2.487133741378784, 'eval_accuracy': 0.5510204081632653, 'eval_f1': 0.37234127989135685, 'eval_precision': 0.37817673504517585, 'eval_recall': 0.37349610887917345, 'eval_top3': 0.7755102040816326, 'eval_top2': 0.7020408163265306, 'eval_runtime': 0.2972, 'eval_samples_per_second': 824.412, 'eval_steps_per_second': 13.46, 'epoch': 16.0}


Model weights saved in ./results\checkpoint-2224\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-2085] due to args.save_total_limit
 85%|████████▍ | 2362/2780 [06:19<00:58,  7.19it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64

 85%|████████▌ | 2363/2780 [06:20<00:58,  7.19it/s]Saving model checkpoint to ./results\checkpoint-2363
Configuration saved in ./results\checkpoint-2363\config.json


{'eval_loss': 2.5146498680114746, 'eval_accuracy': 0.5591836734693878, 'eval_f1': 0.35328236578236577, 'eval_precision': 0.36413035532672255, 'eval_recall': 0.3514929228974928, 'eval_top3': 0.7755102040816326, 'eval_top2': 0.7020408163265306, 'eval_runtime': 0.2905, 'eval_samples_per_second': 843.269, 'eval_steps_per_second': 13.768, 'epoch': 17.0}


Model weights saved in ./results\checkpoint-2363\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-2224] due to args.save_total_limit
 90%|████████▉ | 2501/2780 [06:41<00:38,  7.23it/s]

{'loss': 0.0087, 'learning_rate': 2.456140350877193e-06, 'epoch': 17.99}


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64

 90%|█████████ | 2502/2780 [06:41<00:38,  7.23it/s]Saving model checkpoint to ./results\checkpoint-2502
Configuration saved in ./results\checkpoint-2502\config.json


{'eval_loss': 2.5423431396484375, 'eval_accuracy': 0.563265306122449, 'eval_f1': 0.3782422554200994, 'eval_precision': 0.38402190504270295, 'eval_recall': 0.3792707006752705, 'eval_top3': 0.7836734693877551, 'eval_top2': 0.6979591836734694, 'eval_runtime': 0.2926, 'eval_samples_per_second': 837.273, 'eval_steps_per_second': 13.67, 'epoch': 18.0}


Model weights saved in ./results\checkpoint-2502\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-2363] due to args.save_total_limit
 95%|█████████▍| 2640/2780 [07:03<00:19,  7.01it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64

 95%|█████████▌| 2641/2780 [07:03<00:19,  7.01it/s]Saving model checkpoint to ./results\checkpoint-2641
Configuration saved in ./results\checkpoint-2641\config.json


{'eval_loss': 2.5475220680236816, 'eval_accuracy': 0.5551020408163265, 'eval_f1': 0.347444781422565, 'eval_precision': 0.3520355550246854, 'eval_recall': 0.34880475085448204, 'eval_top3': 0.7755102040816326, 'eval_top2': 0.6979591836734694, 'eval_runtime': 0.3034, 'eval_samples_per_second': 807.405, 'eval_steps_per_second': 13.182, 'epoch': 19.0}


Model weights saved in ./results\checkpoint-2641\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-2502] due to args.save_total_limit
100%|█████████▉| 2779/2780 [07:25<00:00,  7.01it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64

100%|██████████| 2780/2780 [07:26<00:00,  7.01it/s]Saving model checkpoint to ./results\checkpoint-2780
Configuration saved in ./results\checkpoint-2780\config.json


{'eval_loss': 2.555901288986206, 'eval_accuracy': 0.5551020408163265, 'eval_f1': 0.3685013382171906, 'eval_precision': 0.37002416972382646, 'eval_recall': 0.37137419529892646, 'eval_top3': 0.7795918367346939, 'eval_top2': 0.6979591836734694, 'eval_runtime': 0.3009, 'eval_samples_per_second': 814.113, 'eval_steps_per_second': 13.292, 'epoch': 20.0}


Model weights saved in ./results\checkpoint-2780\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-2641] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./results\checkpoint-1529 (score: 0.5795918367346938).
100%|██████████| 2780/2780 [07:28<00:00,  6.20it/s]

{'train_runtime': 448.3402, 'train_samples_per_second': 98.72, 'train_steps_per_second': 6.201, 'train_loss': 0.49576307299325795, 'epoch': 20.0}





TrainOutput(global_step=2780, training_loss=0.49576307299325795, metrics={'train_runtime': 448.3402, 'train_samples_per_second': 98.72, 'train_steps_per_second': 6.201, 'train_loss': 0.49576307299325795, 'epoch': 20.0})

In [18]:
# Evaluate the model currently held by the trainer on its eval_dataset.
# With load_best_model_at_end=True this is the checkpoint with the best
# eval accuracy, not necessarily the one from the last epoch.
trainer.evaluate()

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 245
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 4/4 [00:00<00:00, 17.92it/s]


{'eval_loss': 2.1347901821136475,
 'eval_accuracy': 0.5795918367346938,
 'eval_f1': 0.370981538194653,
 'eval_precision': 0.3877801120448179,
 'eval_recall': 0.3759495308554448,
 'eval_top3': 0.7836734693877551,
 'eval_top2': 0.726530612244898,
 'eval_runtime': 0.2973,
 'eval_samples_per_second': 824.003,
 'eval_steps_per_second': 13.453,
 'epoch': 20.0}