In [1]:
# Training script for fine-tuning BERT on the unaltered GPT-labelled data
# and the manually labelled data (the GPT-labelled data is used for training,
# the manually labelled data for evaluation).
# For better performance/generalization, see the augmented dataset.
# Read README.md for comments and details.

In [2]:
# Names of all target classes. The classifier head is sized from the length
# of this list; presumably a label id is the index of its name here
# (TODO confirm against the labelling code).
classes = [
    "banking",
    "valuation",
    "household",
    "real estate",
    "corporate",
    "external",
    "sovereign",
    "technology",
    "climate",
    "energy",
    "health",
    "eu",
]


In [3]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
import torch
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support,top_k_accuracy_score
import math
import pickle
from datasets import Dataset

In [4]:
# Load bert-base (the model that will be fine-tuned) and FinBERT (the donor
# of pretrained weights and of the tokenizer).
# Seed torch first: the classification head of `model` is not part of the
# bert-base-uncased checkpoint, so it is randomly initialised on load; without
# a fixed seed its starting values differ on every run.
torch.manual_seed(42)
model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(classes))
finbert = AutoModelForSequenceClassification.from_pretrained('ProsusAI/finbert')
tokenizer = AutoTokenizer.from_pretrained('ProsusAI/finbert', use_fast=True)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [5]:
# Transfer FinBERT's weights into `model`, except for the pooler and the
# classification head: those four tensors keep the values `model` already has.
# (FinBERT's own head presumably has a different number of outputs than
# len(classes), so it could not be loaded as-is -- TODO confirm.)
finbert_weights = finbert.state_dict()
model_weights = model.state_dict()

kept_from_model = (
    "bert.pooler.dense.weight",
    "bert.pooler.dense.bias",
    "classifier.weight",
    "classifier.bias",
)

# Drop FinBERT's versions first, then re-insert the ones from `model`.
for key in kept_from_model:
    del finbert_weights[key]
for key in kept_from_model:
    finbert_weights[key] = model_weights[key]

# Last expression of the cell: displays the key-matching report.
model.load_state_dict(finbert_weights)


<All keys matched successfully>

In [6]:
# Load the three pickled corpora. The two GPT-labelled files hold nested
# sequences and are flattened to one flat list each; the manually labelled
# file is used as loaded.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.


def _flatten(nested):
    """Concatenate the inner sequences of `nested` into a single list."""
    flat = []
    for sublist in nested:
        flat.extend(sublist)
    return flat


# manually labelled
with open('train_data.pickle', 'rb') as file:
    train = pickle.load(file)

# GPT labelled, part 1
with open('gpt.pickle', 'rb') as file:
    gpt = pickle.load(file)

# GPT labelled, part 2
with open('gpt_p2.pickle', 'rb') as file:
    gpt2 = pickle.load(file)

gpt = _flatten(gpt)
gpt2 = _flatten(gpt2)

# All GPT-labelled records, part 1 followed by part 2.
mixed = gpt + gpt2

In [7]:
# Sanity check: number of GPT-labelled records after flattening both parts.
print(len(mixed))

2458


In [8]:
# Number of passes made over each corpus when building the lists below.
# Labels are the argmax of each record's "dist", so a value above 1 would only
# duplicate records; raise it only after swapping argmax for a sampling draw,
# i.e. when treating the GPT-3 "dist" values as probabilities.
sample = 1


def _texts(records, repeats):
    """Return the "text" of every record, the whole pass repeated `repeats` times."""
    return [record["text"] for _ in range(repeats) for record in records]


def _argmax_labels(records, repeats):
    """Return the argmax index of every record's "dist", repeated `repeats` times."""
    return [np.argmax(record["dist"]) for _ in range(repeats) for record in records]


# manually labelled corpus
text_max = _texts(train, sample)
label_max = _argmax_labels(train, sample)

# GPT-labelled corpus
text_max_mixed = _texts(mixed, sample)
label_max_mixed = _argmax_labels(mixed, sample)

# Only the GPT-labelled corpus goes into the combined lists.
comb_text = text_max_mixed
comb_label = label_max_mixed

In [9]:
# Texts and labels must stay aligned one-to-one.
assert len(comb_text) == len(comb_label)

In [10]:
# random shuffle
import random

# Shuffle texts and labels together so each text keeps its label. A dedicated
# RNG with a fixed seed makes the resulting order the same on every fresh run
# (Restart & Run All) and leaves the global `random` state untouched.
# Note: re-running this cell alone shuffles the already shuffled data again.
temp = list(zip(comb_text, comb_label))
random.Random(42).shuffle(temp)
# zip(*[]) yields nothing to unpack, so handle the empty case explicitly.
comb_text, comb_label = zip(*temp) if temp else ((), ())

In [11]:
# Sanity check: shuffling must not change the number of examples.
print(len(comb_text))

2458


In [12]:
# Train on the shuffled GPT-labelled data; evaluate on the manually
# labelled data.
train_text, train_label = comb_text, comb_label
test_text, test_label = text_max, label_max

In [13]:
# Wrap the raw text/label sequences in `datasets.Dataset` objects with the
# columns "text" and "label".
train_columns = {"text": train_text, "label": train_label}
test_columns = {"text": test_text, "label": test_label}
train_dataset = Dataset.from_dict(train_columns)
test_dataset = Dataset.from_dict(test_columns)

In [14]:
def tokenize(batch):
    """Tokenize the 'text' entries of `batch` with the module-level tokenizer.

    Args:
        batch: mapping with a 'text' entry holding the texts to encode.

    Returns:
        Whatever the tokenizer returns for those texts, with truncation and
        padding enabled.
    """
    texts = batch['text']
    return tokenizer(texts, truncation=True, padding=True)
# Tokenize each split as one single batch (batch_size == split size), so the
# padding applied inside `tokenize` is computed over the whole split at once.
train_dataset = train_dataset.map(tokenize, batch_size=len(train_dataset), batched=True)
test_dataset = test_dataset.map(tokenize, batch_size=len(test_dataset), batched=True)

# Return torch tensors, restricted to the columns listed here.
model_columns = ('input_ids', 'attention_mask', 'label')
train_dataset.set_format('torch', columns=list(model_columns))
test_dataset.set_format('torch', columns=list(model_columns))

100%|██████████| 1/1 [00:00<00:00,  7.24ba/s]
100%|██████████| 1/1 [00:00<00:00, 66.65ba/s]


In [15]:
# Inspect the padded token-id tensor of the training split (display only).
train_dataset["input_ids"]

tensor([[ 101, 2023, 2095,  ...,    0,    0,    0],
        [ 101, 1999, 2240,  ...,    0,    0,    0],
        [ 101, 2023, 2001,  ...,    0,    0,    0],
        ...,
        [ 101, 9308, 1010,  ...,    0,    0,    0],
        [ 101, 9308, 1010,  ...,    0,    0,    0],
        [ 101, 2021, 2062,  ...,    0,    0,    0]])

In [16]:
def compute_metrics(pred):
    """Compute the evaluation metrics reported by the Trainer.

    Args:
        pred: object exposing ``label_ids`` (true class indices) and
            ``predictions`` (per-class scores; assumed to be a 2-D array with
            one column per class -- TODO confirm for other model heads).

    Returns:
        dict with 'accuracy', macro-averaged 'f1', 'precision' and 'recall',
        and the top-3 / top-2 accuracies under 'top3' / 'top2'.
    """
    labels = pred.label_ids
    scores = pred.predictions
    preds = scores.argmax(-1)

    # zero_division=0 yields the same values as the default ("warn") but
    # without emitting an UndefinedMetricWarning at every evaluation when
    # some class is never predicted.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='macro', zero_division=0
    )
    acc = accuracy_score(labels, preds)

    # Name every class column explicitly. Otherwise scikit-learn infers the
    # class set from `labels` and raises a ValueError whenever the evaluation
    # labels do not cover all columns of `scores`.
    all_classes = np.arange(scores.shape[-1])
    top3 = top_k_accuracy_score(labels, scores, k=3, labels=all_classes)
    top2 = top_k_accuracy_score(labels, scores, k=2, labels=all_classes)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'top3': top3,
        'top2': top2,
    }

# Hyper-parameters and bookkeeping options for the fine-tuning run.
training_args = TrainingArguments(
    # --- output locations ---
    output_dir='./results',
    logging_dir='./logs',
    # --- optimisation ---
    num_train_epochs=20,
    learning_rate=2e-5,
    warmup_steps=500,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    # --- evaluate and checkpoint once per epoch ---
    # NOTE(review): recent transformers releases appear to rename
    # `evaluation_strategy` to `eval_strategy` -- confirm against the
    # installed version before upgrading.
    evaluation_strategy='epoch',
    save_strategy="epoch",
    # Checkpoint limit is 1; older checkpoints are deleted. The training log
    # shows the best checkpoint being retained in addition to the latest one.
    save_total_limit=1,
    # --- model selection: reload the checkpoint with the best accuracy ---
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
)
    
# Wire the model, data, metrics and hyper-parameters into a Trainer.
# `test_dataset` is used for the per-epoch evaluation and therefore also for
# best-checkpoint selection, so its scores are not those of an untouched
# hold-out set.
trainer = Trainer(
    args=training_args,
    model=model,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

In [17]:
# Run fine-tuning; the returned training summary is shown as the cell output.
trainer.train()

The following columns in the training set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running training *****
  Num examples = 2458
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 3080
  5%|▍         | 153/3080 [00:16<05:41,  8.58it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

  5%|▌         | 154/3080 [00:17<05:41,  8.58it/s]Saving model checkpoint to ./results\checkpoint-154
Configuration saved in ./results\checkpoint-154\config.json


{'eval_loss': 2.509948968887329, 'eval_accuracy': 0.13793103448275862, 'eval_f1': 0.046496732026143794, 'eval_precision': 0.0320860240449657, 'eval_recall': 0.10346320346320344, 'eval_top3': 0.3448275862068966, 'eval_top2': 0.21839080459770116, 'eval_runtime': 0.3794, 'eval_samples_per_second': 687.981, 'eval_steps_per_second': 13.18, 'epoch': 1.0}


Model weights saved in ./results\checkpoint-154\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-765] due to args.save_total_limit
 10%|▉         | 307/3080 [00:37<05:34,  8.30it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 10%|█         | 308/3080 [00:38<05:33,  8.30it/s]Saving model checkpoint to ./results\checkpoint-308
Configuration saved in ./results\checkpoint-308\config.json


{'eval_loss': 2.377951145172119, 'eval_accuracy': 0.2796934865900383, 'eval_f1': 0.14517426296872502, 'eval_precision': 0.11616984437030764, 'eval_recall': 0.24255256196045671, 'eval_top3': 0.4674329501915709, 'eval_top2': 0.3793103448275862, 'eval_runtime': 0.3759, 'eval_samples_per_second': 694.411, 'eval_steps_per_second': 13.303, 'epoch': 2.0}


Model weights saved in ./results\checkpoint-308\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-3060] due to args.save_total_limit
 15%|█▌        | 462/3080 [01:02<05:49,  7.48it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 15%|█▌        | 462/3080 [01:02<05:49,  7.48it/s]Saving model checkpoint to ./results\checkpoint-462
Configuration saved in ./results\checkpoint-462\config.json


{'eval_loss': 2.1593668460845947, 'eval_accuracy': 0.3524904214559387, 'eval_f1': 0.18819043282158035, 'eval_precision': 0.14408120169150943, 'eval_recall': 0.2944583681425787, 'eval_top3': 0.5862068965517241, 'eval_top2': 0.45977011494252873, 'eval_runtime': 0.4444, 'eval_samples_per_second': 587.267, 'eval_steps_per_second': 11.25, 'epoch': 3.0}


Model weights saved in ./results\checkpoint-462\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-154] due to args.save_total_limit
 16%|█▋        | 501/3080 [01:10<06:20,  6.78it/s]

{'loss': 1.8909, 'learning_rate': 2e-05, 'epoch': 3.25}


 20%|██        | 616/3080 [01:27<05:28,  7.49it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 20%|██        | 616/3080 [01:28<05:28,  7.49it/s]Saving model checkpoint to ./results\checkpoint-616
Configuration saved in ./results\checkpoint-616\config.json


{'eval_loss': 1.9616974592208862, 'eval_accuracy': 0.3793103448275862, 'eval_f1': 0.23150608484369717, 'eval_precision': 0.24747889208821505, 'eval_recall': 0.3282302979671401, 'eval_top3': 0.6819923371647509, 'eval_top2': 0.5402298850574713, 'eval_runtime': 0.5369, 'eval_samples_per_second': 486.122, 'eval_steps_per_second': 9.313, 'epoch': 4.0}


Model weights saved in ./results\checkpoint-616\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-308] due to args.save_total_limit
 25%|██▍       | 769/3080 [01:53<04:02,  9.52it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 25%|██▌       | 770/3080 [01:53<04:02,  9.52it/s]Saving model checkpoint to ./results\checkpoint-770
Configuration saved in ./results\checkpoint-770\config.json


{'eval_loss': 1.894345760345459, 'eval_accuracy': 0.42528735632183906, 'eval_f1': 0.3047316888351679, 'eval_precision': 0.3851778945643043, 'eval_recall': 0.3899781018202071, 'eval_top3': 0.7394636015325671, 'eval_top2': 0.5938697318007663, 'eval_runtime': 0.3355, 'eval_samples_per_second': 777.998, 'eval_steps_per_second': 14.904, 'epoch': 5.0}


Model weights saved in ./results\checkpoint-770\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-462] due to args.save_total_limit
 30%|███       | 924/3080 [02:13<04:35,  7.84it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 30%|███       | 924/3080 [02:14<04:35,  7.84it/s]Saving model checkpoint to ./results\checkpoint-924
Configuration saved in ./results\checkpoint-924\config.json


{'eval_loss': 1.7519351243972778, 'eval_accuracy': 0.5019157088122606, 'eval_f1': 0.41481266140792994, 'eval_precision': 0.49724804883473617, 'eval_recall': 0.4939346472241209, 'eval_top3': 0.7586206896551724, 'eval_top2': 0.6666666666666666, 'eval_runtime': 0.4586, 'eval_samples_per_second': 569.104, 'eval_steps_per_second': 10.902, 'epoch': 6.0}


Model weights saved in ./results\checkpoint-924\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-616] due to args.save_total_limit
 32%|███▎      | 1001/3080 [02:24<03:24, 10.17it/s]

{'loss': 0.8022, 'learning_rate': 1.612403100775194e-05, 'epoch': 6.49}


 35%|███▌      | 1078/3080 [02:32<03:09, 10.59it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 35%|███▌      | 1078/3080 [02:32<03:09, 10.59it/s]Saving model checkpoint to ./results\checkpoint-1078
Configuration saved in ./results\checkpoint-1078\config.json


{'eval_loss': 1.8573945760726929, 'eval_accuracy': 0.5095785440613027, 'eval_f1': 0.4429744736323114, 'eval_precision': 0.4836867793764345, 'eval_recall': 0.5064798992430571, 'eval_top3': 0.8199233716475096, 'eval_top2': 0.6819923371647509, 'eval_runtime': 0.3386, 'eval_samples_per_second': 770.843, 'eval_steps_per_second': 14.767, 'epoch': 7.0}


Model weights saved in ./results\checkpoint-1078\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-770] due to args.save_total_limit
 40%|███▉      | 1231/3080 [02:50<02:59, 10.30it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 40%|████      | 1232/3080 [02:51<02:59, 10.30it/s]Saving model checkpoint to ./results\checkpoint-1232
Configuration saved in ./results\checkpoint-1232\config.json


{'eval_loss': 2.0106313228607178, 'eval_accuracy': 0.5402298850574713, 'eval_f1': 0.4840353342646256, 'eval_precision': 0.5379163424628045, 'eval_recall': 0.5337978785347206, 'eval_top3': 0.7931034482758621, 'eval_top2': 0.6973180076628352, 'eval_runtime': 0.3332, 'eval_samples_per_second': 783.322, 'eval_steps_per_second': 15.006, 'epoch': 8.0}


Model weights saved in ./results\checkpoint-1232\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-924] due to args.save_total_limit
 45%|████▌     | 1386/3080 [03:12<04:21,  6.47it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 45%|████▌     | 1386/3080 [03:13<04:21,  6.47it/s]Saving model checkpoint to ./results\checkpoint-1386
Configuration saved in ./results\checkpoint-1386\config.json


{'eval_loss': 2.1779556274414062, 'eval_accuracy': 0.5134099616858238, 'eval_f1': 0.45702494221602397, 'eval_precision': 0.5356256922337089, 'eval_recall': 0.4906970709602289, 'eval_top3': 0.8122605363984674, 'eval_top2': 0.685823754789272, 'eval_runtime': 0.5501, 'eval_samples_per_second': 474.439, 'eval_steps_per_second': 9.089, 'epoch': 9.0}


Model weights saved in ./results\checkpoint-1386\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-1078] due to args.save_total_limit
 49%|████▊     | 1501/3080 [03:32<03:50,  6.86it/s]

{'loss': 0.2109, 'learning_rate': 1.2248062015503876e-05, 'epoch': 9.74}


 50%|█████     | 1540/3080 [03:37<03:26,  7.46it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 50%|█████     | 1540/3080 [03:38<03:26,  7.46it/s]Saving model checkpoint to ./results\checkpoint-1540
Configuration saved in ./results\checkpoint-1540\config.json


{'eval_loss': 2.19553804397583, 'eval_accuracy': 0.5287356321839081, 'eval_f1': 0.48242750561375525, 'eval_precision': 0.5534202997438291, 'eval_recall': 0.5209792157160578, 'eval_top3': 0.8084291187739464, 'eval_top2': 0.7432950191570882, 'eval_runtime': 0.4647, 'eval_samples_per_second': 561.698, 'eval_steps_per_second': 10.76, 'epoch': 10.0}


Model weights saved in ./results\checkpoint-1540\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-1386] due to args.save_total_limit
 55%|█████▌    | 1694/3080 [04:03<03:32,  6.51it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 55%|█████▌    | 1694/3080 [04:03<03:32,  6.51it/s]Saving model checkpoint to ./results\checkpoint-1694
Configuration saved in ./results\checkpoint-1694\config.json


{'eval_loss': 2.43583083152771, 'eval_accuracy': 0.5402298850574713, 'eval_f1': 0.49456125804654344, 'eval_precision': 0.5408735526776355, 'eval_recall': 0.5375889217994481, 'eval_top3': 0.8122605363984674, 'eval_top2': 0.7164750957854407, 'eval_runtime': 0.5643, 'eval_samples_per_second': 462.497, 'eval_steps_per_second': 8.86, 'epoch': 11.0}


Model weights saved in ./results\checkpoint-1694\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-1540] due to args.save_total_limit
 60%|██████    | 1848/3080 [04:28<02:40,  7.65it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 60%|██████    | 1848/3080 [04:28<02:40,  7.65it/s]Saving model checkpoint to ./results\checkpoint-1848
Configuration saved in ./results\checkpoint-1848\config.json


{'eval_loss': 2.3724257946014404, 'eval_accuracy': 0.5478927203065134, 'eval_f1': 0.4980456969494342, 'eval_precision': 0.5215233527424746, 'eval_recall': 0.5482006784638364, 'eval_top3': 0.8199233716475096, 'eval_top2': 0.7394636015325671, 'eval_runtime': 0.47, 'eval_samples_per_second': 555.364, 'eval_steps_per_second': 10.639, 'epoch': 12.0}


Model weights saved in ./results\checkpoint-1848\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-1232] due to args.save_total_limit
 65%|██████▍   | 2001/3080 [04:52<02:38,  6.80it/s]

{'loss': 0.0378, 'learning_rate': 8.372093023255815e-06, 'epoch': 12.99}


 65%|██████▌   | 2002/3080 [04:52<02:26,  7.34it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 65%|██████▌   | 2002/3080 [04:53<02:26,  7.34it/s]Saving model checkpoint to ./results\checkpoint-2002
Configuration saved in ./results\checkpoint-2002\config.json


{'eval_loss': 2.462397336959839, 'eval_accuracy': 0.5632183908045977, 'eval_f1': 0.5286202727663669, 'eval_precision': 0.5703607694987006, 'eval_recall': 0.5580073922179185, 'eval_top3': 0.8275862068965517, 'eval_top2': 0.7547892720306514, 'eval_runtime': 0.4731, 'eval_samples_per_second': 551.728, 'eval_steps_per_second': 10.57, 'epoch': 13.0}


Model weights saved in ./results\checkpoint-2002\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-1694] due to args.save_total_limit
 70%|██████▉   | 2155/3080 [05:16<02:09,  7.12it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 70%|███████   | 2156/3080 [05:17<02:09,  7.12it/s]Saving model checkpoint to ./results\checkpoint-2156
Configuration saved in ./results\checkpoint-2156\config.json


{'eval_loss': 2.463299512863159, 'eval_accuracy': 0.5708812260536399, 'eval_f1': 0.5367163812879824, 'eval_precision': 0.5843131135523058, 'eval_recall': 0.5660638338269918, 'eval_top3': 0.8275862068965517, 'eval_top2': 0.7624521072796935, 'eval_runtime': 0.4554, 'eval_samples_per_second': 573.112, 'eval_steps_per_second': 10.979, 'epoch': 14.0}


Model weights saved in ./results\checkpoint-2156\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-1848] due to args.save_total_limit
 75%|███████▌  | 2310/3080 [05:39<01:43,  7.43it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 75%|███████▌  | 2310/3080 [05:39<01:43,  7.43it/s]Saving model checkpoint to ./results\checkpoint-2310
Configuration saved in ./results\checkpoint-2310\config.json


{'eval_loss': 2.4987900257110596, 'eval_accuracy': 0.5517241379310345, 'eval_f1': 0.5138530721652047, 'eval_precision': 0.5553619233974395, 'eval_recall': 0.547157666894509, 'eval_top3': 0.8199233716475096, 'eval_top2': 0.7586206896551724, 'eval_runtime': 0.5032, 'eval_samples_per_second': 518.64, 'eval_steps_per_second': 9.936, 'epoch': 15.0}


Model weights saved in ./results\checkpoint-2310\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-2002] due to args.save_total_limit
 80%|████████  | 2464/3080 [06:01<01:34,  6.53it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64

 80%|████████  | 2464/3080 [06:02<01:34,  6.53it/s]Saving model checkpoint to ./results\checkpoint-2464
Configuration saved in ./results\checkpoint-2464\config.json


{'eval_loss': 2.643785238265991, 'eval_accuracy': 0.5517241379310345, 'eval_f1': 0.5144684242542572, 'eval_precision': 0.5558894459837856, 'eval_recall': 0.5464754056859319, 'eval_top3': 0.8199233716475096, 'eval_top2': 0.7471264367816092, 'eval_runtime': 0.5518, 'eval_samples_per_second': 473.028, 'eval_steps_per_second': 9.062, 'epoch': 16.0}


Model weights saved in ./results\checkpoint-2464\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-2310] due to args.save_total_limit
 81%|████████  | 2501/3080 [06:10<01:38,  5.89it/s]

{'loss': 0.0097, 'learning_rate': 4.4961240310077525e-06, 'epoch': 16.23}


 85%|████████▌ | 2618/3080 [06:30<01:10,  6.59it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 85%|████████▌ | 2618/3080 [06:30<01:10,  6.59it/s]Saving model checkpoint to ./results\checkpoint-2618
Configuration saved in ./results\checkpoint-2618\config.json


{'eval_loss': 2.512448310852051, 'eval_accuracy': 0.5747126436781609, 'eval_f1': 0.5442073581817862, 'eval_precision': 0.5868235712180875, 'eval_recall': 0.5721994379889117, 'eval_top3': 0.8237547892720306, 'eval_top2': 0.7701149425287356, 'eval_runtime': 0.5535, 'eval_samples_per_second': 471.511, 'eval_steps_per_second': 9.033, 'epoch': 17.0}


Model weights saved in ./results\checkpoint-2618\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-2156] due to args.save_total_limit
 90%|████████▉ | 2771/3080 [06:55<00:37,  8.30it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 90%|█████████ | 2772/3080 [06:56<00:37,  8.30it/s]Saving model checkpoint to ./results\checkpoint-2772
Configuration saved in ./results\checkpoint-2772\config.json


{'eval_loss': 2.6014089584350586, 'eval_accuracy': 0.5670498084291188, 'eval_f1': 0.5290317691656496, 'eval_precision': 0.5684118242305892, 'eval_recall': 0.563845219108377, 'eval_top3': 0.8352490421455939, 'eval_top2': 0.7586206896551724, 'eval_runtime': 0.3691, 'eval_samples_per_second': 707.216, 'eval_steps_per_second': 13.548, 'epoch': 18.0}


Model weights saved in ./results\checkpoint-2772\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-2464] due to args.save_total_limit
 95%|█████████▌| 2926/3080 [07:20<00:19,  7.84it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

 95%|█████████▌| 2926/3080 [07:20<00:19,  7.84it/s]Saving model checkpoint to ./results\checkpoint-2926
Configuration saved in ./results\checkpoint-2926\config.json


{'eval_loss': 2.6371397972106934, 'eval_accuracy': 0.5555555555555556, 'eval_f1': 0.5144757036613818, 'eval_precision': 0.5655765043922939, 'eval_recall': 0.5493193210298474, 'eval_top3': 0.8275862068965517, 'eval_top2': 0.7624521072796935, 'eval_runtime': 0.4333, 'eval_samples_per_second': 602.322, 'eval_steps_per_second': 11.539, 'epoch': 19.0}


Model weights saved in ./results\checkpoint-2926\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-2772] due to args.save_total_limit
 97%|█████████▋| 3001/3080 [07:32<00:11,  7.06it/s]

{'loss': 0.0055, 'learning_rate': 6.201550387596899e-07, 'epoch': 19.48}


100%|██████████| 3080/3080 [07:43<00:00, 10.62it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3080/3080 [07:43<00:00, 10.62it/s]Saving model checkpoint to ./results\checkpoint-3080
Configuration saved in ./results\checkpoint-3080\config.json


{'eval_loss': 2.6133997440338135, 'eval_accuracy': 0.5632183908045977, 'eval_f1': 0.5221213693587845, 'eval_precision': 0.5603289932573821, 'eval_recall': 0.5580912508544087, 'eval_top3': 0.8275862068965517, 'eval_top2': 0.7662835249042146, 'eval_runtime': 0.3341, 'eval_samples_per_second': 781.26, 'eval_steps_per_second': 14.967, 'epoch': 20.0}


Model weights saved in ./results\checkpoint-3080\pytorch_model.bin
Deleting older checkpoint [results\checkpoint-2926] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./results\checkpoint-2618 (score: 0.5747126436781609).
100%|██████████| 3080/3080 [07:46<00:00,  6.61it/s]

{'train_runtime': 466.0765, 'train_samples_per_second': 105.476, 'train_steps_per_second': 6.608, 'train_loss': 0.48012792072319366, 'epoch': 20.0}





TrainOutput(global_step=3080, training_loss=0.48012792072319366, metrics={'train_runtime': 466.0765, 'train_samples_per_second': 105.476, 'train_steps_per_second': 6.608, 'train_loss': 0.48012792072319366, 'epoch': 20.0})

In [18]:
# Evaluate on the Trainer's eval_dataset. With load_best_model_at_end=True
# the model in memory is the best checkpoint, not the last epoch.
trainer.evaluate()

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 261
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 5/5 [00:00<00:00, 14.32it/s]


{'eval_loss': 2.512448310852051,
 'eval_accuracy': 0.5747126436781609,
 'eval_f1': 0.5442073581817862,
 'eval_precision': 0.5868235712180875,
 'eval_recall': 0.5721994379889117,
 'eval_top3': 0.8237547892720306,
 'eval_top2': 0.7701149425287356,
 'eval_runtime': 0.4489,
 'eval_samples_per_second': 581.427,
 'eval_steps_per_second': 11.138,
 'epoch': 20.0}