In [1]:
import pandas as pd
import numpy as np
from scipy.special import softmax
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from transformers import Trainer
from transformers import TrainingArguments
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer, DataCollatorWithPadding, BertForSequenceClassification

from datasets import load_metric
from datasets import load_dataset
from datasets import load_from_disk

import helper_data, helper_model
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")




[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Howard\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Howard\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
import torch
torch.cuda.empty_cache()

In [3]:
import os
print(os.getcwd())

d:\OneDrive\Cornell\Fall 2023\Analyzing-the-Correlation-Between-Retail-Traders--Sentiments-and-Equity-Market-Movements\Sentiment_Analysis


In [4]:
# !pip install datasets

In [5]:
# raw_datasets = load_from_disk("data")
raw_datasets = load_from_disk("data/finetune_data/")
checkpoint = "bert-base-uncased"
checkpoint = "bert-base-cased"
checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

def tokenize_function(example):
    return tokenizer(example["text"], padding=True, truncation=True)

tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer) # pad all the examples to the length of the longest element when we batch elements together — dynamic padding.

In [6]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 4320
    })
    validation: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1081
    })
    test: Dataset({
        features: ['text', 'label', '__index_level_0__', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 5656
    })
})

In [7]:
raw_datasets['train']

Dataset({
    features: ['text', 'label'],
    num_rows: 4320
})

In [8]:
tokenized_datasets['train']['text']

['$AAPL shows signs of recovery after the slump. ????',
 "$JPM's stock faces downward pressure as the company navigates regulatory challenges in the banking industry.",
 '$GOOG just reported robust earnings. Stock is soaring. ????',
 '$ROKU streams in the wrong direction with intensifying competition in the streaming market.',
 "$GME's stock soars to new heights, triggering concerns of a speculative bubble reminiscent of the past.",
 'Travis Perkins Hikes Dividend 20% As Profit And Revenue Rise',
 'The stock rose for a second day on Wednesday bringing its two-day rise to GBX12 .0 or 2.0 % .',
 "$DIS's stock experiences volatility as the company deals with theme park closures and delays in movie releases.",
 '@Intuitiv $CLNE gasoline prices spike in California. http://stks.co/t1bOi',
 "$DIS's stock takes a hit as the company deals with theme park closures and delays in movie releases.",
 'Both operating profit and net sales for the six-month period increased , respectively from EUR7 .5 

In [9]:
metric = load_metric("glue", "mrpc")

def compute_metrics(eval_preds):
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

  metric = load_metric("glue", "mrpc")


In [10]:
# !pip install transformers[torch]
# !pip install accelerate -U

In [11]:
# !pip install accelerate==0.24.0

In [12]:
import accelerate
print(accelerate.__version__)

0.24.1


In [30]:
training_args = TrainingArguments(
    output_dir="test-trainer",
    evaluation_strategy="epoch",
    num_train_epochs=50,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=256,
    # During the first 500 training steps, the learning rate gradually increases from 0 (or a small base rate) to the specified learning rate.
    # This gradual increase helps in stabilizing the training process and often leads to better performance, as it prevents the model from making too large updates too quickly.
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='logs',
    logging_steps=10,  # how frequently the training progress is logged
    save_strategy="epoch",  # Set save strategy to match evaluation strategy
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    report_to="none",  # disable wandb
    fp16=True,  # Enable mixed precision training
)
# model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
model = BertForSequenceClassification.from_pretrained(checkpoint, num_labels=2).to(device)

trainer = Trainer(
    model,
    training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

In [31]:
trainer.train()



{'loss': 0.6841, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.04}
{'loss': 0.6867, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.07}
{'loss': 0.6759, 'learning_rate': 3e-06, 'epoch': 0.11}
{'loss': 0.6667, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.15}
{'loss': 0.6582, 'learning_rate': 5e-06, 'epoch': 0.19}
{'loss': 0.6204, 'learning_rate': 6e-06, 'epoch': 0.22}
{'loss': 0.607, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.26}
{'loss': 0.5432, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.3}
{'loss': 0.5105, 'learning_rate': 9e-06, 'epoch': 0.33}
{'loss': 0.4587, 'learning_rate': 1e-05, 'epoch': 0.37}
{'loss': 0.399, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.41}
{'loss': 0.3849, 'learning_rate': 1.2e-05, 'epoch': 0.44}
{'loss': 0.3854, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.48}
{'loss': 0.3176, 'learning_rate': 1.3900000000000002e-05, 'epoch': 0.52}
{'loss': 0.3166, 'learning_rate': 1.49e-05, 'epoch': 0.56}
{'loss': 0.273, 'learning

{'eval_loss': 0.2005515843629837, 'eval_accuracy': 0.9324699352451434, 'eval_f1': 0.9318394024276377, 'eval_runtime': 4.5346, 'eval_samples_per_second': 238.39, 'eval_steps_per_second': 1.103, 'epoch': 1.0}
{'loss': 0.1357, 'learning_rate': 2.7800000000000005e-05, 'epoch': 1.04}
{'loss': 0.088, 'learning_rate': 2.88e-05, 'epoch': 1.07}
{'loss': 0.1889, 'learning_rate': 2.98e-05, 'epoch': 1.11}
{'loss': 0.1042, 'learning_rate': 3.08e-05, 'epoch': 1.15}
{'loss': 0.0819, 'learning_rate': 3.18e-05, 'epoch': 1.19}
{'loss': 0.3905, 'learning_rate': 3.2800000000000004e-05, 'epoch': 1.22}
{'loss': 0.1159, 'learning_rate': 3.38e-05, 'epoch': 1.26}
{'loss': 0.1018, 'learning_rate': 3.48e-05, 'epoch': 1.3}
{'loss': 0.1418, 'learning_rate': 3.58e-05, 'epoch': 1.33}
{'loss': 0.145, 'learning_rate': 3.68e-05, 'epoch': 1.37}
{'loss': 0.199, 'learning_rate': 3.7800000000000004e-05, 'epoch': 1.41}
{'loss': 0.1046, 'learning_rate': 3.8700000000000006e-05, 'epoch': 1.44}
{'loss': 0.2, 'learning_rate': 3.

{'eval_loss': 0.21893110871315002, 'eval_accuracy': 0.938945420906568, 'eval_f1': 0.9376181474480151, 'eval_runtime': 4.3811, 'eval_samples_per_second': 246.744, 'eval_steps_per_second': 1.141, 'epoch': 2.0}
{'loss': 0.0906, 'learning_rate': 4.981923076923078e-05, 'epoch': 2.04}
{'loss': 0.1025, 'learning_rate': 4.978076923076923e-05, 'epoch': 2.07}
{'loss': 0.0402, 'learning_rate': 4.9742307692307694e-05, 'epoch': 2.11}
{'loss': 0.1404, 'learning_rate': 4.9703846153846156e-05, 'epoch': 2.15}
{'loss': 0.1476, 'learning_rate': 4.966538461538462e-05, 'epoch': 2.19}
{'loss': 0.0916, 'learning_rate': 4.962692307692308e-05, 'epoch': 2.22}
{'loss': 0.1597, 'learning_rate': 4.9588461538461536e-05, 'epoch': 2.26}
{'loss': 0.1435, 'learning_rate': 4.955384615384616e-05, 'epoch': 2.3}
{'loss': 0.089, 'learning_rate': 4.951538461538462e-05, 'epoch': 2.33}
{'loss': 0.0609, 'learning_rate': 4.9476923076923084e-05, 'epoch': 2.37}
{'loss': 0.2076, 'learning_rate': 4.943846153846154e-05, 'epoch': 2.41

{'eval_loss': 0.3661186695098877, 'eval_accuracy': 0.9204440333024977, 'eval_f1': 0.9156862745098039, 'eval_runtime': 4.7769, 'eval_samples_per_second': 226.298, 'eval_steps_per_second': 1.047, 'epoch': 3.0}
{'loss': 0.1262, 'learning_rate': 4.878461538461539e-05, 'epoch': 3.04}
{'loss': 0.0672, 'learning_rate': 4.8746153846153845e-05, 'epoch': 3.07}
{'loss': 0.0363, 'learning_rate': 4.870769230769231e-05, 'epoch': 3.11}
{'loss': 0.0018, 'learning_rate': 4.8669230769230776e-05, 'epoch': 3.15}
{'loss': 0.0793, 'learning_rate': 4.863076923076924e-05, 'epoch': 3.19}
{'loss': 0.0297, 'learning_rate': 4.859230769230769e-05, 'epoch': 3.22}
{'loss': 0.0008, 'learning_rate': 4.8553846153846155e-05, 'epoch': 3.26}
{'loss': 0.077, 'learning_rate': 4.851538461538462e-05, 'epoch': 3.3}
{'loss': 0.0298, 'learning_rate': 4.847692307692308e-05, 'epoch': 3.33}
{'loss': 0.073, 'learning_rate': 4.8438461538461535e-05, 'epoch': 3.37}
{'loss': 0.0232, 'learning_rate': 4.8400000000000004e-05, 'epoch': 3.41

{'eval_loss': 0.28287768363952637, 'eval_accuracy': 0.9500462534690102, 'eval_f1': 0.9507299270072993, 'eval_runtime': 4.7593, 'eval_samples_per_second': 227.136, 'eval_steps_per_second': 1.051, 'epoch': 4.0}
{'loss': 0.0887, 'learning_rate': 4.774615384615385e-05, 'epoch': 4.04}
{'loss': 0.0005, 'learning_rate': 4.770769230769231e-05, 'epoch': 4.07}
{'loss': 0.0503, 'learning_rate': 4.766923076923077e-05, 'epoch': 4.11}
{'loss': 0.0009, 'learning_rate': 4.7630769230769234e-05, 'epoch': 4.15}
{'loss': 0.0005, 'learning_rate': 4.7592307692307696e-05, 'epoch': 4.19}
{'loss': 0.0529, 'learning_rate': 4.755384615384615e-05, 'epoch': 4.22}
{'loss': 0.1037, 'learning_rate': 4.751538461538462e-05, 'epoch': 4.26}
{'loss': 0.038, 'learning_rate': 4.747692307692308e-05, 'epoch': 4.3}
{'loss': 0.0171, 'learning_rate': 4.7438461538461545e-05, 'epoch': 4.33}
{'loss': 0.0474, 'learning_rate': 4.74e-05, 'epoch': 4.37}
{'loss': 0.0837, 'learning_rate': 4.736153846153846e-05, 'epoch': 4.41}
{'loss': 0.

{'eval_loss': 0.2491028755903244, 'eval_accuracy': 0.9426456984273821, 'eval_f1': 0.9419475655430711, 'eval_runtime': 4.6594, 'eval_samples_per_second': 232.006, 'eval_steps_per_second': 1.073, 'epoch': 5.0}
{'loss': 0.0042, 'learning_rate': 4.6707692307692306e-05, 'epoch': 5.04}
{'loss': 0.0013, 'learning_rate': 4.666923076923077e-05, 'epoch': 5.07}
{'loss': 0.0006, 'learning_rate': 4.663076923076924e-05, 'epoch': 5.11}
{'loss': 0.0138, 'learning_rate': 4.65923076923077e-05, 'epoch': 5.15}
{'loss': 0.0443, 'learning_rate': 4.6553846153846154e-05, 'epoch': 5.19}
{'loss': 0.0632, 'learning_rate': 4.6515384615384616e-05, 'epoch': 5.22}
{'loss': 0.0007, 'learning_rate': 4.647692307692308e-05, 'epoch': 5.26}
{'loss': 0.0007, 'learning_rate': 4.643846153846154e-05, 'epoch': 5.3}
{'loss': 0.0068, 'learning_rate': 4.64e-05, 'epoch': 5.33}
{'loss': 0.0491, 'learning_rate': 4.6361538461538465e-05, 'epoch': 5.37}
{'loss': 0.1069, 'learning_rate': 4.632307692307693e-05, 'epoch': 5.41}
{'loss': 0.

{'eval_loss': 0.2896479666233063, 'eval_accuracy': 0.9500462534690102, 'eval_f1': 0.9507299270072993, 'eval_runtime': 4.6362, 'eval_samples_per_second': 233.164, 'eval_steps_per_second': 1.078, 'epoch': 6.0}
{'loss': 0.0004, 'learning_rate': 4.5673076923076925e-05, 'epoch': 6.04}
{'loss': 0.0294, 'learning_rate': 4.563461538461539e-05, 'epoch': 6.07}
{'loss': 0.0001, 'learning_rate': 4.559615384615385e-05, 'epoch': 6.11}
{'loss': 0.0001, 'learning_rate': 4.555769230769231e-05, 'epoch': 6.15}
{'loss': 0.0289, 'learning_rate': 4.551923076923077e-05, 'epoch': 6.19}
{'loss': 0.0002, 'learning_rate': 4.548076923076923e-05, 'epoch': 6.22}
{'loss': 0.0001, 'learning_rate': 4.54423076923077e-05, 'epoch': 6.26}
{'loss': 0.0001, 'learning_rate': 4.540384615384616e-05, 'epoch': 6.3}
{'loss': 0.0001, 'learning_rate': 4.5365384615384616e-05, 'epoch': 6.33}
{'loss': 0.0192, 'learning_rate': 4.532692307692308e-05, 'epoch': 6.37}
{'loss': 0.0011, 'learning_rate': 4.528846153846154e-05, 'epoch': 6.41}


{'eval_loss': 0.2532671093940735, 'eval_accuracy': 0.9537465309898242, 'eval_f1': 0.9542124542124543, 'eval_runtime': 4.7434, 'eval_samples_per_second': 227.896, 'eval_steps_per_second': 1.054, 'epoch': 7.0}
{'loss': 0.0041, 'learning_rate': 4.463461538461538e-05, 'epoch': 7.04}
{'loss': 0.0003, 'learning_rate': 4.4596153846153845e-05, 'epoch': 7.07}
{'loss': 0.0429, 'learning_rate': 4.4557692307692314e-05, 'epoch': 7.11}
{'loss': 0.0002, 'learning_rate': 4.451923076923077e-05, 'epoch': 7.15}
{'loss': 0.0356, 'learning_rate': 4.448076923076923e-05, 'epoch': 7.19}
{'loss': 0.02, 'learning_rate': 4.4442307692307694e-05, 'epoch': 7.22}
{'loss': 0.0002, 'learning_rate': 4.4403846153846156e-05, 'epoch': 7.26}
{'loss': 0.0009, 'learning_rate': 4.436538461538462e-05, 'epoch': 7.3}
{'loss': 0.0001, 'learning_rate': 4.4326923076923074e-05, 'epoch': 7.33}
{'loss': 0.0002, 'learning_rate': 4.428846153846154e-05, 'epoch': 7.37}
{'loss': 0.0084, 'learning_rate': 4.4250000000000005e-05, 'epoch': 7.4

{'eval_loss': 0.380806565284729, 'eval_accuracy': 0.9426456984273821, 'eval_f1': 0.9418386491557222, 'eval_runtime': 4.9895, 'eval_samples_per_second': 216.656, 'eval_steps_per_second': 1.002, 'epoch': 8.0}
{'loss': 0.0137, 'learning_rate': 4.359615384615385e-05, 'epoch': 8.04}
{'loss': 0.0004, 'learning_rate': 4.355769230769231e-05, 'epoch': 8.07}
{'loss': 0.0002, 'learning_rate': 4.351923076923077e-05, 'epoch': 8.11}
{'loss': 0.0584, 'learning_rate': 4.348076923076923e-05, 'epoch': 8.15}
{'loss': 0.0134, 'learning_rate': 4.344230769230769e-05, 'epoch': 8.19}
{'loss': 0.0408, 'learning_rate': 4.340384615384616e-05, 'epoch': 8.22}
{'loss': 0.0323, 'learning_rate': 4.336538461538462e-05, 'epoch': 8.26}
{'loss': 0.0481, 'learning_rate': 4.3326923076923083e-05, 'epoch': 8.3}
{'loss': 0.0005, 'learning_rate': 4.328846153846154e-05, 'epoch': 8.33}
{'loss': 0.0001, 'learning_rate': 4.325e-05, 'epoch': 8.37}
{'loss': 0.0005, 'learning_rate': 4.321153846153846e-05, 'epoch': 8.41}
{'loss': 0.00

{'eval_loss': 0.28510457277297974, 'eval_accuracy': 0.9528214616096207, 'eval_f1': 0.952734012974977, 'eval_runtime': 4.7878, 'eval_samples_per_second': 225.783, 'eval_steps_per_second': 1.044, 'epoch': 9.0}
{'loss': 0.0002, 'learning_rate': 4.2557692307692306e-05, 'epoch': 9.04}
{'loss': 0.0437, 'learning_rate': 4.2519230769230775e-05, 'epoch': 9.07}
{'loss': 0.0004, 'learning_rate': 4.248076923076924e-05, 'epoch': 9.11}
{'loss': 0.0109, 'learning_rate': 4.244230769230769e-05, 'epoch': 9.15}
{'loss': 0.0001, 'learning_rate': 4.2403846153846155e-05, 'epoch': 9.19}
{'loss': 0.0001, 'learning_rate': 4.236538461538462e-05, 'epoch': 9.22}
{'loss': 0.0001, 'learning_rate': 4.232692307692308e-05, 'epoch': 9.26}
{'loss': 0.0793, 'learning_rate': 4.2288461538461535e-05, 'epoch': 9.3}
{'loss': 0.0529, 'learning_rate': 4.2250000000000004e-05, 'epoch': 9.33}
{'loss': 0.0356, 'learning_rate': 4.2211538461538466e-05, 'epoch': 9.37}
{'loss': 0.0013, 'learning_rate': 4.217307692307693e-05, 'epoch': 9

{'eval_loss': 0.4281562566757202, 'eval_accuracy': 0.9500462534690102, 'eval_f1': 0.9516994633273703, 'eval_runtime': 5.1322, 'eval_samples_per_second': 210.632, 'eval_steps_per_second': 0.974, 'epoch': 10.0}
{'loss': 0.0036, 'learning_rate': 4.151923076923077e-05, 'epoch': 10.04}
{'loss': 0.0, 'learning_rate': 4.1480769230769234e-05, 'epoch': 10.07}
{'loss': 0.0, 'learning_rate': 4.1442307692307696e-05, 'epoch': 10.11}
{'loss': 0.0, 'learning_rate': 4.140384615384615e-05, 'epoch': 10.15}
{'loss': 0.0, 'learning_rate': 4.136538461538462e-05, 'epoch': 10.19}
{'loss': 0.0, 'learning_rate': 4.132692307692308e-05, 'epoch': 10.22}
{'loss': 0.0, 'learning_rate': 4.1288461538461544e-05, 'epoch': 10.26}
{'loss': 0.0, 'learning_rate': 4.125e-05, 'epoch': 10.3}
{'loss': 0.0, 'learning_rate': 4.121153846153846e-05, 'epoch': 10.33}
{'loss': 0.0, 'learning_rate': 4.1173076923076924e-05, 'epoch': 10.37}
{'loss': 0.0, 'learning_rate': 4.1134615384615386e-05, 'epoch': 10.41}
{'loss': 0.0, 'learning_ra

{'eval_loss': 0.38532689213752747, 'eval_accuracy': 0.9546716003700277, 'eval_f1': 0.9557362240289069, 'eval_runtime': 4.3186, 'eval_samples_per_second': 250.313, 'eval_steps_per_second': 1.158, 'epoch': 11.0}
{'loss': 0.0, 'learning_rate': 4.0480769230769236e-05, 'epoch': 11.04}
{'loss': 0.0, 'learning_rate': 4.04423076923077e-05, 'epoch': 11.07}
{'loss': 0.0447, 'learning_rate': 4.0403846153846154e-05, 'epoch': 11.11}
{'loss': 0.0001, 'learning_rate': 4.0365384615384616e-05, 'epoch': 11.15}
{'loss': 0.0, 'learning_rate': 4.032692307692308e-05, 'epoch': 11.19}
{'loss': 0.0, 'learning_rate': 4.028846153846154e-05, 'epoch': 11.22}
{'loss': 0.0001, 'learning_rate': 4.025e-05, 'epoch': 11.26}
{'loss': 0.0, 'learning_rate': 4.0211538461538465e-05, 'epoch': 11.3}
{'loss': 0.0, 'learning_rate': 4.017307692307693e-05, 'epoch': 11.33}
{'loss': 0.0, 'learning_rate': 4.013461538461539e-05, 'epoch': 11.37}
{'loss': 0.0, 'learning_rate': 4.009615384615385e-05, 'epoch': 11.41}
{'loss': 0.0, 'learni

{'eval_loss': 0.4230538010597229, 'eval_accuracy': 0.9491211840888066, 'eval_f1': 0.950316169828365, 'eval_runtime': 4.5589, 'eval_samples_per_second': 237.119, 'eval_steps_per_second': 1.097, 'epoch': 12.0}
{'loss': 0.0, 'learning_rate': 3.9442307692307695e-05, 'epoch': 12.04}
{'loss': 0.0, 'learning_rate': 3.940384615384616e-05, 'epoch': 12.07}
{'loss': 0.0001, 'learning_rate': 3.936538461538461e-05, 'epoch': 12.11}
{'loss': 0.0014, 'learning_rate': 3.932692307692308e-05, 'epoch': 12.15}
{'loss': 0.0053, 'learning_rate': 3.928846153846154e-05, 'epoch': 12.19}
{'loss': 0.0, 'learning_rate': 3.9250000000000005e-05, 'epoch': 12.22}
{'loss': 0.0, 'learning_rate': 3.921153846153846e-05, 'epoch': 12.26}
{'loss': 0.0245, 'learning_rate': 3.917307692307692e-05, 'epoch': 12.3}
{'loss': 0.0, 'learning_rate': 3.9134615384615385e-05, 'epoch': 12.33}
{'loss': 0.0158, 'learning_rate': 3.909615384615385e-05, 'epoch': 12.37}
{'loss': 0.0, 'learning_rate': 3.905769230769231e-05, 'epoch': 12.41}
{'los

{'eval_loss': 0.42610037326812744, 'eval_accuracy': 0.9509713228492137, 'eval_f1': 0.951061865189289, 'eval_runtime': 4.095, 'eval_samples_per_second': 263.977, 'eval_steps_per_second': 1.221, 'epoch': 13.0}
{'loss': 0.0, 'learning_rate': 3.840384615384616e-05, 'epoch': 13.04}
{'loss': 0.0, 'learning_rate': 3.836538461538462e-05, 'epoch': 13.07}
{'loss': 0.0, 'learning_rate': 3.832692307692308e-05, 'epoch': 13.11}
{'loss': 0.0, 'learning_rate': 3.828846153846154e-05, 'epoch': 13.15}
{'loss': 0.0, 'learning_rate': 3.825e-05, 'epoch': 13.19}
{'loss': 0.0603, 'learning_rate': 3.8211538461538464e-05, 'epoch': 13.22}
{'loss': 0.0, 'learning_rate': 3.8173076923076926e-05, 'epoch': 13.26}
{'loss': 0.0, 'learning_rate': 3.813461538461539e-05, 'epoch': 13.3}
{'loss': 0.0, 'learning_rate': 3.809615384615385e-05, 'epoch': 13.33}
{'loss': 0.0, 'learning_rate': 3.805769230769231e-05, 'epoch': 13.37}
{'loss': 0.0, 'learning_rate': 3.8019230769230774e-05, 'epoch': 13.41}
{'loss': 0.0, 'learning_rate'

{'eval_loss': 0.453595906496048, 'eval_accuracy': 0.9444958371877891, 'eval_f1': 0.943502824858757, 'eval_runtime': 4.6835, 'eval_samples_per_second': 230.811, 'eval_steps_per_second': 1.068, 'epoch': 14.0}
{'loss': 0.0, 'learning_rate': 3.736538461538462e-05, 'epoch': 14.04}
{'loss': 0.0, 'learning_rate': 3.732692307692307e-05, 'epoch': 14.07}
{'loss': 0.0, 'learning_rate': 3.728846153846154e-05, 'epoch': 14.11}
{'loss': 0.0, 'learning_rate': 3.7250000000000004e-05, 'epoch': 14.15}
{'loss': 0.0, 'learning_rate': 3.7211538461538466e-05, 'epoch': 14.19}
{'loss': 0.0, 'learning_rate': 3.717307692307693e-05, 'epoch': 14.22}
{'loss': 0.0, 'learning_rate': 3.7134615384615384e-05, 'epoch': 14.26}
{'loss': 0.0, 'learning_rate': 3.7096153846153846e-05, 'epoch': 14.3}
{'loss': 0.0, 'learning_rate': 3.705769230769231e-05, 'epoch': 14.33}
{'loss': 0.0, 'learning_rate': 3.701923076923077e-05, 'epoch': 14.37}
{'loss': 0.0, 'learning_rate': 3.698076923076923e-05, 'epoch': 14.41}
{'loss': 0.0, 'learn

{'eval_loss': 0.43625548481941223, 'eval_accuracy': 0.9528214616096207, 'eval_f1': 0.9540954095409541, 'eval_runtime': 4.4139, 'eval_samples_per_second': 244.909, 'eval_steps_per_second': 1.133, 'epoch': 15.0}
{'loss': 0.0, 'learning_rate': 3.632692307692308e-05, 'epoch': 15.04}
{'loss': 0.0, 'learning_rate': 3.628846153846154e-05, 'epoch': 15.07}
{'loss': 0.0012, 'learning_rate': 3.625e-05, 'epoch': 15.11}
{'loss': 0.0074, 'learning_rate': 3.621153846153846e-05, 'epoch': 15.15}
{'loss': 0.0, 'learning_rate': 3.6173076923076925e-05, 'epoch': 15.19}
{'loss': 0.0664, 'learning_rate': 3.613461538461539e-05, 'epoch': 15.22}
{'loss': 0.0495, 'learning_rate': 3.609615384615385e-05, 'epoch': 15.26}
{'loss': 0.098, 'learning_rate': 3.605769230769231e-05, 'epoch': 15.3}
{'loss': 0.0463, 'learning_rate': 3.601923076923077e-05, 'epoch': 15.33}
{'loss': 0.0003, 'learning_rate': 3.5980769230769235e-05, 'epoch': 15.37}
{'loss': 0.0002, 'learning_rate': 3.594230769230769e-05, 'epoch': 15.41}
{'loss':

{'eval_loss': 0.4173656702041626, 'eval_accuracy': 0.9500462534690102, 'eval_f1': 0.9509090909090909, 'eval_runtime': 4.4133, 'eval_samples_per_second': 244.939, 'eval_steps_per_second': 1.133, 'epoch': 16.0}
{'loss': 0.0, 'learning_rate': 3.528846153846154e-05, 'epoch': 16.04}
{'loss': 0.0, 'learning_rate': 3.525e-05, 'epoch': 16.07}
{'loss': 0.0002, 'learning_rate': 3.5211538461538465e-05, 'epoch': 16.11}
{'loss': 0.0, 'learning_rate': 3.517307692307693e-05, 'epoch': 16.15}
{'loss': 0.0, 'learning_rate': 3.513461538461539e-05, 'epoch': 16.19}
{'loss': 0.0, 'learning_rate': 3.5096153846153845e-05, 'epoch': 16.22}
{'loss': 0.0, 'learning_rate': 3.505769230769231e-05, 'epoch': 16.26}
{'loss': 0.0165, 'learning_rate': 3.501923076923077e-05, 'epoch': 16.3}
{'loss': 0.0659, 'learning_rate': 3.498076923076923e-05, 'epoch': 16.33}
{'loss': 0.0477, 'learning_rate': 3.4942307692307694e-05, 'epoch': 16.37}
{'loss': 0.028, 'learning_rate': 3.4903846153846156e-05, 'epoch': 16.41}
{'loss': 0.0002,

{'eval_loss': 0.3226091265678406, 'eval_accuracy': 0.9417206290471786, 'eval_f1': 0.9417206290471785, 'eval_runtime': 4.3864, 'eval_samples_per_second': 246.446, 'eval_steps_per_second': 1.14, 'epoch': 17.0}
{'loss': 0.0189, 'learning_rate': 3.4250000000000006e-05, 'epoch': 17.04}
{'loss': 0.0324, 'learning_rate': 3.421153846153846e-05, 'epoch': 17.07}
{'loss': 0.0001, 'learning_rate': 3.417307692307692e-05, 'epoch': 17.11}
{'loss': 0.0001, 'learning_rate': 3.4134615384615386e-05, 'epoch': 17.15}
{'loss': 0.0218, 'learning_rate': 3.409615384615385e-05, 'epoch': 17.19}
{'loss': 0.0002, 'learning_rate': 3.405769230769231e-05, 'epoch': 17.22}
{'loss': 0.0, 'learning_rate': 3.401923076923077e-05, 'epoch': 17.26}
{'loss': 0.0001, 'learning_rate': 3.3980769230769234e-05, 'epoch': 17.3}
{'loss': 0.0001, 'learning_rate': 3.3942307692307696e-05, 'epoch': 17.33}
{'loss': 0.0, 'learning_rate': 3.390384615384615e-05, 'epoch': 17.37}
{'loss': 0.0001, 'learning_rate': 3.3865384615384614e-05, 'epoch'

{'eval_loss': 0.4134155511856079, 'eval_accuracy': 0.9509713228492137, 'eval_f1': 0.9521228545618788, 'eval_runtime': 4.4244, 'eval_samples_per_second': 244.328, 'eval_steps_per_second': 1.13, 'epoch': 18.0}
{'loss': 0.0319, 'learning_rate': 3.321153846153846e-05, 'epoch': 18.04}
{'loss': 0.0004, 'learning_rate': 3.3173076923076926e-05, 'epoch': 18.07}
{'loss': 0.0138, 'learning_rate': 3.313461538461539e-05, 'epoch': 18.11}
{'loss': 0.0, 'learning_rate': 3.309615384615385e-05, 'epoch': 18.15}
{'loss': 0.0034, 'learning_rate': 3.305769230769231e-05, 'epoch': 18.19}
{'loss': 0.0385, 'learning_rate': 3.301923076923077e-05, 'epoch': 18.22}
{'loss': 0.0134, 'learning_rate': 3.298076923076923e-05, 'epoch': 18.26}
{'loss': 0.0687, 'learning_rate': 3.294230769230769e-05, 'epoch': 18.3}
{'loss': 0.0241, 'learning_rate': 3.2903846153846155e-05, 'epoch': 18.33}
{'loss': 0.0002, 'learning_rate': 3.286538461538462e-05, 'epoch': 18.37}
{'loss': 0.0183, 'learning_rate': 3.282692307692308e-05, 'epoch'

{'eval_loss': 0.35416364669799805, 'eval_accuracy': 0.9565217391304348, 'eval_f1': 0.9566020313942751, 'eval_runtime': 4.8517, 'eval_samples_per_second': 222.808, 'eval_steps_per_second': 1.031, 'epoch': 19.0}
{'loss': 0.0001, 'learning_rate': 3.217307692307692e-05, 'epoch': 19.04}
{'loss': 0.0024, 'learning_rate': 3.2134615384615384e-05, 'epoch': 19.07}
{'loss': 0.0551, 'learning_rate': 3.2096153846153847e-05, 'epoch': 19.11}
{'loss': 0.0001, 'learning_rate': 3.205769230769231e-05, 'epoch': 19.15}
{'loss': 0.0003, 'learning_rate': 3.201923076923077e-05, 'epoch': 19.19}
{'loss': 0.0001, 'learning_rate': 3.198076923076923e-05, 'epoch': 19.22}
{'loss': 0.0764, 'learning_rate': 3.1942307692307695e-05, 'epoch': 19.26}
{'loss': 0.0419, 'learning_rate': 3.190384615384616e-05, 'epoch': 19.3}
{'loss': 0.0168, 'learning_rate': 3.186538461538462e-05, 'epoch': 19.33}
{'loss': 0.0008, 'learning_rate': 3.1826923076923075e-05, 'epoch': 19.37}
{'loss': 0.0003, 'learning_rate': 3.178846153846154e-05, 

{'eval_loss': 0.3513902723789215, 'eval_accuracy': 0.9481961147086031, 'eval_f1': 0.9477611940298507, 'eval_runtime': 4.8045, 'eval_samples_per_second': 224.997, 'eval_steps_per_second': 1.041, 'epoch': 20.0}
{'loss': 0.001, 'learning_rate': 3.113461538461539e-05, 'epoch': 20.04}
{'loss': 0.0247, 'learning_rate': 3.109615384615385e-05, 'epoch': 20.07}
{'loss': 0.0002, 'learning_rate': 3.105769230769231e-05, 'epoch': 20.11}
{'loss': 0.0002, 'learning_rate': 3.1019230769230774e-05, 'epoch': 20.15}
{'loss': 0.0001, 'learning_rate': 3.098076923076923e-05, 'epoch': 20.19}
{'loss': 0.0001, 'learning_rate': 3.094230769230769e-05, 'epoch': 20.22}
{'loss': 0.0001, 'learning_rate': 3.090384615384615e-05, 'epoch': 20.26}
{'loss': 0.0001, 'learning_rate': 3.0865384615384616e-05, 'epoch': 20.3}
{'loss': 0.0599, 'learning_rate': 3.082692307692308e-05, 'epoch': 20.33}
{'loss': 0.0002, 'learning_rate': 3.078846153846154e-05, 'epoch': 20.37}
{'loss': 0.0846, 'learning_rate': 3.075e-05, 'epoch': 20.41}


{'eval_loss': 0.3856884837150574, 'eval_accuracy': 0.9537465309898242, 'eval_f1': 0.9541284403669723, 'eval_runtime': 4.3156, 'eval_samples_per_second': 250.486, 'eval_steps_per_second': 1.159, 'epoch': 21.0}
{'loss': 0.0001, 'learning_rate': 3.0096153846153845e-05, 'epoch': 21.04}
{'loss': 0.0001, 'learning_rate': 3.0057692307692308e-05, 'epoch': 21.07}
{'loss': 0.0001, 'learning_rate': 3.0019230769230773e-05, 'epoch': 21.11}
{'loss': 0.0002, 'learning_rate': 2.9980769230769235e-05, 'epoch': 21.15}
{'loss': 0.0001, 'learning_rate': 2.994230769230769e-05, 'epoch': 21.19}
{'loss': 0.0001, 'learning_rate': 2.9903846153846156e-05, 'epoch': 21.22}
{'loss': 0.005, 'learning_rate': 2.986538461538462e-05, 'epoch': 21.26}
{'loss': 0.0017, 'learning_rate': 2.982692307692308e-05, 'epoch': 21.3}
{'loss': 0.0001, 'learning_rate': 2.9788461538461536e-05, 'epoch': 21.33}
{'loss': 0.0001, 'learning_rate': 2.975e-05, 'epoch': 21.37}
{'loss': 0.0001, 'learning_rate': 2.9711538461538464e-05, 'epoch': 21

{'eval_loss': 0.34853070974349976, 'eval_accuracy': 0.9454209065679926, 'eval_f1': 0.9456221198156681, 'eval_runtime': 4.6933, 'eval_samples_per_second': 230.33, 'eval_steps_per_second': 1.065, 'epoch': 22.0}
{'loss': 0.0002, 'learning_rate': 2.9057692307692307e-05, 'epoch': 22.04}
{'loss': 0.0002, 'learning_rate': 2.901923076923077e-05, 'epoch': 22.07}
{'loss': 0.0002, 'learning_rate': 2.8980769230769235e-05, 'epoch': 22.11}
{'loss': 0.0002, 'learning_rate': 2.8942307692307697e-05, 'epoch': 22.15}
{'loss': 0.0001, 'learning_rate': 2.8903846153846152e-05, 'epoch': 22.19}
{'loss': 0.0001, 'learning_rate': 2.8865384615384618e-05, 'epoch': 22.22}
{'loss': 0.0001, 'learning_rate': 2.882692307692308e-05, 'epoch': 22.26}
{'loss': 0.0001, 'learning_rate': 2.8788461538461542e-05, 'epoch': 22.3}
{'loss': 0.0511, 'learning_rate': 2.8749999999999997e-05, 'epoch': 22.33}
{'loss': 0.0053, 'learning_rate': 2.8711538461538463e-05, 'epoch': 22.37}
{'loss': 0.0003, 'learning_rate': 2.8673076923076925e-

{'eval_loss': 0.3966580927371979, 'eval_accuracy': 0.9454209065679926, 'eval_f1': 0.9454209065679925, 'eval_runtime': 4.5869, 'eval_samples_per_second': 235.669, 'eval_steps_per_second': 1.09, 'epoch': 23.0}
{'loss': 0.0001, 'learning_rate': 2.801923076923077e-05, 'epoch': 23.04}
{'loss': 0.0001, 'learning_rate': 2.7980769230769234e-05, 'epoch': 23.07}
{'loss': 0.0002, 'learning_rate': 2.7942307692307696e-05, 'epoch': 23.11}
{'loss': 0.0001, 'learning_rate': 2.790384615384615e-05, 'epoch': 23.15}
{'loss': 0.0001, 'learning_rate': 2.7865384615384614e-05, 'epoch': 23.19}
{'loss': 0.0, 'learning_rate': 2.782692307692308e-05, 'epoch': 23.22}
{'loss': 0.0001, 'learning_rate': 2.778846153846154e-05, 'epoch': 23.26}
{'loss': 0.0001, 'learning_rate': 2.7750000000000004e-05, 'epoch': 23.3}
{'loss': 0.0, 'learning_rate': 2.7711538461538462e-05, 'epoch': 23.33}
{'loss': 0.0, 'learning_rate': 2.7673076923076925e-05, 'epoch': 23.37}
{'loss': 0.0, 'learning_rate': 2.7634615384615387e-05, 'epoch': 23

{'eval_loss': 0.44649025797843933, 'eval_accuracy': 0.9481961147086031, 'eval_f1': 0.9477611940298507, 'eval_runtime': 4.2101, 'eval_samples_per_second': 256.763, 'eval_steps_per_second': 1.188, 'epoch': 24.0}
{'loss': 0.0, 'learning_rate': 2.698076923076923e-05, 'epoch': 24.04}
{'loss': 0.0, 'learning_rate': 2.6942307692307696e-05, 'epoch': 24.07}
{'loss': 0.0, 'learning_rate': 2.6903846153846158e-05, 'epoch': 24.11}
{'loss': 0.0, 'learning_rate': 2.6865384615384613e-05, 'epoch': 24.15}
{'loss': 0.0, 'learning_rate': 2.682692307692308e-05, 'epoch': 24.19}
{'loss': 0.0, 'learning_rate': 2.678846153846154e-05, 'epoch': 24.22}
{'loss': 0.0, 'learning_rate': 2.6750000000000003e-05, 'epoch': 24.26}
{'loss': 0.0, 'learning_rate': 2.671153846153846e-05, 'epoch': 24.3}
{'loss': 0.0, 'learning_rate': 2.6673076923076924e-05, 'epoch': 24.33}
{'loss': 0.0172, 'learning_rate': 2.6634615384615386e-05, 'epoch': 24.37}
{'loss': 0.0, 'learning_rate': 2.659615384615385e-05, 'epoch': 24.41}
{'loss': 0.0

{'eval_loss': 0.4879067540168762, 'eval_accuracy': 0.9463459759481961, 'eval_f1': 0.9468864468864469, 'eval_runtime': 4.5282, 'eval_samples_per_second': 238.726, 'eval_steps_per_second': 1.104, 'epoch': 25.0}
{'loss': 0.0, 'learning_rate': 2.5942307692307695e-05, 'epoch': 25.04}
{'loss': 0.0, 'learning_rate': 2.5903846153846157e-05, 'epoch': 25.07}
{'loss': 0.0, 'learning_rate': 2.586538461538462e-05, 'epoch': 25.11}
{'loss': 0.0001, 'learning_rate': 2.5826923076923075e-05, 'epoch': 25.15}
{'loss': 0.0, 'learning_rate': 2.578846153846154e-05, 'epoch': 25.19}
{'loss': 0.0, 'learning_rate': 2.5750000000000002e-05, 'epoch': 25.22}
{'loss': 0.0, 'learning_rate': 2.5711538461538465e-05, 'epoch': 25.26}
{'loss': 0.0, 'learning_rate': 2.5673076923076923e-05, 'epoch': 25.3}
{'loss': 0.0, 'learning_rate': 2.5634615384615386e-05, 'epoch': 25.33}
{'loss': 0.0, 'learning_rate': 2.5596153846153848e-05, 'epoch': 25.37}
{'loss': 0.0, 'learning_rate': 2.555769230769231e-05, 'epoch': 25.41}
{'loss': 0.

{'eval_loss': 0.5003106594085693, 'eval_accuracy': 0.9472710453283997, 'eval_f1': 0.9470752089136492, 'eval_runtime': 4.3197, 'eval_samples_per_second': 250.25, 'eval_steps_per_second': 1.157, 'epoch': 26.0}
{'loss': 0.0, 'learning_rate': 2.4903846153846157e-05, 'epoch': 26.04}
{'loss': 0.0, 'learning_rate': 2.4865384615384615e-05, 'epoch': 26.07}
{'loss': 0.0, 'learning_rate': 2.4826923076923078e-05, 'epoch': 26.11}
{'loss': 0.0, 'learning_rate': 2.478846153846154e-05, 'epoch': 26.15}
{'loss': 0.0, 'learning_rate': 2.4750000000000002e-05, 'epoch': 26.19}
{'loss': 0.0, 'learning_rate': 2.4711538461538464e-05, 'epoch': 26.22}
{'loss': 0.0, 'learning_rate': 2.4673076923076923e-05, 'epoch': 26.26}
{'loss': 0.0, 'learning_rate': 2.4634615384615385e-05, 'epoch': 26.3}
{'loss': 0.0, 'learning_rate': 2.4596153846153847e-05, 'epoch': 26.33}
{'loss': 0.0, 'learning_rate': 2.455769230769231e-05, 'epoch': 26.37}
{'loss': 0.0, 'learning_rate': 2.4519230769230768e-05, 'epoch': 26.41}
{'loss': 0.0, 

{'eval_loss': 0.48671725392341614, 'eval_accuracy': 0.9463459759481961, 'eval_f1': 0.9461966604823748, 'eval_runtime': 4.2227, 'eval_samples_per_second': 255.999, 'eval_steps_per_second': 1.184, 'epoch': 27.0}
{'loss': 0.0, 'learning_rate': 2.3865384615384618e-05, 'epoch': 27.04}
{'loss': 0.0531, 'learning_rate': 2.3826923076923077e-05, 'epoch': 27.07}
{'loss': 0.0, 'learning_rate': 2.378846153846154e-05, 'epoch': 27.11}
{'loss': 0.0, 'learning_rate': 2.375e-05, 'epoch': 27.15}
{'loss': 0.0, 'learning_rate': 2.3711538461538463e-05, 'epoch': 27.19}
{'loss': 0.0, 'learning_rate': 2.3673076923076926e-05, 'epoch': 27.22}
{'loss': 0.0, 'learning_rate': 2.3634615384615384e-05, 'epoch': 27.26}
{'loss': 0.0, 'learning_rate': 2.359615384615385e-05, 'epoch': 27.3}
{'loss': 0.0, 'learning_rate': 2.355769230769231e-05, 'epoch': 27.33}
{'loss': 0.0, 'learning_rate': 2.351923076923077e-05, 'epoch': 27.37}
{'loss': 0.0, 'learning_rate': 2.348076923076923e-05, 'epoch': 27.41}
{'loss': 0.0, 'learning_r

{'eval_loss': 0.44085076451301575, 'eval_accuracy': 0.9481961147086031, 'eval_f1': 0.948339483394834, 'eval_runtime': 4.4877, 'eval_samples_per_second': 240.882, 'eval_steps_per_second': 1.114, 'epoch': 28.0}
{'loss': 0.0, 'learning_rate': 2.283076923076923e-05, 'epoch': 28.04}
{'loss': 0.0, 'learning_rate': 2.2792307692307693e-05, 'epoch': 28.07}
{'loss': 0.0415, 'learning_rate': 2.2753846153846155e-05, 'epoch': 28.11}
{'loss': 0.0, 'learning_rate': 2.2715384615384617e-05, 'epoch': 28.15}
{'loss': 0.0001, 'learning_rate': 2.267692307692308e-05, 'epoch': 28.19}
{'loss': 0.0001, 'learning_rate': 2.2638461538461538e-05, 'epoch': 28.22}
{'loss': 0.0, 'learning_rate': 2.26e-05, 'epoch': 28.26}
{'loss': 0.0, 'learning_rate': 2.2561538461538463e-05, 'epoch': 28.3}
{'loss': 0.0003, 'learning_rate': 2.2523076923076925e-05, 'epoch': 28.33}
{'loss': 0.0, 'learning_rate': 2.2484615384615384e-05, 'epoch': 28.37}
{'loss': 0.0, 'learning_rate': 2.2446153846153846e-05, 'epoch': 28.41}
{'loss': 0.0393

{'eval_loss': 0.44591426849365234, 'eval_accuracy': 0.9481961147086031, 'eval_f1': 0.9488117001828154, 'eval_runtime': 4.3575, 'eval_samples_per_second': 248.08, 'eval_steps_per_second': 1.147, 'epoch': 29.0}
{'loss': 0.0001, 'learning_rate': 2.1792307692307692e-05, 'epoch': 29.04}
{'loss': 0.0, 'learning_rate': 2.1753846153846155e-05, 'epoch': 29.07}
{'loss': 0.0663, 'learning_rate': 2.1715384615384617e-05, 'epoch': 29.11}
{'loss': 0.0, 'learning_rate': 2.167692307692308e-05, 'epoch': 29.15}
{'loss': 0.0045, 'learning_rate': 2.163846153846154e-05, 'epoch': 29.19}
{'loss': 0.0009, 'learning_rate': 2.16e-05, 'epoch': 29.22}
{'loss': 0.0, 'learning_rate': 2.1561538461538462e-05, 'epoch': 29.26}
{'loss': 0.0, 'learning_rate': 2.1523076923076924e-05, 'epoch': 29.3}
{'loss': 0.0, 'learning_rate': 2.1484615384615386e-05, 'epoch': 29.33}
{'loss': 0.0, 'learning_rate': 2.1446153846153845e-05, 'epoch': 29.37}
{'loss': 0.0001, 'learning_rate': 2.140769230769231e-05, 'epoch': 29.41}
{'loss': 0.00

{'eval_loss': 0.47858795523643494, 'eval_accuracy': 0.9463459759481961, 'eval_f1': 0.9463955637707948, 'eval_runtime': 4.5062, 'eval_samples_per_second': 239.891, 'eval_steps_per_second': 1.11, 'epoch': 30.0}
{'loss': 0.0133, 'learning_rate': 2.0753846153846154e-05, 'epoch': 30.04}
{'loss': 0.0, 'learning_rate': 2.0715384615384616e-05, 'epoch': 30.07}
{'loss': 0.0, 'learning_rate': 2.067692307692308e-05, 'epoch': 30.11}
{'loss': 0.0, 'learning_rate': 2.063846153846154e-05, 'epoch': 30.15}
{'loss': 0.0, 'learning_rate': 2.06e-05, 'epoch': 30.19}
{'loss': 0.0, 'learning_rate': 2.056153846153846e-05, 'epoch': 30.22}
{'loss': 0.0, 'learning_rate': 2.0523076923076927e-05, 'epoch': 30.26}
{'loss': 0.0075, 'learning_rate': 2.0484615384615386e-05, 'epoch': 30.3}
{'loss': 0.0, 'learning_rate': 2.0446153846153848e-05, 'epoch': 30.33}
{'loss': 0.0, 'learning_rate': 2.0407692307692307e-05, 'epoch': 30.37}
{'loss': 0.0989, 'learning_rate': 2.0369230769230772e-05, 'epoch': 30.41}
{'loss': 0.0, 'lear

{'eval_loss': 0.47502198815345764, 'eval_accuracy': 0.9491211840888066, 'eval_f1': 0.9494949494949495, 'eval_runtime': 4.3724, 'eval_samples_per_second': 247.233, 'eval_steps_per_second': 1.144, 'epoch': 31.0}
{'loss': 0.0, 'learning_rate': 1.9715384615384616e-05, 'epoch': 31.04}
{'loss': 0.0, 'learning_rate': 1.9676923076923078e-05, 'epoch': 31.07}
{'loss': 0.0, 'learning_rate': 1.963846153846154e-05, 'epoch': 31.11}
{'loss': 0.0, 'learning_rate': 1.9600000000000002e-05, 'epoch': 31.15}
{'loss': 0.0, 'learning_rate': 1.956153846153846e-05, 'epoch': 31.19}
{'loss': 0.0, 'learning_rate': 1.9523076923076923e-05, 'epoch': 31.22}
{'loss': 0.0, 'learning_rate': 1.9484615384615385e-05, 'epoch': 31.26}
{'loss': 0.0, 'learning_rate': 1.9446153846153847e-05, 'epoch': 31.3}
{'loss': 0.0, 'learning_rate': 1.9407692307692306e-05, 'epoch': 31.33}
{'loss': 0.0, 'learning_rate': 1.936923076923077e-05, 'epoch': 31.37}
{'loss': 0.0, 'learning_rate': 1.9330769230769234e-05, 'epoch': 31.41}
{'loss': 0.0,

{'eval_loss': 0.4974527955055237, 'eval_accuracy': 0.9500462534690102, 'eval_f1': 0.9500924214417745, 'eval_runtime': 5.1642, 'eval_samples_per_second': 209.325, 'eval_steps_per_second': 0.968, 'epoch': 32.0}
{'loss': 0.0, 'learning_rate': 1.8676923076923077e-05, 'epoch': 32.04}
{'loss': 0.0, 'learning_rate': 1.863846153846154e-05, 'epoch': 32.07}
{'loss': 0.0, 'learning_rate': 1.86e-05, 'epoch': 32.11}
{'loss': 0.0, 'learning_rate': 1.8561538461538464e-05, 'epoch': 32.15}
{'loss': 0.0, 'learning_rate': 1.8523076923076922e-05, 'epoch': 32.19}
{'loss': 0.0, 'learning_rate': 1.8484615384615388e-05, 'epoch': 32.22}
{'loss': 0.0, 'learning_rate': 1.8446153846153847e-05, 'epoch': 32.26}
{'loss': 0.0, 'learning_rate': 1.840769230769231e-05, 'epoch': 32.3}
{'loss': 0.0, 'learning_rate': 1.8369230769230768e-05, 'epoch': 32.33}
{'loss': 0.0, 'learning_rate': 1.8330769230769233e-05, 'epoch': 32.37}
{'loss': 0.0, 'learning_rate': 1.8292307692307692e-05, 'epoch': 32.41}
{'loss': 0.0, 'learning_rat

{'eval_loss': 0.5129739046096802, 'eval_accuracy': 0.9491211840888066, 'eval_f1': 0.9492151431209602, 'eval_runtime': 4.7608, 'eval_samples_per_second': 227.064, 'eval_steps_per_second': 1.05, 'epoch': 33.0}
{'loss': 0.0, 'learning_rate': 1.763846153846154e-05, 'epoch': 33.04}
{'loss': 0.0, 'learning_rate': 1.76e-05, 'epoch': 33.07}
{'loss': 0.0, 'learning_rate': 1.7561538461538463e-05, 'epoch': 33.11}
{'loss': 0.0, 'learning_rate': 1.7523076923076922e-05, 'epoch': 33.15}
{'loss': 0.0, 'learning_rate': 1.7484615384615384e-05, 'epoch': 33.19}
{'loss': 0.0, 'learning_rate': 1.744615384615385e-05, 'epoch': 33.22}
{'loss': 0.0, 'learning_rate': 1.740769230769231e-05, 'epoch': 33.26}
{'loss': 0.0, 'learning_rate': 1.736923076923077e-05, 'epoch': 33.3}
{'loss': 0.0, 'learning_rate': 1.733076923076923e-05, 'epoch': 33.33}
{'loss': 0.0, 'learning_rate': 1.7292307692307695e-05, 'epoch': 33.37}
{'loss': 0.0, 'learning_rate': 1.7253846153846154e-05, 'epoch': 33.41}
{'loss': 0.0, 'learning_rate': 

{'eval_loss': 0.520006537437439, 'eval_accuracy': 0.9500462534690102, 'eval_f1': 0.9501845018450185, 'eval_runtime': 4.8417, 'eval_samples_per_second': 223.269, 'eval_steps_per_second': 1.033, 'epoch': 34.0}
{'loss': 0.0, 'learning_rate': 1.66e-05, 'epoch': 34.04}
{'loss': 0.0, 'learning_rate': 1.6561538461538462e-05, 'epoch': 34.07}
{'loss': 0.0, 'learning_rate': 1.6523076923076925e-05, 'epoch': 34.11}
{'loss': 0.0, 'learning_rate': 1.6484615384615383e-05, 'epoch': 34.15}
{'loss': 0.0, 'learning_rate': 1.6446153846153846e-05, 'epoch': 34.19}
{'loss': 0.0, 'learning_rate': 1.6407692307692308e-05, 'epoch': 34.22}
{'loss': 0.0, 'learning_rate': 1.636923076923077e-05, 'epoch': 34.26}
{'loss': 0.0, 'learning_rate': 1.6330769230769232e-05, 'epoch': 34.3}
{'loss': 0.0, 'learning_rate': 1.6292307692307694e-05, 'epoch': 34.33}
{'loss': 0.0, 'learning_rate': 1.6253846153846156e-05, 'epoch': 34.37}
{'loss': 0.0, 'learning_rate': 1.6215384615384615e-05, 'epoch': 34.41}
{'loss': 0.0, 'learning_rat

{'eval_loss': 0.5336973071098328, 'eval_accuracy': 0.9491211840888066, 'eval_f1': 0.9493087557603687, 'eval_runtime': 4.8436, 'eval_samples_per_second': 223.179, 'eval_steps_per_second': 1.032, 'epoch': 35.0}
{'loss': 0.0, 'learning_rate': 1.5561538461538462e-05, 'epoch': 35.04}
{'loss': 0.0, 'learning_rate': 1.5523076923076924e-05, 'epoch': 35.07}
{'loss': 0.0, 'learning_rate': 1.5484615384615386e-05, 'epoch': 35.11}
{'loss': 0.0, 'learning_rate': 1.5446153846153845e-05, 'epoch': 35.15}
{'loss': 0.0, 'learning_rate': 1.540769230769231e-05, 'epoch': 35.19}
{'loss': 0.0, 'learning_rate': 1.536923076923077e-05, 'epoch': 35.22}
{'loss': 0.0, 'learning_rate': 1.533076923076923e-05, 'epoch': 35.26}
{'loss': 0.0, 'learning_rate': 1.529230769230769e-05, 'epoch': 35.3}
{'loss': 0.0, 'learning_rate': 1.5253846153846154e-05, 'epoch': 35.33}
{'loss': 0.0, 'learning_rate': 1.5215384615384618e-05, 'epoch': 35.37}
{'loss': 0.0, 'learning_rate': 1.5176923076923077e-05, 'epoch': 35.41}
{'loss': 0.0, '

{'eval_loss': 0.5403782725334167, 'eval_accuracy': 0.9491211840888066, 'eval_f1': 0.9493087557603687, 'eval_runtime': 4.7436, 'eval_samples_per_second': 227.887, 'eval_steps_per_second': 1.054, 'epoch': 36.0}
{'loss': 0.0, 'learning_rate': 1.4523076923076922e-05, 'epoch': 36.04}
{'loss': 0.0, 'learning_rate': 1.4484615384615386e-05, 'epoch': 36.07}
{'loss': 0.0, 'learning_rate': 1.4446153846153848e-05, 'epoch': 36.11}
{'loss': 0.0, 'learning_rate': 1.4407692307692308e-05, 'epoch': 36.15}
{'loss': 0.0, 'learning_rate': 1.436923076923077e-05, 'epoch': 36.19}
{'loss': 0.0, 'learning_rate': 1.4330769230769231e-05, 'epoch': 36.22}
{'loss': 0.0, 'learning_rate': 1.4292307692307693e-05, 'epoch': 36.26}
{'loss': 0.0, 'learning_rate': 1.4253846153846154e-05, 'epoch': 36.3}
{'loss': 0.0, 'learning_rate': 1.4215384615384616e-05, 'epoch': 36.33}
{'loss': 0.0, 'learning_rate': 1.4176923076923076e-05, 'epoch': 36.37}
{'loss': 0.0, 'learning_rate': 1.413846153846154e-05, 'epoch': 36.41}
{'loss': 0.0,

{'eval_loss': 0.5467534065246582, 'eval_accuracy': 0.9491211840888066, 'eval_f1': 0.9493087557603687, 'eval_runtime': 4.6913, 'eval_samples_per_second': 230.427, 'eval_steps_per_second': 1.066, 'epoch': 37.0}
{'loss': 0.0, 'learning_rate': 1.3484615384615385e-05, 'epoch': 37.04}
{'loss': 0.0, 'learning_rate': 1.3446153846153847e-05, 'epoch': 37.07}
{'loss': 0.0, 'learning_rate': 1.3407692307692308e-05, 'epoch': 37.11}
{'loss': 0.0, 'learning_rate': 1.336923076923077e-05, 'epoch': 37.15}
{'loss': 0.0, 'learning_rate': 1.3330769230769232e-05, 'epoch': 37.19}
{'loss': 0.0, 'learning_rate': 1.3292307692307692e-05, 'epoch': 37.22}
{'loss': 0.0, 'learning_rate': 1.3253846153846156e-05, 'epoch': 37.26}
{'loss': 0.0, 'learning_rate': 1.3215384615384615e-05, 'epoch': 37.3}
{'loss': 0.0, 'learning_rate': 1.3176923076923079e-05, 'epoch': 37.33}
{'loss': 0.0, 'learning_rate': 1.3138461538461538e-05, 'epoch': 37.37}
{'loss': 0.0, 'learning_rate': 1.3100000000000002e-05, 'epoch': 37.41}
{'loss': 0.0

{'eval_loss': 0.5528544783592224, 'eval_accuracy': 0.9491211840888066, 'eval_f1': 0.9493087557603687, 'eval_runtime': 4.863, 'eval_samples_per_second': 222.289, 'eval_steps_per_second': 1.028, 'epoch': 38.0}
{'loss': 0.0, 'learning_rate': 1.2450000000000001e-05, 'epoch': 38.04}
{'loss': 0.0, 'learning_rate': 1.2411538461538462e-05, 'epoch': 38.07}
{'loss': 0.0, 'learning_rate': 1.2373076923076924e-05, 'epoch': 38.11}
{'loss': 0.0, 'learning_rate': 1.2334615384615384e-05, 'epoch': 38.15}
{'loss': 0.0, 'learning_rate': 1.2296153846153846e-05, 'epoch': 38.19}
{'loss': 0.0, 'learning_rate': 1.2257692307692309e-05, 'epoch': 38.22}
{'loss': 0.0, 'learning_rate': 1.221923076923077e-05, 'epoch': 38.26}
{'loss': 0.0, 'learning_rate': 1.2180769230769231e-05, 'epoch': 38.3}
{'loss': 0.0567, 'learning_rate': 1.2142307692307693e-05, 'epoch': 38.33}
{'loss': 0.0, 'learning_rate': 1.2103846153846154e-05, 'epoch': 38.37}
{'loss': 0.0, 'learning_rate': 1.2065384615384616e-05, 'epoch': 38.41}
{'loss': 0

{'eval_loss': 0.5347281694412231, 'eval_accuracy': 0.9500462534690102, 'eval_f1': 0.9502762430939228, 'eval_runtime': 4.897, 'eval_samples_per_second': 220.748, 'eval_steps_per_second': 1.021, 'epoch': 39.0}
{'loss': 0.0, 'learning_rate': 1.1411538461538463e-05, 'epoch': 39.04}
{'loss': 0.0, 'learning_rate': 1.1373076923076923e-05, 'epoch': 39.07}
{'loss': 0.0, 'learning_rate': 1.1334615384615385e-05, 'epoch': 39.11}
{'loss': 0.0, 'learning_rate': 1.1296153846153846e-05, 'epoch': 39.15}
{'loss': 0.0079, 'learning_rate': 1.1257692307692308e-05, 'epoch': 39.19}
{'loss': 0.0021, 'learning_rate': 1.121923076923077e-05, 'epoch': 39.22}
{'loss': 0.0389, 'learning_rate': 1.118076923076923e-05, 'epoch': 39.26}
{'loss': 0.0005, 'learning_rate': 1.1142307692307693e-05, 'epoch': 39.3}
{'loss': 0.0, 'learning_rate': 1.1103846153846155e-05, 'epoch': 39.33}
{'loss': 0.0, 'learning_rate': 1.1065384615384617e-05, 'epoch': 39.37}
{'loss': 0.0, 'learning_rate': 1.1026923076923078e-05, 'epoch': 39.41}
{'

{'eval_loss': 0.515914261341095, 'eval_accuracy': 0.9454209065679926, 'eval_f1': 0.9463148316651502, 'eval_runtime': 4.7025, 'eval_samples_per_second': 229.877, 'eval_steps_per_second': 1.063, 'epoch': 40.0}
{'loss': 0.0, 'learning_rate': 1.0373076923076924e-05, 'epoch': 40.04}
{'loss': 0.0, 'learning_rate': 1.0334615384615386e-05, 'epoch': 40.07}
{'loss': 0.0, 'learning_rate': 1.0296153846153847e-05, 'epoch': 40.11}
{'loss': 0.0, 'learning_rate': 1.0257692307692309e-05, 'epoch': 40.15}
{'loss': 0.0, 'learning_rate': 1.021923076923077e-05, 'epoch': 40.19}
{'loss': 0.0, 'learning_rate': 1.0180769230769232e-05, 'epoch': 40.22}
{'loss': 0.0, 'learning_rate': 1.0142307692307692e-05, 'epoch': 40.26}
{'loss': 0.0, 'learning_rate': 1.0103846153846154e-05, 'epoch': 40.3}
{'loss': 0.0, 'learning_rate': 1.0065384615384615e-05, 'epoch': 40.33}
{'loss': 0.0, 'learning_rate': 1.0026923076923077e-05, 'epoch': 40.37}
{'loss': 0.0, 'learning_rate': 9.988461538461539e-06, 'epoch': 40.41}
{'loss': 0.0, 

{'eval_loss': 0.4312877953052521, 'eval_accuracy': 0.9454209065679926, 'eval_f1': 0.9455216989843028, 'eval_runtime': 4.9847, 'eval_samples_per_second': 216.863, 'eval_steps_per_second': 1.003, 'epoch': 41.0}
{'loss': 0.0, 'learning_rate': 9.334615384615384e-06, 'epoch': 41.04}
{'loss': 0.0, 'learning_rate': 9.296153846153846e-06, 'epoch': 41.07}
{'loss': 0.0, 'learning_rate': 9.257692307692307e-06, 'epoch': 41.11}
{'loss': 0.0, 'learning_rate': 9.21923076923077e-06, 'epoch': 41.15}
{'loss': 0.0, 'learning_rate': 9.180769230769231e-06, 'epoch': 41.19}
{'loss': 0.0, 'learning_rate': 9.142307692307693e-06, 'epoch': 41.22}
{'loss': 0.0, 'learning_rate': 9.103846153846154e-06, 'epoch': 41.26}
{'loss': 0.0, 'learning_rate': 9.065384615384616e-06, 'epoch': 41.3}
{'loss': 0.0, 'learning_rate': 9.026923076923078e-06, 'epoch': 41.33}
{'loss': 0.0, 'learning_rate': 8.988461538461539e-06, 'epoch': 41.37}
{'loss': 0.0, 'learning_rate': 8.95e-06, 'epoch': 41.41}
{'loss': 0.0, 'learning_rate': 8.911

{'eval_loss': 0.48870059847831726, 'eval_accuracy': 0.9463459759481961, 'eval_f1': 0.9466911764705882, 'eval_runtime': 4.8336, 'eval_samples_per_second': 223.644, 'eval_steps_per_second': 1.034, 'epoch': 42.0}
{'loss': 0.0, 'learning_rate': 8.296153846153847e-06, 'epoch': 42.04}
{'loss': 0.0, 'learning_rate': 8.257692307692308e-06, 'epoch': 42.07}
{'loss': 0.0, 'learning_rate': 8.21923076923077e-06, 'epoch': 42.11}
{'loss': 0.0, 'learning_rate': 8.18076923076923e-06, 'epoch': 42.15}
{'loss': 0.0, 'learning_rate': 8.142307692307693e-06, 'epoch': 42.19}
{'loss': 0.0, 'learning_rate': 8.103846153846155e-06, 'epoch': 42.22}
{'loss': 0.0, 'learning_rate': 8.065384615384617e-06, 'epoch': 42.26}
{'loss': 0.0, 'learning_rate': 8.026923076923077e-06, 'epoch': 42.3}
{'loss': 0.0, 'learning_rate': 7.98846153846154e-06, 'epoch': 42.33}
{'loss': 0.0, 'learning_rate': 7.95e-06, 'epoch': 42.37}
{'loss': 0.0, 'learning_rate': 7.911538461538462e-06, 'epoch': 42.41}
{'loss': 0.0, 'learning_rate': 7.8730

{'eval_loss': 0.510130763053894, 'eval_accuracy': 0.9463459759481961, 'eval_f1': 0.9465930018416208, 'eval_runtime': 5.1922, 'eval_samples_per_second': 208.196, 'eval_steps_per_second': 0.963, 'epoch': 43.0}
{'loss': 0.0, 'learning_rate': 7.257692307692308e-06, 'epoch': 43.04}
{'loss': 0.0, 'learning_rate': 7.2192307692307694e-06, 'epoch': 43.07}
{'loss': 0.0, 'learning_rate': 7.180769230769232e-06, 'epoch': 43.11}
{'loss': 0.0, 'learning_rate': 7.142307692307693e-06, 'epoch': 43.15}
{'loss': 0.0, 'learning_rate': 7.103846153846154e-06, 'epoch': 43.19}
{'loss': 0.0, 'learning_rate': 7.0653846153846156e-06, 'epoch': 43.22}
{'loss': 0.0, 'learning_rate': 7.026923076923077e-06, 'epoch': 43.26}
{'loss': 0.0, 'learning_rate': 6.988461538461538e-06, 'epoch': 43.3}
{'loss': 0.0, 'learning_rate': 6.950000000000001e-06, 'epoch': 43.33}
{'loss': 0.0, 'learning_rate': 6.9115384615384625e-06, 'epoch': 43.37}
{'loss': 0.0, 'learning_rate': 6.873076923076924e-06, 'epoch': 43.41}
{'loss': 0.0, 'learn

{'eval_loss': 0.5089054703712463, 'eval_accuracy': 0.9491211840888066, 'eval_f1': 0.9492151431209602, 'eval_runtime': 4.0791, 'eval_samples_per_second': 265.01, 'eval_steps_per_second': 1.226, 'epoch': 44.0}
{'loss': 0.0, 'learning_rate': 6.21923076923077e-06, 'epoch': 44.04}
{'loss': 0.0, 'learning_rate': 6.180769230769231e-06, 'epoch': 44.07}
{'loss': 0.0, 'learning_rate': 6.142307692307692e-06, 'epoch': 44.11}
{'loss': 0.0, 'learning_rate': 6.1038461538461545e-06, 'epoch': 44.15}
{'loss': 0.0, 'learning_rate': 6.065384615384616e-06, 'epoch': 44.19}
{'loss': 0.0, 'learning_rate': 6.026923076923077e-06, 'epoch': 44.22}
{'loss': 0.0, 'learning_rate': 5.9884615384615385e-06, 'epoch': 44.26}
{'loss': 0.0, 'learning_rate': 5.95e-06, 'epoch': 44.3}
{'loss': 0.0, 'learning_rate': 5.911538461538462e-06, 'epoch': 44.33}
{'loss': 0.0, 'learning_rate': 5.873076923076923e-06, 'epoch': 44.37}
{'loss': 0.0, 'learning_rate': 5.834615384615385e-06, 'epoch': 44.41}
{'loss': 0.0, 'learning_rate': 5.79

{'eval_loss': 0.49002665281295776, 'eval_accuracy': 0.9509713228492137, 'eval_f1': 0.9515981735159817, 'eval_runtime': 4.5159, 'eval_samples_per_second': 239.375, 'eval_steps_per_second': 1.107, 'epoch': 45.0}
{'loss': 0.0, 'learning_rate': 5.1807692307692304e-06, 'epoch': 45.04}
{'loss': 0.0, 'learning_rate': 5.142307692307693e-06, 'epoch': 45.07}
{'loss': 0.0, 'learning_rate': 5.103846153846154e-06, 'epoch': 45.11}
{'loss': 0.0, 'learning_rate': 5.065384615384615e-06, 'epoch': 45.15}
{'loss': 0.0, 'learning_rate': 5.026923076923077e-06, 'epoch': 45.19}
{'loss': 0.0, 'learning_rate': 4.988461538461539e-06, 'epoch': 45.22}
{'loss': 0.0, 'learning_rate': 4.950000000000001e-06, 'epoch': 45.26}
{'loss': 0.0, 'learning_rate': 4.911538461538462e-06, 'epoch': 45.3}
{'loss': 0.0, 'learning_rate': 4.8730769230769235e-06, 'epoch': 45.33}
{'loss': 0.0, 'learning_rate': 4.834615384615385e-06, 'epoch': 45.37}
{'loss': 0.0, 'learning_rate': 4.796153846153846e-06, 'epoch': 45.41}
{'loss': 0.0, 'lear

{'eval_loss': 0.49573689699172974, 'eval_accuracy': 0.9509713228492137, 'eval_f1': 0.9515981735159817, 'eval_runtime': 4.3979, 'eval_samples_per_second': 245.801, 'eval_steps_per_second': 1.137, 'epoch': 46.0}
{'loss': 0.0, 'learning_rate': 4.142307692307693e-06, 'epoch': 46.04}
{'loss': 0.0, 'learning_rate': 4.103846153846154e-06, 'epoch': 46.07}
{'loss': 0.0, 'learning_rate': 4.0653846153846155e-06, 'epoch': 46.11}
{'loss': 0.0, 'learning_rate': 4.026923076923078e-06, 'epoch': 46.15}
{'loss': 0.0, 'learning_rate': 3.988461538461539e-06, 'epoch': 46.19}
{'loss': 0.0, 'learning_rate': 3.95e-06, 'epoch': 46.22}
{'loss': 0.0, 'learning_rate': 3.911538461538462e-06, 'epoch': 46.26}
{'loss': 0.0, 'learning_rate': 3.873076923076923e-06, 'epoch': 46.3}
{'loss': 0.0, 'learning_rate': 3.834615384615385e-06, 'epoch': 46.33}
{'loss': 0.0, 'learning_rate': 3.7961538461538464e-06, 'epoch': 46.37}
{'loss': 0.0, 'learning_rate': 3.7576923076923077e-06, 'epoch': 46.41}
{'loss': 0.0, 'learning_rate': 

{'eval_loss': 0.516170859336853, 'eval_accuracy': 0.9509713228492137, 'eval_f1': 0.9509713228492138, 'eval_runtime': 4.3864, 'eval_samples_per_second': 246.445, 'eval_steps_per_second': 1.14, 'epoch': 47.0}
{'loss': 0.0, 'learning_rate': 3.103846153846154e-06, 'epoch': 47.04}
{'loss': 0.0, 'learning_rate': 3.0653846153846153e-06, 'epoch': 47.07}
{'loss': 0.0, 'learning_rate': 3.026923076923077e-06, 'epoch': 47.11}
{'loss': 0.0, 'learning_rate': 2.9884615384615384e-06, 'epoch': 47.15}
{'loss': 0.0, 'learning_rate': 2.95e-06, 'epoch': 47.19}
{'loss': 0.0, 'learning_rate': 2.911538461538462e-06, 'epoch': 47.22}
{'loss': 0.0, 'learning_rate': 2.873076923076923e-06, 'epoch': 47.26}
{'loss': 0.0, 'learning_rate': 2.834615384615385e-06, 'epoch': 47.3}
{'loss': 0.0, 'learning_rate': 2.7961538461538463e-06, 'epoch': 47.33}
{'loss': 0.0, 'learning_rate': 2.7576923076923076e-06, 'epoch': 47.37}
{'loss': 0.0, 'learning_rate': 2.7192307692307693e-06, 'epoch': 47.41}
{'loss': 0.0, 'learning_rate': 2

{'eval_loss': 0.5168130397796631, 'eval_accuracy': 0.9518963922294172, 'eval_f1': 0.9519408502772643, 'eval_runtime': 4.317, 'eval_samples_per_second': 250.405, 'eval_steps_per_second': 1.158, 'epoch': 48.0}
{'loss': 0.0, 'learning_rate': 2.0653846153846156e-06, 'epoch': 48.04}
{'loss': 0.0, 'learning_rate': 2.026923076923077e-06, 'epoch': 48.07}
{'loss': 0.0, 'learning_rate': 1.9884615384615386e-06, 'epoch': 48.11}
{'loss': 0.0, 'learning_rate': 1.95e-06, 'epoch': 48.15}
{'loss': 0.0, 'learning_rate': 1.9115384615384613e-06, 'epoch': 48.19}
{'loss': 0.0, 'learning_rate': 1.8730769230769232e-06, 'epoch': 48.22}
{'loss': 0.0, 'learning_rate': 1.8346153846153846e-06, 'epoch': 48.26}
{'loss': 0.0, 'learning_rate': 1.7961538461538463e-06, 'epoch': 48.3}
{'loss': 0.0, 'learning_rate': 1.7576923076923078e-06, 'epoch': 48.33}
{'loss': 0.0, 'learning_rate': 1.7192307692307692e-06, 'epoch': 48.37}
{'loss': 0.0, 'learning_rate': 1.680769230769231e-06, 'epoch': 48.41}
{'loss': 0.0, 'learning_rate

{'eval_loss': 0.5183690190315247, 'eval_accuracy': 0.9518963922294172, 'eval_f1': 0.9519408502772643, 'eval_runtime': 4.2257, 'eval_samples_per_second': 255.816, 'eval_steps_per_second': 1.183, 'epoch': 49.0}
{'loss': 0.0, 'learning_rate': 1.0269230769230772e-06, 'epoch': 49.04}
{'loss': 0.0, 'learning_rate': 9.884615384615385e-07, 'epoch': 49.07}
{'loss': 0.0, 'learning_rate': 9.5e-07, 'epoch': 49.11}
{'loss': 0.0, 'learning_rate': 9.115384615384615e-07, 'epoch': 49.15}
{'loss': 0.0, 'learning_rate': 8.730769230769232e-07, 'epoch': 49.19}
{'loss': 0.0, 'learning_rate': 8.346153846153847e-07, 'epoch': 49.22}
{'loss': 0.0, 'learning_rate': 7.961538461538461e-07, 'epoch': 49.26}
{'loss': 0.0, 'learning_rate': 7.576923076923077e-07, 'epoch': 49.3}
{'loss': 0.0, 'learning_rate': 7.192307692307693e-07, 'epoch': 49.33}
{'loss': 0.0, 'learning_rate': 6.807692307692308e-07, 'epoch': 49.37}
{'loss': 0.0, 'learning_rate': 6.423076923076923e-07, 'epoch': 49.41}
{'loss': 0.0, 'learning_rate': 6.03

{'eval_loss': 0.5192036628723145, 'eval_accuracy': 0.9518963922294172, 'eval_f1': 0.9519408502772643, 'eval_runtime': 4.4437, 'eval_samples_per_second': 243.268, 'eval_steps_per_second': 1.125, 'epoch': 50.0}
{'train_runtime': 5199.1178, 'train_samples_per_second': 41.546, 'train_steps_per_second': 2.597, 'train_loss': 0.020438045919493393, 'epoch': 50.0}


TrainOutput(global_step=13500, training_loss=0.020438045919493393, metrics={'train_runtime': 5199.1178, 'train_samples_per_second': 41.546, 'train_steps_per_second': 2.597, 'train_loss': 0.020438045919493393, 'epoch': 50.0})

In [38]:
predictions = trainer.predict(tokenized_datasets["test"])
print(predictions.predictions.shape, predictions.label_ids.shape)

(5656, 2) (5656,)


In [39]:
predictions

PredictionOutput(predictions=array([[-4.12 ,  3.912],
       [-4.707,  4.586],
       [ 5.75 , -6.008],
       ...,
       [ 5.688, -6.133],
       [-4.766,  4.64 ],
       [-4.785,  4.66 ]], dtype=float16), label_ids=array([1, 1, 1, ..., 1, 1, 1], dtype=int64), metrics={'test_loss': 1.9729912281036377, 'test_accuracy': 0.7625530410183875, 'test_f1': 0.8185380354006215, 'test_runtime': 11.1195, 'test_samples_per_second': 508.658, 'test_steps_per_second': 2.068})

In [40]:
# Apply softmax to convert logits to probabilities
probabilities = softmax(predictions.predictions, axis=1)

# Get the predicted class labels
predicted_labels = np.argmax(probabilities, axis=1)

print("Probabilities:\n", probabilities)
print("Predicted Labels:\n", predicted_labels)

Probabilities:
 [[3.252e-04 1.000e+00]
 [9.173e-05 1.000e+00]
 [1.000e+00 7.808e-06]
 ...
 [1.000e+00 7.331e-06]
 [8.219e-05 1.000e+00]
 [7.904e-05 1.000e+00]]
Predicted Labels:
 [1 1 0 ... 0 1 1]


In [41]:
true_labels = tokenized_datasets["test"]["label"]

accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels, average='weighted')
f1 = f1_score(true_labels, predicted_labels, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Accuracy: 0.7625530410183875
Precision: 0.8038747346072187
Recall: 0.7625530410183875
F1 Score: 0.7606205495221969


In [36]:
helper_model.print_wrong_classifications(predicted_labels, true_labels, raw_datasets['test'])

Predicted Label: 0, True Label: 1
Original Text: user I'd be afraid to short AMZN - they are looking like a near-monopoly in eBooks and infrastructure-as-a-service

Predicted Label: 1, True Label: 0
Original Text: AAP - user if so then the current downtrend will break. Otherwise just a short-term correction in med-term downtrend.

Predicted Label: 0, True Label: 1
Original Text: It really worries me how everyone expects the market to rally now,usually exact opposite happens every time we shall see soon bac spx jpm

Predicted Label: 1, True Label: 0
Original Text: Won't believe AAP uptrend is back until it crosses above MA(50)

Predicted Label: 0, True Label: 1
Original Text: SWY - 30% of float short and breaking out - ouch

Predicted Label: 0, True Label: 1
Original Text: CHDN - ong   49.02. Trailing Stop  56.66 from 6 prior Stops of 54.17, 49.88, 49.82, 45.47, 43.02 & 41.92 -  

Predicted Label: 1, True Label: 0
Original Text: user: been adding VXY long off the bottom today for trade,

In [37]:
data = {'Predicted_Labels': predicted_labels}
df = pd.DataFrame(data)
df.to_csv('data/predicted_labels.csv', index=False)