In [None]:
# Тренував також
# model = BertForSequenceClassification.from_pretrained('bert-base-uncased' і model = BertForSequenceClassification.from_pretrained('neuralmind/bert-base-portuguese-cased'

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import RobertaTokenizer
import torch

df = pd.read_csv('new_data_task3/processed_reviews.csv')

df['review_text'] = df['review_text'].str.lower().str.replace(r'[^\w\s]+', '', regex=True)

train_df, test_df = train_test_split(df, test_size=0.1, random_state=42)

train_df.reset_index(drop=True, inplace=True)
test_df.reset_index(drop=True, inplace=True)

tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

def tokenize_and_encode(texts):
    return tokenizer(texts, padding="max_length", truncation=True, max_length=128, return_tensors='pt')

train_encodings = tokenize_and_encode(train_df['review_text'].tolist())
test_encodings = tokenize_and_encode(test_df['review_text'].tolist())

class ReviewsDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: val[idx].clone().detach() for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(int(self.labels[idx]) - 1)  
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = ReviewsDataset(train_encodings, train_df['review_score'])
test_dataset = ReviewsDataset(test_encodings, test_df['review_score'])



In [4]:
from transformers import Trainer, TrainingArguments, RobertaForSequenceClassification
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=5)
training_args = TrainingArguments(
    output_dir='new_data_task3/model_training_results',
    num_train_epochs=15,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    learning_rate=5e-5,
    logging_steps=250,
    evaluation_strategy='steps',  
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/18345 [00:00<?, ?it/s]

{'loss': 1.1555, 'grad_norm': 10.642463684082031, 'learning_rate': 2.5e-05, 'epoch': 0.2}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.9088464379310608, 'eval_accuracy': 0.6783988957902002, 'eval_precision': 0.4951899194564732, 'eval_recall': 0.6783988957902002, 'eval_f1': 0.5709376434616337, 'eval_runtime': 16.7471, 'eval_samples_per_second': 259.567, 'eval_steps_per_second': 16.242, 'epoch': 0.2}
{'loss': 0.9634, 'grad_norm': 3.3866331577301025, 'learning_rate': 5e-05, 'epoch': 0.41}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.9098528623580933, 'eval_accuracy': 0.6855302507476421, 'eval_precision': 0.4980409659527007, 'eval_recall': 0.6855302507476421, 'eval_f1': 0.5760021788110219, 'eval_runtime': 16.7995, 'eval_samples_per_second': 258.758, 'eval_steps_per_second': 16.191, 'epoch': 0.41}
{'loss': 0.9131, 'grad_norm': 6.533314228057861, 'learning_rate': 4.9299523676099755e-05, 'epoch': 0.61}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8492785096168518, 'eval_accuracy': 0.6915113871635611, 'eval_precision': 0.5871898109674039, 'eval_recall': 0.6915113871635611, 'eval_f1': 0.5884202658525809, 'eval_runtime': 16.4948, 'eval_samples_per_second': 263.537, 'eval_steps_per_second': 16.49, 'epoch': 0.61}
{'loss': 0.8831, 'grad_norm': 2.8655331134796143, 'learning_rate': 4.85990473521995e-05, 'epoch': 0.82}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8558946847915649, 'eval_accuracy': 0.6802392454566367, 'eval_precision': 0.5975498647932422, 'eval_recall': 0.6802392454566367, 'eval_f1': 0.6360434935449162, 'eval_runtime': 16.5777, 'eval_samples_per_second': 262.219, 'eval_steps_per_second': 16.408, 'epoch': 0.82}
{'loss': 0.8706, 'grad_norm': 4.0626935958862305, 'learning_rate': 4.7898571028299245e-05, 'epoch': 1.02}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8493467569351196, 'eval_accuracy': 0.6938118242466068, 'eval_precision': 0.6189966314381197, 'eval_recall': 0.6938118242466068, 'eval_f1': 0.6402026524774711, 'eval_runtime': 16.563, 'eval_samples_per_second': 262.452, 'eval_steps_per_second': 16.422, 'epoch': 1.02}
{'loss': 0.8267, 'grad_norm': 5.1468329429626465, 'learning_rate': 4.719809470439899e-05, 'epoch': 1.23}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.9094341993331909, 'eval_accuracy': 0.6728778467908902, 'eval_precision': 0.6173749298727137, 'eval_recall': 0.6728778467908902, 'eval_f1': 0.6298145893126509, 'eval_runtime': 16.4328, 'eval_samples_per_second': 264.532, 'eval_steps_per_second': 16.552, 'epoch': 1.23}
{'loss': 0.8574, 'grad_norm': 5.337569236755371, 'learning_rate': 4.649761838049874e-05, 'epoch': 1.43}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8336029052734375, 'eval_accuracy': 0.7016333103289625, 'eval_precision': 0.607358784956818, 'eval_recall': 0.7016333103289625, 'eval_f1': 0.6264534499301351, 'eval_runtime': 16.4612, 'eval_samples_per_second': 264.075, 'eval_steps_per_second': 16.524, 'epoch': 1.43}
{'loss': 0.8388, 'grad_norm': 4.907498836517334, 'learning_rate': 4.579714205659849e-05, 'epoch': 1.64}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8217290043830872, 'eval_accuracy': 0.7034736599953991, 'eval_precision': 0.6158401260361723, 'eval_recall': 0.7034736599953991, 'eval_f1': 0.6431727941347252, 'eval_runtime': 16.347, 'eval_samples_per_second': 265.92, 'eval_steps_per_second': 16.639, 'epoch': 1.64}
{'loss': 0.8192, 'grad_norm': 3.5503756999969482, 'learning_rate': 4.509666573269823e-05, 'epoch': 1.84}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8275024890899658, 'eval_accuracy': 0.7027835288704853, 'eval_precision': 0.6330013440776118, 'eval_recall': 0.7027835288704853, 'eval_f1': 0.6555877959609148, 'eval_runtime': 16.3295, 'eval_samples_per_second': 266.205, 'eval_steps_per_second': 16.657, 'epoch': 1.84}
{'loss': 0.8169, 'grad_norm': 5.325777053833008, 'learning_rate': 4.4396189408797985e-05, 'epoch': 2.04}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8130196928977966, 'eval_accuracy': 0.7011732229123534, 'eval_precision': 0.6280470269204799, 'eval_recall': 0.7011732229123534, 'eval_f1': 0.6570364446392459, 'eval_runtime': 16.3384, 'eval_samples_per_second': 266.061, 'eval_steps_per_second': 16.648, 'epoch': 2.04}
{'loss': 0.8085, 'grad_norm': 3.4863338470458984, 'learning_rate': 4.369571308489773e-05, 'epoch': 2.25}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8145673871040344, 'eval_accuracy': 0.7016333103289625, 'eval_precision': 0.6222221313794716, 'eval_recall': 0.7016333103289625, 'eval_f1': 0.6444292256783442, 'eval_runtime': 16.4485, 'eval_samples_per_second': 264.279, 'eval_steps_per_second': 16.536, 'epoch': 2.25}
{'loss': 0.7886, 'grad_norm': 4.75832462310791, 'learning_rate': 4.299523676099748e-05, 'epoch': 2.45}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.829500675201416, 'eval_accuracy': 0.7055440533701404, 'eval_precision': 0.6287629940935606, 'eval_recall': 0.7055440533701404, 'eval_f1': 0.6510851249616469, 'eval_runtime': 16.3295, 'eval_samples_per_second': 266.205, 'eval_steps_per_second': 16.657, 'epoch': 2.45}
{'loss': 0.7996, 'grad_norm': 2.374807596206665, 'learning_rate': 4.229476043709723e-05, 'epoch': 2.66}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8549365401268005, 'eval_accuracy': 0.7073844030365769, 'eval_precision': 0.6178588822405313, 'eval_recall': 0.7073844030365769, 'eval_f1': 0.6318369190417946, 'eval_runtime': 16.5774, 'eval_samples_per_second': 262.224, 'eval_steps_per_second': 16.408, 'epoch': 2.66}
{'loss': 0.809, 'grad_norm': 9.781270027160645, 'learning_rate': 4.1594284113196974e-05, 'epoch': 2.86}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.8209746479988098, 'eval_accuracy': 0.706234184495054, 'eval_precision': 0.64328595150348, 'eval_recall': 0.706234184495054, 'eval_f1': 0.6581172135558632, 'eval_runtime': 16.3735, 'eval_samples_per_second': 265.49, 'eval_steps_per_second': 16.612, 'epoch': 2.86}
{'loss': 0.7744, 'grad_norm': 3.5352444648742676, 'learning_rate': 4.0893807789296726e-05, 'epoch': 3.07}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8085777759552002, 'eval_accuracy': 0.7039337474120083, 'eval_precision': 0.6281604234854714, 'eval_recall': 0.7039337474120083, 'eval_f1': 0.6498110606061657, 'eval_runtime': 16.5858, 'eval_samples_per_second': 262.092, 'eval_steps_per_second': 16.4, 'epoch': 3.07}
{'loss': 0.7602, 'grad_norm': 2.6049063205718994, 'learning_rate': 4.019333146539647e-05, 'epoch': 3.27}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.8169298768043518, 'eval_accuracy': 0.7071543593282724, 'eval_precision': 0.6337132854441144, 'eval_recall': 0.7071543593282724, 'eval_f1': 0.645508877056424, 'eval_runtime': 16.4243, 'eval_samples_per_second': 264.669, 'eval_steps_per_second': 16.561, 'epoch': 3.27}
{'loss': 0.766, 'grad_norm': 5.100327014923096, 'learning_rate': 3.949285514149622e-05, 'epoch': 3.48}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8021347522735596, 'eval_accuracy': 0.7096848401196227, 'eval_precision': 0.6339248958898218, 'eval_recall': 0.7096848401196227, 'eval_f1': 0.6582675445882773, 'eval_runtime': 16.4092, 'eval_samples_per_second': 264.912, 'eval_steps_per_second': 16.576, 'epoch': 3.48}
{'loss': 0.7662, 'grad_norm': 5.9471435546875, 'learning_rate': 3.879237881759596e-05, 'epoch': 3.68}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8058275580406189, 'eval_accuracy': 0.7055440533701404, 'eval_precision': 0.6263018564017637, 'eval_recall': 0.7055440533701404, 'eval_f1': 0.6412025757384605, 'eval_runtime': 16.3534, 'eval_samples_per_second': 265.817, 'eval_steps_per_second': 16.633, 'epoch': 3.68}
{'loss': 0.7676, 'grad_norm': 11.177175521850586, 'learning_rate': 3.8091902493695714e-05, 'epoch': 3.88}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8163300156593323, 'eval_accuracy': 0.7083045778697953, 'eval_precision': 0.6277741915224806, 'eval_recall': 0.7083045778697953, 'eval_f1': 0.6520757592268505, 'eval_runtime': 16.3156, 'eval_samples_per_second': 266.432, 'eval_steps_per_second': 16.671, 'epoch': 3.88}
{'loss': 0.7596, 'grad_norm': 5.9444451332092285, 'learning_rate': 3.7391426169795466e-05, 'epoch': 4.09}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.821631669998169, 'eval_accuracy': 0.7076144467448815, 'eval_precision': 0.6353064021200154, 'eval_recall': 0.7076144467448815, 'eval_f1': 0.6625589511044938, 'eval_runtime': 16.3364, 'eval_samples_per_second': 266.094, 'eval_steps_per_second': 16.65, 'epoch': 4.09}
{'loss': 0.7397, 'grad_norm': 3.8366708755493164, 'learning_rate': 3.669094984589521e-05, 'epoch': 4.29}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8685342669487, 'eval_accuracy': 0.706234184495054, 'eval_precision': 0.630675185864745, 'eval_recall': 0.706234184495054, 'eval_f1': 0.660571878191737, 'eval_runtime': 16.3318, 'eval_samples_per_second': 266.168, 'eval_steps_per_second': 16.655, 'epoch': 4.29}
{'loss': 0.7362, 'grad_norm': 3.4888417720794678, 'learning_rate': 3.5990473521994963e-05, 'epoch': 4.5}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8215165138244629, 'eval_accuracy': 0.7043938348286174, 'eval_precision': 0.6407825861510583, 'eval_recall': 0.7043938348286174, 'eval_f1': 0.6667007246760425, 'eval_runtime': 16.3612, 'eval_samples_per_second': 265.69, 'eval_steps_per_second': 16.625, 'epoch': 4.5}
{'loss': 0.7238, 'grad_norm': 8.3033447265625, 'learning_rate': 3.52899971980947e-05, 'epoch': 4.7}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8284943103790283, 'eval_accuracy': 0.7073844030365769, 'eval_precision': 0.6321873378512248, 'eval_recall': 0.7073844030365769, 'eval_f1': 0.6539823850854634, 'eval_runtime': 16.3379, 'eval_samples_per_second': 266.069, 'eval_steps_per_second': 16.648, 'epoch': 4.7}
{'loss': 0.732, 'grad_norm': 4.5619120597839355, 'learning_rate': 3.4589520874194454e-05, 'epoch': 4.91}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.803267240524292, 'eval_accuracy': 0.7119852772026685, 'eval_precision': 0.6332861716682161, 'eval_recall': 0.7119852772026685, 'eval_f1': 0.6611706948588612, 'eval_runtime': 16.424, 'eval_samples_per_second': 264.673, 'eval_steps_per_second': 16.561, 'epoch': 4.91}
{'loss': 0.7195, 'grad_norm': 5.440273284912109, 'learning_rate': 3.38890445502942e-05, 'epoch': 5.11}


  0%|          | 0/272 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.829176664352417, 'eval_accuracy': 0.6997929606625258, 'eval_precision': 0.6415606981640722, 'eval_recall': 0.6997929606625258, 'eval_f1': 0.6640822148963607, 'eval_runtime': 16.3449, 'eval_samples_per_second': 265.954, 'eval_steps_per_second': 16.641, 'epoch': 5.11}
{'loss': 0.6787, 'grad_norm': 8.012469291687012, 'learning_rate': 3.318856822639395e-05, 'epoch': 5.31}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.8489651679992676, 'eval_accuracy': 0.7025534851621809, 'eval_precision': 0.6432303482031227, 'eval_recall': 0.7025534851621809, 'eval_f1': 0.6583588075732596, 'eval_runtime': 16.5945, 'eval_samples_per_second': 261.955, 'eval_steps_per_second': 16.391, 'epoch': 5.31}
{'loss': 0.7137, 'grad_norm': 9.951729774475098, 'learning_rate': 3.24880919024937e-05, 'epoch': 5.52}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.8503360152244568, 'eval_accuracy': 0.7060041407867494, 'eval_precision': 0.6516902565402355, 'eval_recall': 0.7060041407867494, 'eval_f1': 0.6558112479553853, 'eval_runtime': 16.3902, 'eval_samples_per_second': 265.219, 'eval_steps_per_second': 16.595, 'epoch': 5.52}
{'loss': 0.7007, 'grad_norm': 3.016857385635376, 'learning_rate': 3.178761557859344e-05, 'epoch': 5.72}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.8318053483963013, 'eval_accuracy': 0.7025534851621809, 'eval_precision': 0.6364192927228705, 'eval_recall': 0.7025534851621809, 'eval_f1': 0.6467415160462833, 'eval_runtime': 16.5991, 'eval_samples_per_second': 261.882, 'eval_steps_per_second': 16.386, 'epoch': 5.72}
{'loss': 0.7125, 'grad_norm': 8.908042907714844, 'learning_rate': 3.1087139254693194e-05, 'epoch': 5.93}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.8355309963226318, 'eval_accuracy': 0.7014032666206579, 'eval_precision': 0.6336837850536209, 'eval_recall': 0.7014032666206579, 'eval_f1': 0.6508352810520489, 'eval_runtime': 16.5105, 'eval_samples_per_second': 263.287, 'eval_steps_per_second': 16.474, 'epoch': 5.93}
{'loss': 0.6625, 'grad_norm': 4.422828674316406, 'learning_rate': 3.0386662930792943e-05, 'epoch': 6.13}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.9210554957389832, 'eval_accuracy': 0.7037037037037037, 'eval_precision': 0.6393643128445113, 'eval_recall': 0.7037037037037037, 'eval_f1': 0.6570161211088832, 'eval_runtime': 16.589, 'eval_samples_per_second': 262.042, 'eval_steps_per_second': 16.396, 'epoch': 6.13}
{'loss': 0.6584, 'grad_norm': 6.2579874992370605, 'learning_rate': 2.9686186606892692e-05, 'epoch': 6.34}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.8735936880111694, 'eval_accuracy': 0.6984126984126984, 'eval_precision': 0.6322514525111581, 'eval_recall': 0.6984126984126984, 'eval_f1': 0.6481973064908731, 'eval_runtime': 16.5859, 'eval_samples_per_second': 262.09, 'eval_steps_per_second': 16.399, 'epoch': 6.34}
{'loss': 0.6695, 'grad_norm': 11.477547645568848, 'learning_rate': 2.8985710282992434e-05, 'epoch': 6.54}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.8784855008125305, 'eval_accuracy': 0.6912813434552565, 'eval_precision': 0.6425783859047065, 'eval_recall': 0.6912813434552565, 'eval_f1': 0.6591900678101817, 'eval_runtime': 16.464, 'eval_samples_per_second': 264.03, 'eval_steps_per_second': 16.521, 'epoch': 6.54}
{'loss': 0.6656, 'grad_norm': 11.36752986907959, 'learning_rate': 2.8285233959092182e-05, 'epoch': 6.75}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.9176187515258789, 'eval_accuracy': 0.6974925235794801, 'eval_precision': 0.6439509183335073, 'eval_recall': 0.6974925235794801, 'eval_f1': 0.6591457757891864, 'eval_runtime': 16.5461, 'eval_samples_per_second': 262.721, 'eval_steps_per_second': 16.439, 'epoch': 6.75}
{'loss': 0.6711, 'grad_norm': 4.314661026000977, 'learning_rate': 2.758475763519193e-05, 'epoch': 6.95}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.8887467384338379, 'eval_accuracy': 0.7011732229123534, 'eval_precision': 0.6479354426789308, 'eval_recall': 0.7011732229123534, 'eval_f1': 0.6632881568525922, 'eval_runtime': 16.3416, 'eval_samples_per_second': 266.008, 'eval_steps_per_second': 16.645, 'epoch': 6.95}
{'loss': 0.6239, 'grad_norm': 10.45396614074707, 'learning_rate': 2.688428131129168e-05, 'epoch': 7.15}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.9366103410720825, 'eval_accuracy': 0.6945019553715206, 'eval_precision': 0.6520190479013441, 'eval_recall': 0.6945019553715206, 'eval_f1': 0.6685330922321683, 'eval_runtime': 16.4076, 'eval_samples_per_second': 264.938, 'eval_steps_per_second': 16.578, 'epoch': 7.15}
{'loss': 0.6312, 'grad_norm': 7.087414264678955, 'learning_rate': 2.6183804987391425e-05, 'epoch': 7.36}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.9030426144599915, 'eval_accuracy': 0.6965723487462618, 'eval_precision': 0.6484854365693705, 'eval_recall': 0.6965723487462618, 'eval_f1': 0.66571435152902, 'eval_runtime': 16.3377, 'eval_samples_per_second': 266.072, 'eval_steps_per_second': 16.649, 'epoch': 7.36}
{'loss': 0.6311, 'grad_norm': 18.579225540161133, 'learning_rate': 2.5483328663491174e-05, 'epoch': 7.56}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.9242367744445801, 'eval_accuracy': 0.688290775247297, 'eval_precision': 0.6556495306809552, 'eval_recall': 0.688290775247297, 'eval_f1': 0.6689902487819408, 'eval_runtime': 16.5131, 'eval_samples_per_second': 263.246, 'eval_steps_per_second': 16.472, 'epoch': 7.56}
{'loss': 0.6456, 'grad_norm': 9.965653419494629, 'learning_rate': 2.4782852339590923e-05, 'epoch': 7.77}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.911020815372467, 'eval_accuracy': 0.6924315619967794, 'eval_precision': 0.6478975511712002, 'eval_recall': 0.6924315619967794, 'eval_f1': 0.6647284564621815, 'eval_runtime': 16.6116, 'eval_samples_per_second': 261.684, 'eval_steps_per_second': 16.374, 'epoch': 7.77}
{'loss': 0.6181, 'grad_norm': 10.696527481079102, 'learning_rate': 2.408237601569067e-05, 'epoch': 7.97}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.9261600971221924, 'eval_accuracy': 0.689901081205429, 'eval_precision': 0.6424441205220242, 'eval_recall': 0.689901081205429, 'eval_f1': 0.6573254224400767, 'eval_runtime': 16.4147, 'eval_samples_per_second': 264.824, 'eval_steps_per_second': 16.571, 'epoch': 7.97}
{'loss': 0.5878, 'grad_norm': 3.0605123043060303, 'learning_rate': 2.3381899691790417e-05, 'epoch': 8.18}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.9732067584991455, 'eval_accuracy': 0.6905912123303428, 'eval_precision': 0.6537577628993534, 'eval_recall': 0.6905912123303428, 'eval_f1': 0.6663185568429295, 'eval_runtime': 16.5983, 'eval_samples_per_second': 261.894, 'eval_steps_per_second': 16.387, 'epoch': 8.18}
{'loss': 0.5986, 'grad_norm': 13.54085922241211, 'learning_rate': 2.2681423367890166e-05, 'epoch': 8.38}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.9359745979309082, 'eval_accuracy': 0.6834598573729008, 'eval_precision': 0.6517125037201881, 'eval_recall': 0.6834598573729008, 'eval_f1': 0.6633933150656943, 'eval_runtime': 16.6062, 'eval_samples_per_second': 261.77, 'eval_steps_per_second': 16.379, 'epoch': 8.38}
{'loss': 0.581, 'grad_norm': 5.702419281005859, 'learning_rate': 2.1980947043989914e-05, 'epoch': 8.59}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.9502013921737671, 'eval_accuracy': 0.6954221302047389, 'eval_precision': 0.640595925793185, 'eval_recall': 0.6954221302047389, 'eval_f1': 0.6565419830649324, 'eval_runtime': 16.5369, 'eval_samples_per_second': 262.867, 'eval_steps_per_second': 16.448, 'epoch': 8.59}
{'loss': 0.5912, 'grad_norm': 6.3513994216918945, 'learning_rate': 2.1280470720089663e-05, 'epoch': 8.79}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.956149697303772, 'eval_accuracy': 0.6903611686220382, 'eval_precision': 0.6375853198127215, 'eval_recall': 0.6903611686220382, 'eval_f1': 0.6520624787368915, 'eval_runtime': 16.5424, 'eval_samples_per_second': 262.779, 'eval_steps_per_second': 16.443, 'epoch': 8.79}
{'loss': 0.5835, 'grad_norm': 5.696227550506592, 'learning_rate': 2.057999439618941e-05, 'epoch': 8.99}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.9826604723930359, 'eval_accuracy': 0.6866804692891649, 'eval_precision': 0.6508469231485624, 'eval_recall': 0.6866804692891649, 'eval_f1': 0.6620133957053421, 'eval_runtime': 16.6127, 'eval_samples_per_second': 261.668, 'eval_steps_per_second': 16.373, 'epoch': 8.99}
{'loss': 0.5426, 'grad_norm': 8.658143997192383, 'learning_rate': 1.9879518072289157e-05, 'epoch': 9.2}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.982140839099884, 'eval_accuracy': 0.6811594202898551, 'eval_precision': 0.6470551891527316, 'eval_recall': 0.6811594202898551, 'eval_f1': 0.6566602106759016, 'eval_runtime': 16.4262, 'eval_samples_per_second': 264.638, 'eval_steps_per_second': 16.559, 'epoch': 9.2}
{'loss': 0.5459, 'grad_norm': 12.698022842407227, 'learning_rate': 1.9179041748388902e-05, 'epoch': 9.4}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.0277467966079712, 'eval_accuracy': 0.6809293765815505, 'eval_precision': 0.645056846700432, 'eval_recall': 0.6809293765815505, 'eval_f1': 0.6575939494534908, 'eval_runtime': 16.4384, 'eval_samples_per_second': 264.441, 'eval_steps_per_second': 16.547, 'epoch': 9.4}
{'loss': 0.5567, 'grad_norm': 8.13183879852295, 'learning_rate': 1.8478565424488655e-05, 'epoch': 9.61}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 0.9914194345474243, 'eval_accuracy': 0.6892109500805152, 'eval_precision': 0.6480105634030724, 'eval_recall': 0.6892109500805152, 'eval_f1': 0.6573399172408175, 'eval_runtime': 16.4035, 'eval_samples_per_second': 265.005, 'eval_steps_per_second': 16.582, 'epoch': 9.61}
{'loss': 0.5476, 'grad_norm': 12.961636543273926, 'learning_rate': 1.7778089100588403e-05, 'epoch': 9.81}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.016283631324768, 'eval_accuracy': 0.692661605705084, 'eval_precision': 0.6469001805478788, 'eval_recall': 0.692661605705084, 'eval_f1': 0.66095104250693, 'eval_runtime': 16.4741, 'eval_samples_per_second': 263.869, 'eval_steps_per_second': 16.511, 'epoch': 9.81}
{'loss': 0.5547, 'grad_norm': 14.613335609436035, 'learning_rate': 1.707761277668815e-05, 'epoch': 10.02}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.0063446760177612, 'eval_accuracy': 0.6933517368299977, 'eval_precision': 0.652033623746056, 'eval_recall': 0.6933517368299977, 'eval_f1': 0.6664828220065162, 'eval_runtime': 16.5775, 'eval_samples_per_second': 262.223, 'eval_steps_per_second': 16.408, 'epoch': 10.02}
{'loss': 0.5079, 'grad_norm': 15.135733604431152, 'learning_rate': 1.6377136452787897e-05, 'epoch': 10.22}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.0676548480987549, 'eval_accuracy': 0.6820795951230734, 'eval_precision': 0.6524324058446587, 'eval_recall': 0.6820795951230734, 'eval_f1': 0.6608421256072649, 'eval_runtime': 16.5714, 'eval_samples_per_second': 262.319, 'eval_steps_per_second': 16.414, 'epoch': 10.22}
{'loss': 0.523, 'grad_norm': 11.243840217590332, 'learning_rate': 1.5676660128887643e-05, 'epoch': 10.43}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.033102035522461, 'eval_accuracy': 0.6910512997469519, 'eval_precision': 0.6520660905107352, 'eval_recall': 0.6910512997469519, 'eval_f1': 0.6659284477147237, 'eval_runtime': 16.6273, 'eval_samples_per_second': 261.438, 'eval_steps_per_second': 16.359, 'epoch': 10.43}
{'loss': 0.5082, 'grad_norm': 9.10694694519043, 'learning_rate': 1.4976183804987393e-05, 'epoch': 10.63}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.0410633087158203, 'eval_accuracy': 0.6901311249137336, 'eval_precision': 0.647835624116523, 'eval_recall': 0.6901311249137336, 'eval_f1': 0.6600261225237655, 'eval_runtime': 16.4599, 'eval_samples_per_second': 264.096, 'eval_steps_per_second': 16.525, 'epoch': 10.63}
{'loss': 0.5263, 'grad_norm': 17.423980712890625, 'learning_rate': 1.4275707481087138e-05, 'epoch': 10.83}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.0693402290344238, 'eval_accuracy': 0.6783988957902002, 'eval_precision': 0.6521575187991719, 'eval_recall': 0.6783988957902002, 'eval_f1': 0.6626667584662174, 'eval_runtime': 16.2287, 'eval_samples_per_second': 267.858, 'eval_steps_per_second': 16.76, 'epoch': 10.83}
{'loss': 0.5136, 'grad_norm': 46.89948272705078, 'learning_rate': 1.3575231157186887e-05, 'epoch': 11.04}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.0715298652648926, 'eval_accuracy': 0.6841499884978146, 'eval_precision': 0.6535537359939715, 'eval_recall': 0.6841499884978146, 'eval_f1': 0.664451804792439, 'eval_runtime': 16.1719, 'eval_samples_per_second': 268.799, 'eval_steps_per_second': 16.819, 'epoch': 11.04}
{'loss': 0.4924, 'grad_norm': 14.016453742980957, 'learning_rate': 1.2874754833286638e-05, 'epoch': 11.24}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.0658353567123413, 'eval_accuracy': 0.6857602944559467, 'eval_precision': 0.6536718054170372, 'eval_recall': 0.6857602944559467, 'eval_f1': 0.6655137985765026, 'eval_runtime': 16.6043, 'eval_samples_per_second': 261.8, 'eval_steps_per_second': 16.381, 'epoch': 11.24}
{'loss': 0.4738, 'grad_norm': 11.148024559020996, 'learning_rate': 1.2174278509386383e-05, 'epoch': 11.45}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.0909879207611084, 'eval_accuracy': 0.6836899010812054, 'eval_precision': 0.6557017590942854, 'eval_recall': 0.6836899010812054, 'eval_f1': 0.6668561183767836, 'eval_runtime': 16.5418, 'eval_samples_per_second': 262.788, 'eval_steps_per_second': 16.443, 'epoch': 11.45}
{'loss': 0.4714, 'grad_norm': 17.687931060791016, 'learning_rate': 1.1473802185486132e-05, 'epoch': 11.65}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.0957268476486206, 'eval_accuracy': 0.6800092017483322, 'eval_precision': 0.6463145227122564, 'eval_recall': 0.6800092017483322, 'eval_f1': 0.656216587745272, 'eval_runtime': 16.6283, 'eval_samples_per_second': 261.422, 'eval_steps_per_second': 16.358, 'epoch': 11.65}
{'loss': 0.4924, 'grad_norm': 14.643922805786133, 'learning_rate': 1.0773325861585879e-05, 'epoch': 11.86}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.0979053974151611, 'eval_accuracy': 0.6853002070393375, 'eval_precision': 0.6497724316291378, 'eval_recall': 0.6853002070393375, 'eval_f1': 0.6574787701767847, 'eval_runtime': 16.5952, 'eval_samples_per_second': 261.943, 'eval_steps_per_second': 16.39, 'epoch': 11.86}
{'loss': 0.4589, 'grad_norm': 16.580656051635742, 'learning_rate': 1.0072849537685626e-05, 'epoch': 12.06}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.1956802606582642, 'eval_accuracy': 0.6758684149988498, 'eval_precision': 0.65342324783506, 'eval_recall': 0.6758684149988498, 'eval_f1': 0.661910740950191, 'eval_runtime': 15.8904, 'eval_samples_per_second': 273.562, 'eval_steps_per_second': 17.117, 'epoch': 12.06}
{'loss': 0.4411, 'grad_norm': 7.449788570404053, 'learning_rate': 9.372373213785374e-06, 'epoch': 12.26}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.2051931619644165, 'eval_accuracy': 0.6733379342074994, 'eval_precision': 0.6477195729046891, 'eval_recall': 0.6733379342074994, 'eval_f1': 0.6549178757471099, 'eval_runtime': 15.2941, 'eval_samples_per_second': 284.227, 'eval_steps_per_second': 17.785, 'epoch': 12.26}
{'loss': 0.4524, 'grad_norm': 22.991453170776367, 'learning_rate': 8.671896889885123e-06, 'epoch': 12.47}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.1337939500808716, 'eval_accuracy': 0.6804692891649413, 'eval_precision': 0.6520705376636368, 'eval_recall': 0.6804692891649413, 'eval_f1': 0.661207211928511, 'eval_runtime': 15.2712, 'eval_samples_per_second': 284.654, 'eval_steps_per_second': 17.811, 'epoch': 12.47}
{'loss': 0.475, 'grad_norm': 16.453332901000977, 'learning_rate': 7.97142056598487e-06, 'epoch': 12.67}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.1602050065994263, 'eval_accuracy': 0.6731078904991948, 'eval_precision': 0.6563880201153356, 'eval_recall': 0.6731078904991948, 'eval_f1': 0.6627240911719811, 'eval_runtime': 15.3789, 'eval_samples_per_second': 282.659, 'eval_steps_per_second': 17.687, 'epoch': 12.67}
{'loss': 0.4538, 'grad_norm': 8.672873497009277, 'learning_rate': 7.270944242084618e-06, 'epoch': 12.88}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.1328917741775513, 'eval_accuracy': 0.6887508626639062, 'eval_precision': 0.6524836856484078, 'eval_recall': 0.6887508626639062, 'eval_f1': 0.6621621699844565, 'eval_runtime': 15.2662, 'eval_samples_per_second': 284.747, 'eval_steps_per_second': 17.817, 'epoch': 12.88}
{'loss': 0.4441, 'grad_norm': 14.343741416931152, 'learning_rate': 6.570467918184366e-06, 'epoch': 13.08}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.1608127355575562, 'eval_accuracy': 0.6783988957902002, 'eval_precision': 0.648721792089475, 'eval_recall': 0.6783988957902002, 'eval_f1': 0.6598015820513579, 'eval_runtime': 16.4821, 'eval_samples_per_second': 263.741, 'eval_steps_per_second': 16.503, 'epoch': 13.08}
{'loss': 0.4388, 'grad_norm': 20.420042037963867, 'learning_rate': 5.869991594284113e-06, 'epoch': 13.29}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.158353328704834, 'eval_accuracy': 0.679549114331723, 'eval_precision': 0.6501252415744331, 'eval_recall': 0.679549114331723, 'eval_f1': 0.6604189641292709, 'eval_runtime': 16.4524, 'eval_samples_per_second': 264.217, 'eval_steps_per_second': 16.533, 'epoch': 13.29}
{'loss': 0.4281, 'grad_norm': 15.429155349731445, 'learning_rate': 5.169515270383862e-06, 'epoch': 13.49}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.2034447193145752, 'eval_accuracy': 0.6705774097078445, 'eval_precision': 0.6483335395280483, 'eval_recall': 0.6705774097078445, 'eval_f1': 0.6560375929189366, 'eval_runtime': 16.4826, 'eval_samples_per_second': 263.733, 'eval_steps_per_second': 16.502, 'epoch': 13.49}
{'loss': 0.4196, 'grad_norm': 7.788223743438721, 'learning_rate': 4.46903894648361e-06, 'epoch': 13.7}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.1984068155288696, 'eval_accuracy': 0.6747181964573269, 'eval_precision': 0.6492339232423007, 'eval_recall': 0.6747181964573269, 'eval_f1': 0.6597244903929625, 'eval_runtime': 16.5899, 'eval_samples_per_second': 262.027, 'eval_steps_per_second': 16.396, 'epoch': 13.7}
{'loss': 0.4387, 'grad_norm': 6.724644660949707, 'learning_rate': 3.768562622583357e-06, 'epoch': 13.9}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.176538109779358, 'eval_accuracy': 0.671957671957672, 'eval_precision': 0.6495410846417607, 'eval_recall': 0.671957671957672, 'eval_f1': 0.6573449973038292, 'eval_runtime': 16.5979, 'eval_samples_per_second': 261.9, 'eval_steps_per_second': 16.388, 'epoch': 13.9}
{'loss': 0.4168, 'grad_norm': 9.018512725830078, 'learning_rate': 3.0680862986831045e-06, 'epoch': 14.1}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.229907751083374, 'eval_accuracy': 0.6685070163331033, 'eval_precision': 0.6459268896613989, 'eval_recall': 0.6685070163331033, 'eval_f1': 0.6551109854442223, 'eval_runtime': 16.5999, 'eval_samples_per_second': 261.869, 'eval_steps_per_second': 16.386, 'epoch': 14.1}
{'loss': 0.3941, 'grad_norm': 17.215946197509766, 'learning_rate': 2.3676099747828524e-06, 'epoch': 14.31}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.2426756620407104, 'eval_accuracy': 0.6710374971244536, 'eval_precision': 0.6533667641078454, 'eval_recall': 0.6710374971244536, 'eval_f1': 0.6608425557362739, 'eval_runtime': 16.579, 'eval_samples_per_second': 262.199, 'eval_steps_per_second': 16.406, 'epoch': 14.31}
{'loss': 0.4114, 'grad_norm': 6.446820259094238, 'learning_rate': 1.6671336508826e-06, 'epoch': 14.51}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.2294639348983765, 'eval_accuracy': 0.6767885898320681, 'eval_precision': 0.6510649001080419, 'eval_recall': 0.6767885898320681, 'eval_f1': 0.6600466997496496, 'eval_runtime': 16.4818, 'eval_samples_per_second': 263.746, 'eval_steps_per_second': 16.503, 'epoch': 14.51}
{'loss': 0.4169, 'grad_norm': 14.071447372436523, 'learning_rate': 9.666573269823481e-07, 'epoch': 14.72}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.2351536750793457, 'eval_accuracy': 0.6731078904991948, 'eval_precision': 0.6515682991408055, 'eval_recall': 0.6731078904991948, 'eval_f1': 0.6595323372835603, 'eval_runtime': 16.6223, 'eval_samples_per_second': 261.516, 'eval_steps_per_second': 16.364, 'epoch': 14.72}
{'loss': 0.3907, 'grad_norm': 12.692825317382812, 'learning_rate': 2.6618100308209587e-07, 'epoch': 14.92}


  0%|          | 0/272 [00:00<?, ?it/s]

{'eval_loss': 1.242987036705017, 'eval_accuracy': 0.6728778467908902, 'eval_precision': 0.6501326112687558, 'eval_recall': 0.6728778467908902, 'eval_f1': 0.6589296147450722, 'eval_runtime': 16.9984, 'eval_samples_per_second': 255.73, 'eval_steps_per_second': 16.002, 'epoch': 14.92}
{'train_runtime': 8061.8703, 'train_samples_per_second': 72.78, 'train_steps_per_second': 2.276, 'train_loss': 0.6337880610509932, 'epoch': 15.0}


TrainOutput(global_step=18345, training_loss=0.6337880610509932, metrics={'train_runtime': 8061.8703, 'train_samples_per_second': 72.78, 'train_steps_per_second': 2.276, 'total_flos': 3.859548472733184e+16, 'train_loss': 0.6337880610509932, 'epoch': 15.0})