In [1]:
from datasets import Dataset
import pandas as pd
import evaluate
import numpy as np
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer, DataCollatorWithPadding, AutoTokenizer, set_seed
import os
from sklearn.model_selection import train_test_split
from scipy.special import softmax
import argparse
import logging
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
import wandb
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
import os
from torch.utils.data import Dataset
import torch
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, roc_auc_score, confusion_matrix
from imblearn.under_sampling import RandomUnderSampler
from datasets import Dataset
import numpy as np
import numpy as np
from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score, confusion_matrix

import pandas as pd,os
import torch
from statistics import mode
from sklearn.metrics import confusion_matrix
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline, Trainer
from datasets import Dataset, load_metric
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def preprocess_function(examples, **fn_kwargs):
    return fn_kwargs['tokenizer'](examples["text"], truncation=True)

def get_data(train_path, test_path, random_seed):
    """
    function to read dataframe with columns
    """

    train_df = pd.read_json(train_path, lines=True)
    test_df = pd.read_json(test_path, lines=True)
   
    return train_df, test_df

def compute_metrics(p):
    preds = np.argmax(p.predictions, axis=1)
    labels = p.label_ids
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    acc = accuracy_score(labels, preds)
    auc = roc_auc_score(labels, preds)
    cm = confusion_matrix(labels, preds)

    return {
        'accuracy': acc,
        'f1': f1,
        'auc': auc,
        'precision': precision,
        'recall': recall,
        'confusion_matrix': cm.tolist()
    }

def fine_tune(train_df, valid_df, checkpoints_path, id2label, label2id, model):

    # pandas dataframe to huggingface Dataset
    train_dataset = Dataset.from_pandas(train_df)
    valid_dataset = Dataset.from_pandas(valid_df)
    
    # get tokenizer and model from huggingface
    tokenizer = AutoTokenizer.from_pretrained(model)     # put your model here
    model = AutoModelForSequenceClassification.from_pretrained(
       model, num_labels=len(label2id), id2label=id2label, label2id=label2id    # put your model here
    )
    
    # tokenize data for train/valid
    tokenized_train_dataset = train_dataset.map(preprocess_function, batched=True, fn_kwargs={'tokenizer': tokenizer})
    tokenized_valid_dataset = valid_dataset.map(preprocess_function, batched=True,  fn_kwargs={'tokenizer': tokenizer})
    

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


    # create Trainer 
    training_args = TrainingArguments(
        output_dir=checkpoints_path,
        learning_rate=2e-5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=3,
        weight_decay=0.01,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train_dataset,
        eval_dataset=tokenized_valid_dataset,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    trainer.train()
    
    pokemonRojoFuego=trainer.predict(tokenized_valid_dataset)
    print(pokemonRojoFuego)

    # save best model
    best_model_path = checkpoints_path+'/best/'
    
    if not os.path.exists(best_model_path):
        os.makedirs(best_model_path)
    

    trainer.save_model(best_model_path)

def test(test_df, model_path, id2label, label2id):
    
    # load tokenizer from saved model 
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # load best model
    model = AutoModelForSequenceClassification.from_pretrained(
       model_path, num_labels=len(label2id), id2label=id2label, label2id=label2id
    )
            
    test_dataset = Dataset.from_pandas(test_df)

    tokenized_test_dataset = test_dataset.map(preprocess_function, batched=True,  fn_kwargs={'tokenizer': tokenizer})
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # create Trainer
    trainer = Trainer(
        model=model,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )
    # get logits from predictions and evaluate results using classification report
    predictions = trainer.predict(tokenized_test_dataset)
    prob_pred = softmax(predictions.predictions, axis=-1)
    preds = np.argmax(predictions.predictions, axis=-1)
    metric = evaluate.load("bstrai/classification_report")
    results = metric.compute(predictions=preds, references=predictions.label_ids)
    
    # return dictionary of classification report
    return results, preds

def process_and_balance_dataframes(df):
    if 'label' not in df.columns:
        raise ValueError("DataFrame must contain a 'label' column")

    unique_labels = df['label'].unique()
    balanced_dfs = []

    for label in np.sort(unique_labels):
        
        # Create a binary label DataFrame
        binary_df = df.copy()
        binary_df['label'] = binary_df['label'].apply(lambda x: 1 if x == label else 0)

        # Balancing the dataset
        rus = RandomUnderSampler(random_state=42)
        X = binary_df.drop('label', axis=1)
        y = binary_df['label']
        X_res, y_res = rus.fit_resample(X, y)

        # Creating a balanced DataFrame
        balanced_df = pd.DataFrame(X_res, columns=X.columns)
        balanced_df['label'] = y_res
        balanced_dfs.append(balanced_df)
        print(f'label: {label} - {balanced_df.shape}')

    return balanced_dfs

In [3]:
random_seed = 0

df= pd.read_json('datasets/subtaskB_train.jsonl', lines=True)
df=df.sample(1000)

print(df['label'].value_counts())

onevsall_dfs=process_and_balance_dataframes(df)

i=0

for current_df in onevsall_dfs:
    print(f'\n\nTraining for label {i}')
    print(current_df['label'].value_counts())
    
    train_df, dev_df = train_test_split(current_df, test_size=0.1, random_state=42)

    train_df.to_json('reducedTrainDataFrame.jsonl', orient='records', lines=True)
    dev_df.to_json('reducedTrainDataFrame_dev.jsonl', orient='records', lines=True)

    train_path =  'reducedTrainDataFrame.jsonl'
    test_path =  'reducedTrainDataFrame_dev.jsonl'

    model = 'roberta-base'

    subtask =  'A'
    prediction_path = 'reducedPredictedDataFrame.jsonl'

    if not os.path.exists(train_path):
        logging.error("File doesnt exists: {}".format(train_path))
        raise ValueError("File doesnt exists: {}".format(train_path))

    if not os.path.exists(test_path):
        logging.error("File doesnt exists: {}".format(train_path))
        raise ValueError("File doesnt exists: {}".format(train_path))

    id2label = {0: "not_current_model", 1: "current_model"}
    label2id = {"not_current_model": 0, "current_model": 1}

    set_seed(random_seed)

    train_df, test_df = get_data(train_path, test_path, random_seed)

    fine_tune(train_df, test_df, f"testing_models/{model.split('/')[-1]}_label{i}_0k", id2label, label2id, model)

    i+=1


label
3    178
4    174
1    172
2    169
0    159
5    148
Name: count, dtype: int64
label: 0 - (318, 5)
label: 1 - (344, 5)
label: 2 - (338, 5)
label: 3 - (356, 5)
label: 4 - (348, 5)
label: 5 - (296, 5)


Training for label 0
label
0    159
1    159
Name: count, dtype: int64


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 286/286 [00:00<00:00, 4494.27 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 2282.54 examples/s]
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33malberto-rodero557[0m. Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/54 [00:00<?, ?it/s]You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
 33%|███▎      | 18/54 [00:04<00:07,  4.65it/s]Trainer is attempting to log a value of "[[11, 5], [1, 15]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
                                               
 33%|███▎      | 18/54 [00:04<00:07,  4.65it/s]

{'eval_loss': 0.5817766189575195, 'eval_accuracy': 0.8125, 'eval_f1': 0.8333333333333334, 'eval_auc': 0.8125, 'eval_precision': 0.75, 'eval_recall': 0.9375, 'eval_confusion_matrix': [[11, 5], [1, 15]], 'eval_runtime': 0.1643, 'eval_samples_per_second': 194.738, 'eval_steps_per_second': 12.171, 'epoch': 1.0}


 67%|██████▋   | 36/54 [00:10<00:03,  4.65it/s]Trainer is attempting to log a value of "[[16, 0], [2, 14]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
                                               
 67%|██████▋   | 36/54 [00:10<00:03,  4.65it/s]

{'eval_loss': 0.2547818422317505, 'eval_accuracy': 0.9375, 'eval_f1': 0.9333333333333333, 'eval_auc': 0.9375, 'eval_precision': 1.0, 'eval_recall': 0.875, 'eval_confusion_matrix': [[16, 0], [2, 14]], 'eval_runtime': 0.1648, 'eval_samples_per_second': 194.124, 'eval_steps_per_second': 12.133, 'epoch': 2.0}


100%|██████████| 54/54 [00:15<00:00,  4.67it/s]Trainer is attempting to log a value of "[[16, 0], [2, 14]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
                                               
100%|██████████| 54/54 [00:15<00:00,  4.67it/s]

{'eval_loss': 0.21345257759094238, 'eval_accuracy': 0.9375, 'eval_f1': 0.9333333333333333, 'eval_auc': 0.9375, 'eval_precision': 1.0, 'eval_recall': 0.875, 'eval_confusion_matrix': [[16, 0], [2, 14]], 'eval_runtime': 0.1629, 'eval_samples_per_second': 196.421, 'eval_steps_per_second': 12.276, 'epoch': 3.0}


100%|██████████| 54/54 [00:17<00:00,  3.14it/s]


{'train_runtime': 18.7682, 'train_samples_per_second': 45.716, 'train_steps_per_second': 2.877, 'train_loss': 0.3860274420844184, 'epoch': 3.0}


100%|██████████| 2/2 [00:00<00:00, 26.13it/s]


PredictionOutput(predictions=array([[ 1.3591527 , -1.367556  ],
       [-2.080223  ,  2.0065427 ],
       [ 1.1978284 , -1.2842716 ],
       [-2.1423118 ,  2.0436175 ],
       [ 1.1823822 , -1.2691592 ],
       [ 1.368689  , -1.437959  ],
       [-2.0632112 ,  1.9494604 ],
       [-2.0471585 ,  2.093024  ],
       [ 1.283997  , -1.3507924 ],
       [ 1.3503917 , -1.4255153 ],
       [ 1.295598  , -1.3111292 ],
       [ 1.2889085 , -1.4252031 ],
       [-2.0277724 ,  2.1149943 ],
       [ 0.93307614, -0.9568824 ],
       [-2.0161204 ,  2.0337534 ],
       [ 1.1350982 , -1.1812134 ],
       [-1.6758199 ,  1.7885774 ],
       [ 1.190765  , -1.2457278 ],
       [-2.1109395 ,  2.099482  ],
       [-2.009266  ,  1.9808086 ],
       [-1.9287232 ,  1.9721607 ],
       [-1.754269  ,  1.6867911 ],
       [-2.0530899 ,  2.0136352 ],
       [ 1.2567849 , -1.3016502 ],
       [ 0.4637889 , -0.61355335],
       [ 0.8646828 , -0.92552537],
       [-2.2327447 ,  2.12434   ],
       [ 0.8192887 , -0.89

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 309/309 [00:00<00:00, 5060.58 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 3330.25 examples/s]
  0%|          | 0/60 [00:00<?, ?it/s]You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
 32%|███▏      | 19/60 [00:04<00:09,  4.51it/s]Trainer is attempting to log a value of "[[12, 7], [4, 12]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so w

{'eval_loss': 0.6276017427444458, 'eval_accuracy': 0.6857142857142857, 'eval_f1': 0.6857142857142857, 'eval_auc': 0.6907894736842104, 'eval_precision': 0.631578947368421, 'eval_recall': 0.75, 'eval_confusion_matrix': [[12, 7], [4, 12]], 'eval_runtime': 0.1775, 'eval_samples_per_second': 197.218, 'eval_steps_per_second': 16.904, 'epoch': 1.0}


 65%|██████▌   | 39/60 [00:10<00:04,  4.36it/s]Trainer is attempting to log a value of "[[12, 7], [0, 16]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
                                               
 67%|██████▋   | 40/60 [00:10<00:04,  4.36it/s]

{'eval_loss': 0.5574249625205994, 'eval_accuracy': 0.8, 'eval_f1': 0.8205128205128205, 'eval_auc': 0.8157894736842105, 'eval_precision': 0.6956521739130435, 'eval_recall': 1.0, 'eval_confusion_matrix': [[12, 7], [0, 16]], 'eval_runtime': 0.1768, 'eval_samples_per_second': 197.942, 'eval_steps_per_second': 16.966, 'epoch': 2.0}


 98%|█████████▊| 59/60 [00:15<00:00,  4.48it/s]Trainer is attempting to log a value of "[[16, 3], [1, 15]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
                                               
100%|██████████| 60/60 [00:16<00:00,  4.48it/s]

{'eval_loss': 0.359159380197525, 'eval_accuracy': 0.8857142857142857, 'eval_f1': 0.8823529411764706, 'eval_auc': 0.8898026315789473, 'eval_precision': 0.8333333333333334, 'eval_recall': 0.9375, 'eval_confusion_matrix': [[16, 3], [1, 15]], 'eval_runtime': 0.1829, 'eval_samples_per_second': 191.315, 'eval_steps_per_second': 16.398, 'epoch': 3.0}


100%|██████████| 60/60 [00:17<00:00,  3.40it/s]


{'train_runtime': 17.6252, 'train_samples_per_second': 52.595, 'train_steps_per_second': 3.404, 'train_loss': 0.5245036443074544, 'epoch': 3.0}


100%|██████████| 3/3 [00:00<00:00, 35.27it/s]


PredictionOutput(predictions=array([[-0.20435828,  0.15633969],
       [-0.7768489 ,  0.250968  ],
       [-1.8090919 ,  1.6144699 ],
       [-1.9305148 ,  1.6774125 ],
       [-0.22724682,  0.06194476],
       [-1.4356672 ,  1.087782  ],
       [ 0.684137  , -0.9207112 ],
       [-1.5524727 ,  1.2944418 ],
       [ 1.5878452 , -1.4768381 ],
       [-1.7457817 ,  1.4313656 ],
       [-1.7523235 ,  1.5552132 ],
       [ 1.5410403 , -1.4765381 ],
       [ 1.4940193 , -1.5232819 ],
       [-0.924099  ,  0.40566328],
       [-1.7242949 ,  1.4463938 ],
       [ 0.8200089 , -0.87617326],
       [ 0.5484975 , -0.66233194],
       [-1.8922565 ,  1.693135  ],
       [-1.800823  ,  1.6714047 ],
       [-0.593021  ,  0.17281239],
       [ 0.46311155, -0.6121122 ],
       [ 0.20105484, -0.38297418],
       [-1.5593336 ,  1.2955502 ],
       [ 0.16226296, -0.35238618],
       [ 0.80712175, -0.892857  ],
       [-1.7144881 ,  1.4537184 ],
       [ 0.19975235, -0.24556567],
       [-1.6151897 ,  1.35

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 304/304 [00:00<00:00, 6531.38 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 3364.63 examples/s]
  0%|          | 0/57 [00:00<?, ?it/s]You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
 33%|███▎      | 19/57 [00:04<00:08,  4.51it/s]Trainer is attempting to log a value of "[[2, 18], [0, 14]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so w

{'eval_loss': 0.6751080751419067, 'eval_accuracy': 0.47058823529411764, 'eval_f1': 0.6086956521739131, 'eval_auc': 0.55, 'eval_precision': 0.4375, 'eval_recall': 1.0, 'eval_confusion_matrix': [[2, 18], [0, 14]], 'eval_runtime': 0.1716, 'eval_samples_per_second': 198.114, 'eval_steps_per_second': 17.481, 'epoch': 1.0}


 67%|██████▋   | 38/57 [00:10<00:04,  4.52it/s]Trainer is attempting to log a value of "[[8, 12], [0, 14]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
                                               
 67%|██████▋   | 38/57 [00:10<00:04,  4.52it/s]

{'eval_loss': 0.635382354259491, 'eval_accuracy': 0.6470588235294118, 'eval_f1': 0.7000000000000001, 'eval_auc': 0.7, 'eval_precision': 0.5384615384615384, 'eval_recall': 1.0, 'eval_confusion_matrix': [[8, 12], [0, 14]], 'eval_runtime': 0.1738, 'eval_samples_per_second': 195.584, 'eval_steps_per_second': 17.257, 'epoch': 2.0}


100%|██████████| 57/57 [00:15<00:00,  4.67it/s]Trainer is attempting to log a value of "[[13, 7], [0, 14]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.

100%|██████████| 57/57 [00:15<00:00,  4.67it/s]

{'eval_loss': 0.5640297532081604, 'eval_accuracy': 0.7941176470588235, 'eval_f1': 0.8, 'eval_auc': 0.825, 'eval_precision': 0.6666666666666666, 'eval_recall': 1.0, 'eval_confusion_matrix': [[13, 7], [0, 14]], 'eval_runtime': 0.1852, 'eval_samples_per_second': 183.593, 'eval_steps_per_second': 16.199, 'epoch': 3.0}


100%|██████████| 57/57 [00:17<00:00,  3.34it/s]


{'train_runtime': 17.0708, 'train_samples_per_second': 53.424, 'train_steps_per_second': 3.339, 'train_loss': 0.6081049065840872, 'epoch': 3.0}


100%|██████████| 3/3 [00:00<00:00, 34.86it/s]


PredictionOutput(predictions=array([[ 0.08637703, -0.08790345],
       [-0.6629512 ,  0.9621987 ],
       [-0.06899299,  0.05285936],
       [ 0.5083303 , -0.42344975],
       [-0.92346716,  1.2016282 ],
       [-0.8169539 ,  0.9896983 ],
       [ 0.17533414, -0.18819079],
       [-0.39926964,  0.52405214],
       [-0.20780736,  0.3423396 ],
       [ 0.1667374 , -0.12906155],
       [ 0.02122703,  0.02909597],
       [-0.85111547,  1.1006504 ],
       [-0.74926096,  1.0284059 ],
       [-0.02114723,  0.1544601 ],
       [-0.04533643,  0.10110208],
       [ 0.1436456 , -0.12348482],
       [-0.10289803,  0.16720676],
       [-0.33224073,  0.53215694],
       [-0.5862348 ,  0.85472804],
       [ 0.10874398, -0.0365673 ],
       [ 0.03913756,  0.11373958],
       [-0.09728011,  0.15510148],
       [-0.7860497 ,  1.0730821 ],
       [ 0.1402562 , -0.13215916],
       [ 0.09247153, -0.09143364],
       [ 0.14592904, -0.14396968],
       [-0.36664566,  0.48463708],
       [-0.19152954,  0.30

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 320/320 [00:00<00:00, 5920.84 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 3424.93 examples/s]
  0%|          | 0/60 [00:00<?, ?it/s]You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
 33%|███▎      | 20/60 [00:04<00:08,  4.47it/s]Trainer is attempting to log a value of "[[17, 1], [13, 5]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so w

{'eval_loss': 0.6897968649864197, 'eval_accuracy': 0.6111111111111112, 'eval_f1': 0.4166666666666667, 'eval_auc': 0.611111111111111, 'eval_precision': 0.8333333333333334, 'eval_recall': 0.2777777777777778, 'eval_confusion_matrix': [[17, 1], [13, 5]], 'eval_runtime': 0.1869, 'eval_samples_per_second': 192.651, 'eval_steps_per_second': 16.054, 'epoch': 1.0}


 67%|██████▋   | 40/60 [00:10<00:04,  4.45it/s]Trainer is attempting to log a value of "[[9, 9], [6, 12]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.

 67%|██████▋   | 40/60 [00:10<00:04,  4.45it/s]

{'eval_loss': 0.672661304473877, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.6153846153846153, 'eval_auc': 0.5833333333333333, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.6666666666666666, 'eval_confusion_matrix': [[9, 9], [6, 12]], 'eval_runtime': 0.1823, 'eval_samples_per_second': 197.447, 'eval_steps_per_second': 16.454, 'epoch': 2.0}


100%|██████████| 60/60 [00:16<00:00,  4.47it/s]Trainer is attempting to log a value of "[[6, 12], [3, 15]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.

100%|██████████| 60/60 [00:16<00:00,  4.47it/s]

{'eval_loss': 0.6580802798271179, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.6666666666666667, 'eval_auc': 0.5833333333333335, 'eval_precision': 0.5555555555555556, 'eval_recall': 0.8333333333333334, 'eval_confusion_matrix': [[6, 12], [3, 15]], 'eval_runtime': 0.179, 'eval_samples_per_second': 201.097, 'eval_steps_per_second': 16.758, 'epoch': 3.0}


100%|██████████| 60/60 [00:18<00:00,  3.31it/s]


{'train_runtime': 18.1552, 'train_samples_per_second': 52.877, 'train_steps_per_second': 3.305, 'train_loss': 0.6723592122395833, 'epoch': 3.0}


100%|██████████| 3/3 [00:00<00:00, 33.32it/s]


PredictionOutput(predictions=array([[ 0.00891186,  0.03057183],
       [-0.16328064,  0.16737331],
       [-0.06584411,  0.11730452],
       [-0.13337278,  0.14698479],
       [ 0.1968729 , -0.17205904],
       [ 0.04135403, -0.0206851 ],
       [ 0.11520058, -0.08736792],
       [-0.00839227,  0.03754065],
       [ 0.09774699, -0.07137944],
       [-0.24098292,  0.27013865],
       [-0.09760526,  0.09771747],
       [-0.10875738,  0.12566149],
       [-0.05143604,  0.06832202],
       [-0.22114326,  0.22992933],
       [-0.11453638,  0.15631332],
       [-0.26102144,  0.30380854],
       [ 0.04067517,  0.00239846],
       [ 0.06739908, -0.03980219],
       [-0.02638169,  0.06701081],
       [-0.19351128,  0.22124003],
       [ 0.01260625,  0.02673431],
       [ 0.00066426,  0.03971876],
       [-0.22268729,  0.22999252],
       [-0.01811118,  0.00090532],
       [-0.18239923,  0.21142544],
       [-0.20199145,  0.19984998],
       [-0.18713191,  0.223202  ],
       [-0.21647123,  0.21

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 313/313 [00:00<00:00, 6021.41 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 3685.22 examples/s]
  0%|          | 0/60 [00:00<?, ?it/s]You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
 33%|███▎      | 20/60 [00:04<00:07,  5.04it/s]Trainer is attempting to log a value of "[[14, 7], [0, 14]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so w

{'eval_loss': 0.6063339114189148, 'eval_accuracy': 0.8, 'eval_f1': 0.8, 'eval_auc': 0.8333333333333334, 'eval_precision': 0.6666666666666666, 'eval_recall': 1.0, 'eval_confusion_matrix': [[14, 7], [0, 14]], 'eval_runtime': 0.1691, 'eval_samples_per_second': 207.024, 'eval_steps_per_second': 17.745, 'epoch': 1.0}


 67%|██████▋   | 40/60 [00:10<00:03,  5.07it/s]Trainer is attempting to log a value of "[[20, 1], [3, 11]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.

 67%|██████▋   | 40/60 [00:10<00:03,  5.07it/s]

{'eval_loss': 0.29131242632865906, 'eval_accuracy': 0.8857142857142857, 'eval_f1': 0.8461538461538461, 'eval_auc': 0.869047619047619, 'eval_precision': 0.9166666666666666, 'eval_recall': 0.7857142857142857, 'eval_confusion_matrix': [[20, 1], [3, 11]], 'eval_runtime': 0.1691, 'eval_samples_per_second': 207.006, 'eval_steps_per_second': 17.743, 'epoch': 2.0}


100%|██████████| 60/60 [00:16<00:00,  5.15it/s]Trainer is attempting to log a value of "[[20, 1], [1, 13]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.

100%|██████████| 60/60 [00:16<00:00,  5.15it/s]

{'eval_loss': 0.16950753331184387, 'eval_accuracy': 0.9428571428571428, 'eval_f1': 0.9285714285714286, 'eval_auc': 0.9404761904761904, 'eval_precision': 0.9285714285714286, 'eval_recall': 0.9285714285714286, 'eval_confusion_matrix': [[20, 1], [1, 13]], 'eval_runtime': 0.1686, 'eval_samples_per_second': 207.594, 'eval_steps_per_second': 17.794, 'epoch': 3.0}


100%|██████████| 60/60 [00:17<00:00,  3.39it/s]


{'train_runtime': 17.6862, 'train_samples_per_second': 53.092, 'train_steps_per_second': 3.392, 'train_loss': 0.4370721181233724, 'epoch': 3.0}


100%|██████████| 3/3 [00:00<00:00, 37.95it/s]


PredictionOutput(predictions=array([[-1.9490178 ,  2.0316265 ],
       [ 1.5227078 , -1.3635459 ],
       [-1.9340832 ,  2.0830195 ],
       [-1.3084121 ,  1.4560884 ],
       [ 1.9918504 , -1.7708483 ],
       [ 1.0270512 , -0.68448704],
       [-1.8431525 ,  2.0472884 ],
       [-1.3297827 ,  1.342704  ],
       [-0.24730961,  0.25219274],
       [ 1.2789073 , -1.1467907 ],
       [ 1.226227  , -1.1811454 ],
       [ 1.343309  , -1.2062926 ],
       [-1.7623217 ,  2.0356493 ],
       [ 1.9279035 , -1.6674129 ],
       [-0.26075602,  0.5075524 ],
       [ 1.3189132 , -1.0602943 ],
       [-1.464678  ,  1.6872567 ],
       [ 1.1104039 , -0.88766855],
       [ 1.2304451 , -1.0232836 ],
       [-0.16647765,  0.32270843],
       [-1.9029602 ,  2.0538309 ],
       [ 1.1386906 , -1.035981  ],
       [ 1.1395421 , -1.185952  ],
       [-0.10335716,  0.30097386],
       [ 1.2232317 , -1.0556604 ],
       [ 1.248432  , -1.2002201 ],
       [ 1.2324513 , -0.9984324 ],
       [ 0.49405032, -0.34

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 266/266 [00:00<00:00, 5012.11 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 3526.30 examples/s]
  0%|          | 0/51 [00:00<?, ?it/s]You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
 33%|███▎      | 17/51 [00:03<00:06,  4.98it/s]Trainer is attempting to log a value of "[[11, 2], [2, 15]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so w

{'eval_loss': 0.6476290225982666, 'eval_accuracy': 0.8666666666666667, 'eval_f1': 0.8823529411764706, 'eval_auc': 0.8642533936651583, 'eval_precision': 0.8823529411764706, 'eval_recall': 0.8823529411764706, 'eval_confusion_matrix': [[11, 2], [2, 15]], 'eval_runtime': 0.1551, 'eval_samples_per_second': 193.422, 'eval_steps_per_second': 12.895, 'epoch': 1.0}


 67%|██████▋   | 34/51 [00:09<00:03,  4.96it/s]Trainer is attempting to log a value of "[[11, 2], [0, 17]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.

 67%|██████▋   | 34/51 [00:09<00:03,  4.96it/s]

{'eval_loss': 0.28423231840133667, 'eval_accuracy': 0.9333333333333333, 'eval_f1': 0.9444444444444444, 'eval_auc': 0.9230769230769231, 'eval_precision': 0.8947368421052632, 'eval_recall': 1.0, 'eval_confusion_matrix': [[11, 2], [0, 17]], 'eval_runtime': 0.1549, 'eval_samples_per_second': 193.711, 'eval_steps_per_second': 12.914, 'epoch': 2.0}


100%|██████████| 51/51 [00:14<00:00,  4.97it/s]Trainer is attempting to log a value of "[[11, 2], [0, 17]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.

100%|██████████| 51/51 [00:14<00:00,  4.97it/s]

{'eval_loss': 0.21711711585521698, 'eval_accuracy': 0.9333333333333333, 'eval_f1': 0.9444444444444444, 'eval_auc': 0.9230769230769231, 'eval_precision': 0.8947368421052632, 'eval_recall': 1.0, 'eval_confusion_matrix': [[11, 2], [0, 17]], 'eval_runtime': 0.1549, 'eval_samples_per_second': 193.685, 'eval_steps_per_second': 12.912, 'epoch': 3.0}


100%|██████████| 51/51 [00:15<00:00,  3.25it/s]


{'train_runtime': 15.697, 'train_samples_per_second': 50.838, 'train_steps_per_second': 3.249, 'train_loss': 0.4747910219080308, 'epoch': 3.0}


100%|██████████| 2/2 [00:00<00:00, 30.05it/s]


PredictionOutput(predictions=array([[-1.1500847 ,  1.1474271 ],
       [-0.7840526 ,  0.6739131 ],
       [ 1.8492463 , -1.9941918 ],
       [ 1.813197  , -1.9462798 ],
       [-0.8954138 ,  0.77748954],
       [ 1.7625248 , -1.8589479 ],
       [ 1.6846746 , -1.9103608 ],
       [-1.2534664 ,  1.3212765 ],
       [-1.0698038 ,  1.0213008 ],
       [-0.9071823 ,  0.8377258 ],
       [-1.0452782 ,  1.0803397 ],
       [-1.0825732 ,  1.1000346 ],
       [ 1.7938638 , -1.825214  ],
       [-1.1138895 ,  1.1410542 ],
       [-1.2842449 ,  1.287307  ],
       [ 1.6619502 , -1.5945144 ],
       [-0.9776328 ,  0.87743306],
       [-1.1470191 ,  1.2442986 ],
       [-0.92999357,  0.9439759 ],
       [ 1.7240598 , -1.9179192 ],
       [ 1.7239888 , -1.7073314 ],
       [-0.67987186,  0.7178457 ],
       [-1.2051665 ,  1.1256298 ],
       [ 1.753485  , -1.8150482 ],
       [ 1.8350328 , -1.8648995 ],
       [ 1.221591  , -1.1349193 ],
       [-0.86628574,  0.94437146],
       [-0.9059453 ,  1.05