# BERT models cross-validation pipeline

### Library import and auxiliary function for cross-validation approach

In [1]:
!pip install transformers
!pip install -U sentence-transformers
!pip install datasets
#install nltk emoji library to be used with normalizeTweet()
!pip install nltk emoji==0.6.0
!pip install evaluate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m31.9 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m66.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.12.0-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.12.0 tokenizers-0.13.2 transformers-4.26.1
Looking in indexes: https://pypi.org/simple, http

In [3]:
import pandas as pd
import numpy as np
import torch

import transformers
from datasets import Dataset
from datasets.table import Table
import pyarrow as pa
import pyarrow.dataset as ds
from datasets import Dataset , DatasetDict

from evaluate import load
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, confusion_matrix
from sklearn.metrics import precision_recall_fscore_support, f1_score
from sklearn.model_selection import StratifiedKFold, ParameterGrid,  GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer

metric = load('glue','sst2')

import logging
logging.basicConfig(level=logging.ERROR)

from TweetNormalize import normalizeTweet

import nltk
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('omw-1.4')

from nltk.corpus import stopwords, wordnet
from nltk import word_tokenize, pos_tag
from nltk.stem.wordnet import WordNetLemmatizer

import spacy
import re
nlp = spacy.load('en_core_web_sm')

stopword_list = stopwords.words('english')

doc_counter = 0
def reset_counter():
  global doc_counter
  doc_counter = 0

def increase_counter():
  global doc_counter
  doc_counter += 1
  if doc_counter % 100 == 0:
    print(doc_counter)

def spacy_nlp_tokenizer(text):
    increase_counter()

    # substituting all space characters with a single space
    text = re.sub('\s+', ' ', text)

    # we use spacy for main nlp tasks
    doc = nlp(text)
    # lemmatized tokens, skipping stopwords
    lemmas = ['LEMMA_'+token.lemma_ for token in doc if not token.is_stop]
    # entity_types
    entity_types = ['NER_'+token.ent_type_ for token in doc if token.ent_type_]

    # in case an entity linker is available, we can use it do put actual entities as
    # features, e.g. Queen Elizabeth, Elizabeth II, Her Majesty -> KB2912
    # see https://spacy.io/usage/training#entity-linker
    # entities = ['ENT_'+token.ent_kb_id_ for token in doc if token.ent_kb_id_]

    # we use a simple nltk function to create ngrams
    lemma_bigrams = ['BI_'+p1+'_'+p2 for p1,p2 in nltk.ngrams(lemmas,2)]
    lemma_trigrams = ['TRI_'+p1+'_'+p2+'_'+p3 for p1,p2,p3 in nltk.ngrams(lemmas,3)]

    all_tokens = list()
    all_tokens.extend(lemmas)
    all_tokens.extend(lemma_bigrams)
    all_tokens.extend(lemma_trigrams)
    all_tokens.extend(entity_types)
    return all_tokens

from EDA_AUG import eda_4

# Setting up the device for GPU usage

from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'
print('Is GPU available for usage?', torch.cuda.is_available())
print("How many devices available for 'cuda'?", torch.cuda.device_count())

Downloading builder script:   0%|          | 0.00/5.75k [00:00<?, ?B/s]

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


Is GPU available for usage? True
How many devices available for 'cuda'? 1


  words = [word for word in words if word is not '']
  words = [word for word in words if word is not '']


In [4]:
#import datasets
df_en_train = pd.read_csv("/content/data_sets/corrected_df.csv", sep=',', header=0)
df_en_test = pd.read_csv("/content/data_sets/en_testing_labeled.tsv", sep='\t', header=0)

df_en_train

print(df_en_train.misogyny_category.value_counts())

x_train_binary = [normalizeTweet(i) for i in df_en_train['text']]
y_train_binary = [i for i in df_en_train['misogynous']]
x_test_binary = [normalizeTweet(i) for i in df_en_test['text']]
y_test_binary = [i for i in df_en_test['misogynous']]


df_en_train.drop(df_en_train[df_en_train['misogyny_category'] == '0'].index, inplace = True)
df_en_test.drop(df_en_test[df_en_test['misogyny_category'] == '0'].index, inplace = True)


x_train_multi = [normalizeTweet(i) for i in df_en_train['text']]
y_train_multi = [i for i in df_en_train['misogyny_category']]
x_test_multi = [normalizeTweet(i) for i in df_en_test['text']]
y_test_multi = [i for i in df_en_test['misogyny_category']]




0                    2254
discredit             982
sexual_harassment     334
stereotype            176
dominance             145
derailing              90
Name: misogyny_category, dtype: int64


In [5]:
#define a label encoding for multi_class categories
ordered_labels = sorted(list(set(y_train_multi)))
label_dict = {}
k=0
for i in ordered_labels:
  label_dict[i] = k
  k+=1

label_dict

y_train_multi = [label_dict[i] for i in df_en_train['misogyny_category']]
y_test_multi = [label_dict[i] for i in df_en_test['misogyny_category']]

In [6]:
label_dict

{'derailing': 0,
 'discredit': 1,
 'dominance': 2,
 'sexual_harassment': 3,
 'stereotype': 4}

In [7]:
#create dataframes from the normalized tweets

df_train_binary = pd.DataFrame(data = {'Text': x_train_binary, 'label': y_train_binary},)
df_test_binary = pd.DataFrame(data = {'Text': x_test_binary, 'label': y_test_binary},)
df_train_multi = pd.DataFrame(data = {'Text': x_train_multi, 'label': y_train_multi},)
df_test_multi = pd.DataFrame(data = {'Text': x_test_multi, 'label': y_test_multi},)

display(df_train_binary.head())
display(df_train_multi.head())

Unnamed: 0,Text,label
0,Please tell me why the bitch next to me in the...,1
1,@USER @USER Bitch shut the fuck up,1
2,"@USER Dear cunt , please shut the fuck up .",1
3,RT @USER : Pls shut the fuck up bitch,1
4,"RT @USER : "" when u gonna get your license "" S...",1


Unnamed: 0,Text,label
0,Please tell me why the bitch next to me in the...,2
1,@USER @USER Bitch shut the fuck up,2
2,"@USER Dear cunt , please shut the fuck up .",2
3,RT @USER : Pls shut the fuck up bitch,2
4,"RT @USER : "" when u gonna get your license "" S...",2


In [8]:
#function used to perform BERTweet tokenization

def support_tokenizer(df, tokenizer, max_length = 256):
  
  input_ids = []
  token_type_ids = []
  attention_mask = []

  for sentence in df['Text']:
              #for each sentence, perform a tokenization compatible with bert models
              #getting input_ids, token_type_ids and attention_mask
              sent_tok = tokenizer.encode_plus(sentence, padding="max_length", \
                                              truncation=True, 
                                              add_special_tokens=True,
                                              max_length=256,
                                              pad_to_max_length=True,
                                              return_token_type_ids=True)
              

              input_ids.append(sent_tok['input_ids'])
              token_type_ids.append(sent_tok['token_type_ids'])
              attention_mask.append(sent_tok['attention_mask'])


  #crete a dataframe with the original information + the new tokenized data
  tok_df = pd.DataFrame({'Text': df['Text'], 'label': df['label'],
                         'input_ids': input_ids,'token_type_ids':token_type_ids,
                         'attention_mask':attention_mask})
  return tok_df
    

In [9]:
#convert a pandas dataframe to a HuggingFace compatible one
def convert_hg_dataset(df):
  return Dataset(pa.Table.from_pandas(df))

In [10]:
from transformers import RobertaTokenizer, AutoTokenizer

In [11]:
#define two different tokenizers, one for roberta and the other for bertweet

#roberta_tokenizer = RobertaTokenizer.from_pretrained('roberta-base', truncation=True, do_lower_case=True)
bertweet_tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=False, truncation = True, do_lower_case = True)

Downloading (…)lve/main/config.json:   0%|          | 0.00/558 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/843k [00:00<?, ?B/s]

Downloading (…)solve/main/bpe.codes:   0%|          | 0.00/1.08M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [12]:
from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification, RobertaModel


In [13]:
#download pre-trained models

#model_roberta = AutoModelForSequenceClassification.from_pretrained("roberta-base")
model_bertweet = AutoModelForSequenceClassification.from_pretrained("vinai/bertweet-base")

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/543M [00:00<?, ?B/s]

Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.weight', 'roberta.pooler.dense.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: 

In [14]:
#exploit sklearn libraries in order to have a classification report 
#these functions will be used combined with HugginFace training and testing transformers libraries

def compute_metrics_binary(eval_pred):

    predictions, labels = eval_pred
    pred = np.argmax(predictions, axis=1)
    accuracy = accuracy_score(y_true=labels, y_pred=pred )
    recall = recall_score(y_true=labels, y_pred=pred, )
    precision = precision_score(y_true=labels, y_pred=pred )
    f1 = f1_score(y_true=labels, y_pred=pred)    

    print(classification_report(labels, pred))

    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

def compute_metrics_multi(eval_pred):
  
    predictions, labels = eval_pred
    pred = np.argmax(predictions, axis=1)
    accuracy = accuracy_score(y_true=labels, y_pred=pred)
    recall = recall_score(y_true=labels, y_pred=pred, average = None )
    precision = precision_score(y_true=labels, y_pred=pred, average = None )
    f1 = f1_score(y_true=labels, y_pred=pred, average = None)    

    print(classification_report(labels, pred))

    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

In [None]:
#define a function that given a model_name from transformer library, performs a  k fold cross validation 
#for binary and multi class problems, given a parameters setting in input

#the function always uses stratified cross validation for the classification problems

def transformer_crossval(model_name, params, dataframe, tokenizer, k=5, problem = 'binary', random_state = 42):

  if problem == 'binary':
    compute_metrics = compute_metrics_binary
  if problem == 'multi':
    compute_metrics = compute_metrics_multi

  skf = StratifiedKFold(n_splits=k, shuffle=True, random_state = random_state)
  #definining a precise random state allows us to keep the same folds for different intantiations of the same function

  X = dataframe['Text'] #consider the texts
  y = dataframe['label'] #consider the labels

  num_labels = len(set(y))

  ordered_train_sets = []
  ordered_validation_sets = []

  tokenize_func = lambda sentences: tokenizer(sentences['Text'], \
                                            padding="max_length", \
                                            truncation=True, 
                                            )

  for train_index, test_index in skf.split(X, y):
    
    #for each split done by skf.split(X, y), convert the pandas dataframe into a hg_dataset
    #and perform tokenization on it.
    #then save it in appropriate lists.

    train_df = pd.DataFrame(data = {'Text': X.iloc[train_index, ], 'label': y.iloc[train_index]},)
    validation_df = pd.DataFrame(data = {'Text': X.iloc[test_index, ], 'label': y.iloc[test_index]},)

    train_df = support_tokenizer(train_df, tokenizer, max_length = 256)
    validation_df = support_tokenizer(validation_df, tokenizer, max_length = 256)
    

    train_df = convert_hg_dataset(train_df)
    validation_df = convert_hg_dataset(validation_df)

    ordered_train_sets.append(train_df)
    ordered_validation_sets.append(validation_df)


  loss_metrics_validation = []
  accuracy_metrics_validation = []
  f1_metrics_validation = []

  for training_set, validation_set in zip(ordered_train_sets,ordered_validation_sets):
    print(ordered_train_sets[0])
    print(ordered_validation_sets[0]) 

    #for each training and validation set, we perform a training and a validation
    #with our model, saving the accuracy and validation score 

    training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs= params['epoch'],             # total number of training epochs
    per_device_train_batch_size= params['per_device_train_batch_size'],  # batch size per device during training
    per_device_eval_batch_size= params['per_device_validation_batch_size'],   # batch size for evaluation
    warmup_steps= params['warmup_steps'],                # number of warmup steps for learning rate scheduler
    weight_decay= params['weight_decay'],               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=20,
    learning_rate = params['learning_rate']

)
    
        
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels= num_labels).to(device)   

    trainer = Trainer(
    model= model, 
    args= training_args, 
    train_dataset=training_set,
    eval_dataset=validation_set,
    compute_metrics=compute_metrics,
    )     
    
    trainer.train()

    evaluations = trainer.evaluate()

    print(evaluations) #############################

    loss_metrics_validation.append(evaluations['eval_loss'])
    
    accuracy_metrics_validation.append(evaluations['eval_accuracy'])
    f1_metrics_validation.append(evaluations['eval_f1'].mean())

  result_dict = {
  'avg_loss' : np.array(loss_metrics_validation).mean(),
  'std_loss' : np.array(loss_metrics_validation).std(),
  'avg_accuracy' : np.array(accuracy_metrics_validation).mean(),
  'std_accuracy' : np.array(accuracy_metrics_validation).std(),
  'avg_f1' : np.array(f1_metrics_validation).mean(),
  'std_f1' : np.array(f1_metrics_validation).std(),
  }

  return result_dict

In [None]:
#define a function which fit a transformer model to a dataframe and reports results given the test

def transformer_fit_predict(model_name, params, dataframe_training, dataframe_test, tokenizer, problem = 'binary', random_state = 42):

  if problem == 'binary':
    compute_metrics = compute_metrics_binary
  if problem == 'multi':
    compute_metrics = compute_metrics_multi

  y = dataframe_training['label'] #consider the labels

  num_labels = len(set(y))

  tokenize_func = lambda sentences: tokenizer(sentences['Text'], \
                                            padding="max_length", \
                                            truncation=True, 
                                            )

  train_df = support_tokenizer(dataframe_training, tokenizer, max_length = 256)
  test_df = support_tokenizer(dataframe_test, tokenizer, max_length = 256)

  train_df = convert_hg_dataset(train_df)
  test_df = convert_hg_dataset(test_df)

  training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs= params['epoch'],             # total number of training epochs
    per_device_train_batch_size= params['per_device_train_batch_size'],  # batch size per device during training
    per_device_eval_batch_size= params['per_device_validation_batch_size'],   # batch size for evaluation
    warmup_steps= params['warmup_steps'],                # number of warmup steps for learning rate scheduler
    weight_decay= params['weight_decay'],               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=20,
    learning_rate = params['learning_rate'])
  
    
        
  model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels= num_labels).to(device)   

  trainer = Trainer(
    model= model, 
    args= training_args, 
    train_dataset=train_df,
    eval_dataset=test_df,
    compute_metrics=compute_metrics,
)     
  trainer.train()

  evaluations = trainer.evaluate()

  print(evaluations)

  return model, evaluations

## Stratified cross-validation with BERTweet for binary class

In [None]:
param_grid = {'epoch': [5], 
              'per_device_train_batch_size' : [16],
              'per_device_validation_batch_size' : [8,16],
              'warmup_steps' : [500],
              'learning_rate' : [1e-05,3e-5],
              'weight_decay' : [0.0001, 0.01]
              }


grid = ParameterGrid(param_grid)

updated_grid = []

for i in grid:
  if i['per_device_train_batch_size'] > i['per_device_validation_batch_size']:
    updated_grid.append(i)


#specify the different possible parameters configuration to test
for x in updated_grid:
  print(x)

{'epoch': 5, 'learning_rate': 1e-05, 'per_device_train_batch_size': 16, 'per_device_validation_batch_size': 8, 'warmup_steps': 500, 'weight_decay': 0.0001}
{'epoch': 5, 'learning_rate': 1e-05, 'per_device_train_batch_size': 16, 'per_device_validation_batch_size': 8, 'warmup_steps': 500, 'weight_decay': 0.01}
{'epoch': 5, 'learning_rate': 3e-05, 'per_device_train_batch_size': 16, 'per_device_validation_batch_size': 8, 'warmup_steps': 500, 'weight_decay': 0.0001}
{'epoch': 5, 'learning_rate': 3e-05, 'per_device_train_batch_size': 16, 'per_device_validation_batch_size': 8, 'warmup_steps': 500, 'weight_decay': 0.01}


### NOTE: outputs for the entire cross-validation procedure are not reported due to the difficulty of running it in a single colab session; best configuration for all cases is reported in the next cell every time transformer_crossval() is called

In [None]:
#test different configurations for the binary case

results = []

for i in range(len(updated_grid)):

  print('Testing with configuration: \n')
  print(updated_grid[i], end = '\n\n')

  #tuple_result = (avg_loss, std_loss, avg_accuracy, std_accuracy,)
  tuple_result = transformer_crossval("vinai/bertweet-base" , updated_grid[i], df_train_binary, bertweet_tokenizer,k=5, problem = 'binary')
  result = (updated_grid[i], tuple_result)
  results.append(result)
  print(tuple_result)


In [None]:
best_config = {'weight_decay': 0.0001, 'warmup_steps': 500, 'per_device_validation_batch_size': 8, 'per_device_train_batch_size': 16, 'learning_rate': 3e-05, 'epoch': 5}


In [None]:
transformer_fit_predict("vinai/bertweet-base" , best_config, df_train_binary, df_test_binary, bertweet_tokenizer, problem = 'binary')




Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: 

Step,Training Loss
20,0.6899
40,0.6892
60,0.6831
80,0.684
100,0.673
120,0.6619
140,0.613
160,0.5382
180,0.5015
200,0.4954


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin
Saving model checkpoint to ./results/checkpoint-1000
Configuration saved in ./results/checkpoint-1000/config.json
Model weights saved in ./results/checkpoint-1000/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text. If Text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 8


              precision    recall  f1-score   support

           0       0.76      0.75      0.75       540
           1       0.71      0.72      0.71       460

    accuracy                           0.73      1000
   macro avg       0.73      0.73      0.73      1000
weighted avg       0.73      0.73      0.73      1000

{'eval_loss': 1.1440918445587158, 'eval_accuracy': 0.734, 'eval_precision': 0.709051724137931, 'eval_recall': 0.7152173913043478, 'eval_f1': 0.7121212121212122, 'eval_runtime': 14.8546, 'eval_samples_per_second': 67.319, 'eval_steps_per_second': 8.415, 'epoch': 5.0}


(RobertaForSequenceClassification(
   (roberta): RobertaModel(
     (embeddings): RobertaEmbeddings(
       (word_embeddings): Embedding(64001, 768, padding_idx=1)
       (position_embeddings): Embedding(130, 768, padding_idx=1)
       (token_type_embeddings): Embedding(1, 768)
       (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
       (dropout): Dropout(p=0.1, inplace=False)
     )
     (encoder): RobertaEncoder(
       (layer): ModuleList(
         (0): RobertaLayer(
           (attention): RobertaAttention(
             (self): RobertaSelfAttention(
               (query): Linear(in_features=768, out_features=768, bias=True)
               (key): Linear(in_features=768, out_features=768, bias=True)
               (value): Linear(in_features=768, out_features=768, bias=True)
               (dropout): Dropout(p=0.1, inplace=False)
             )
             (output): RobertaSelfOutput(
               (dense): Linear(in_features=768, out_features=768, bias=True)


## Stratified cross-validation with BERTweet for multi-class

---



In [None]:
#test different configurations and

results = []

for i in range(len(updated_grid)):

  print('Testing with configuration: \n')
  print(updated_grid[i], end = '\n\n')

  #tuple_result = (avg_loss, std_loss, avg_accuracy, std_accuracy,)
  tuple_result = transformer_crossval("vinai/bertweet-base" , updated_grid[i], df_train_multi, bertweet_tokenizer,k=5, problem = 'multi')
  result = (updated_grid[i], tuple_result)
  results.append(result)
  print(tuple_result)


In [None]:
#crossvalidation results for multi-class case BERTweet [NO AUGMENTATION]
{'epoch': 5, 'learning_rate': 1e-05, 'per_device_train_batch_size': 16, 'per_device_validation_batch_size': 8, 'warmup_steps': 500, 'weight_decay': 0.0001}
{'avg_loss': 0.9555049061775207, 'std_loss': 0.01990089166461212, 'avg_accuracy': 0.6688045572589427, 'std_accuracy': 0.020259804124640655, 'avg_f1': 0.41354925905999673, 'std_f1': 0.03231382637080065}

{'epoch': 5, 'learning_rate': 1e-05, 'per_device_train_batch_size': 16, 'per_device_validation_batch_size': 8, 'warmup_steps': 500, 'weight_decay': 0.01}
{'avg_loss': 0.954667842388153, 'std_loss': 0.01865113807747386, 'avg_accuracy': 0.674584904079752, 'std_accuracy': 0.009966775463470062, 'avg_f1': 0.4224969606350145, 'std_f1': 0.02941109302009539}


{'epoch': 5, 'learning_rate': 3e-05, 'per_device_train_batch_size': 16, 'per_device_validation_batch_size': 8, 'warmup_steps': 500, 'weight_decay': 0.0001}
{'avg_loss': 0.949773371219635, 'std_loss': 0.03147020396051596, 'avg_accuracy': 0.682685766943118, 'std_accuracy': 0.015633047387544417, 'avg_f1': 0.5463946231350254, 'std_f1': 0.015116808746339593}


{'epoch': 5, 'learning_rate': 3e-05, 'per_device_train_batch_size': 16, 'per_device_validation_batch_size': 8, 'warmup_steps': 500, 'weight_decay': 0.01}
{'avg_loss': 0.9542999863624573, 'std_loss': 0.03613993268974634, 'avg_accuracy': 0.686747088883304, 'std_accuracy': 0.015550506742450145, 'avg_f1': 0.540460741372458, 'std_f1': 0.01830868482157618}







{'avg_loss': 0.9542999863624573,
 'std_loss': 0.03613993268974634,
 'avg_accuracy': 0.686747088883304,
 'std_accuracy': 0.015550506742450145,
 'avg_f1': 0.540460741372458,
 'std_f1': 0.01830868482157618}

In [None]:
#test result with the best parameter configuration

best_configuration = {'epoch': 5, 'learning_rate': 3e-05, 'per_device_train_batch_size': 16, 'per_device_validation_batch_size': 8, 'warmup_steps': 500, 'weight_decay': 0.0001}

transformer_fit_predict("vinai/bertweet-base", best_configuration, df_train_multi, df_test_multi, bertweet_tokenizer, problem = 'multi')




PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label

Step,Training Loss
20,1.5976
40,1.5474
60,1.4559
80,1.4141
100,1.2651
120,1.2729
140,1.1893
160,1.2213
180,1.1451
200,1.0814


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text. If Text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 460
  Batch size = 8


Trainer is attempting to log a value of "[0.25       0.60199005 0.81333333 0.57407407 0.91803279]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.18181818 0.85815603 0.49193548 0.70454545 0.8       ]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.21052632 0.70760234 0.61306533 0.63265306 0.85496183]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


              precision    recall  f1-score   support

           0       0.25      0.18      0.21        11
           1       0.60      0.86      0.71       141
           2       0.81      0.49      0.61       124
           3       0.57      0.70      0.63        44
           4       0.92      0.80      0.85       140

    accuracy                           0.71       460
   macro avg       0.63      0.61      0.60       460
weighted avg       0.74      0.71      0.71       460

{'eval_loss': 0.920255720615387, 'eval_accuracy': 0.7108695652173913, 'eval_precision': array([0.25      , 0.60199005, 0.81333333, 0.57407407, 0.91803279]), 'eval_recall': array([0.18181818, 0.85815603, 0.49193548, 0.70454545, 0.8       ]), 'eval_f1': array([0.21052632, 0.70760234, 0.61306533, 0.63265306, 0.85496183]), 'eval_runtime': 6.9648, 'eval_samples_per_second': 66.047, 'eval_steps_per_second': 8.328, 'epoch': 5.0}


(RobertaForSequenceClassification(
   (roberta): RobertaModel(
     (embeddings): RobertaEmbeddings(
       (word_embeddings): Embedding(64001, 768, padding_idx=1)
       (position_embeddings): Embedding(130, 768, padding_idx=1)
       (token_type_embeddings): Embedding(1, 768)
       (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
       (dropout): Dropout(p=0.1, inplace=False)
     )
     (encoder): RobertaEncoder(
       (layer): ModuleList(
         (0): RobertaLayer(
           (attention): RobertaAttention(
             (self): RobertaSelfAttention(
               (query): Linear(in_features=768, out_features=768, bias=True)
               (key): Linear(in_features=768, out_features=768, bias=True)
               (value): Linear(in_features=768, out_features=768, bias=True)
               (dropout): Dropout(p=0.1, inplace=False)
             )
             (output): RobertaSelfOutput(
               (dense): Linear(in_features=768, out_features=768, bias=True)


### Data augmentation approach

In [15]:
#perform cross-validation with data-augmentation and see if it performs any better

#the function extends the dataset given a certain label and a limit

def enlarge_df(df, label, limit : float):
  df_new = df
  limit = int(len(df[df['label'] == label]['Text']) * limit)

  to_add = []

  for string in df[df['label'] == label]['Text']:
    k = eda_4(sentence = string, alpha_sr = 0.2, alpha_ri = 0.0, alpha_rs=0.2, num_aug = 10) #generate 10 instances
    for j in k:
      if len(to_add) == limit:
        print(len(to_add))
        return df_new
      df_new = df_new.append({'Text': j , 'label' : label}, ignore_index = True)
      to_add.append(j)


  return df_new     


df_train_multi_five = df_en_train
df_test_multi_five = df_en_test



In [16]:
#rename multi-class dataset columns for consistency with enlarge_df

df_train_multi_five = df_train_multi_five[['text','misogyny_category']].rename(columns={"text": "Text", "misogyny_category": "label"})
df_test_multi_five = df_test_multi_five[['text','misogyny_category']].rename(columns={"text": "Text", "misogyny_category": "label"})

for key, value in label_dict.items():
  df_train_multi_five = df_train_multi_five.replace(key, value)
  df_test_multi_five = df_test_multi_five.replace(key, value)
  

In [17]:
#extend every label excpet discredit, the majority class

for key, value in label_dict.items():
    if key == 'discredit':
        continue   
    new = enlarge_df(df_train_multi_five, value, 0.5)
    df_train_multi_five = new
    
df_train_multi_five['label'].value_counts()

45
72
167
88


1    982
3    501
4    264
2    217
0    135
Name: label, dtype: int64

In [None]:
#perform again a model selection

results = []

for i in range(len(updated_grid)):

  print('Testing with configuration: \n')
  print(updated_grid[i], end = '\n\n')

  #tuple_result = (avg_loss, std_loss, avg_accuracy, std_accuracy,)
  tuple_result = transformer_crossval("vinai/bertweet-base" , updated_grid[i], df_train_multi_five, bertweet_tokenizer,k=5, problem = 'multi')
  result = (updated_grid[i], tuple_result)
  results.append(result)
  print(tuple_result)


Testing with configuration: 

{'epoch': 5, 'learning_rate': 1e-05, 'per_device_train_batch_size': 16, 'per_device_validation_batch_size': 8, 'warmup_steps': 500, 'weight_decay': 0.0001}

Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_mask', '__index_level_0__'],
    num_rows: 1679
})
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_mask', '__index_level_0__'],
    num_rows: 420
})


Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.bias', 'roberta.pooler.dense.bias', 'lm_head.decoder.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: 

Step,Training Loss
20,1.5785
40,1.5826
60,1.5561
80,1.5088
100,1.49
120,1.4519
140,1.4311
160,1.3666
180,1.3597
200,1.3765


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 420
  Batch size = 8


Trainer is attempting to log a value of "[0.68421053 0.70850202 0.76190476 0.8        0.66037736]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.48148148 0.89285714 0.36363636 0.64       0.66037736]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.56521739 0.79006772 0.49230769 0.71111111 0.66037736]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavi

              precision    recall  f1-score   support

           0       0.68      0.48      0.57        27
           1       0.71      0.89      0.79       196
           2       0.76      0.36      0.49        44
           3       0.80      0.64      0.71       100
           4       0.66      0.66      0.66        53

    accuracy                           0.72       420
   macro avg       0.72      0.61      0.64       420
weighted avg       0.73      0.72      0.71       420

{'eval_loss': 0.8860934376716614, 'eval_accuracy': 0.7214285714285714, 'eval_precision': array([0.68421053, 0.70850202, 0.76190476, 0.8       , 0.66037736]), 'eval_recall': array([0.48148148, 0.89285714, 0.36363636, 0.64      , 0.66037736]), 'eval_f1': array([0.56521739, 0.79006772, 0.49230769, 0.71111111, 0.66037736]), 'eval_runtime': 6.0671, 'eval_samples_per_second': 69.226, 'eval_steps_per_second': 8.736, 'epoch': 5.0}
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.6217
40,1.6019
60,1.5781
80,1.5448
100,1.4851
120,1.4821
140,1.458
160,1.3938
180,1.3329
200,1.3373


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 420
  Batch size = 8


Trainer is attempting to log a value of "[0.71428571 0.71764706 0.77272727 0.82051282 0.68181818]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.55555556 0.93367347 0.38636364 0.64       0.56603774]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.625      0.81152993 0.51515152 0.71910112 0.6185567 ]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavi

              precision    recall  f1-score   support

           0       0.71      0.56      0.63        27
           1       0.72      0.93      0.81       196
           2       0.77      0.39      0.52        44
           3       0.82      0.64      0.72       100
           4       0.68      0.57      0.62        53

    accuracy                           0.74       420
   macro avg       0.74      0.62      0.66       420
weighted avg       0.74      0.74      0.72       420

{'eval_loss': 0.850989043712616, 'eval_accuracy': 0.7357142857142858, 'eval_precision': array([0.71428571, 0.71764706, 0.77272727, 0.82051282, 0.68181818]), 'eval_recall': array([0.55555556, 0.93367347, 0.38636364, 0.64      , 0.56603774]), 'eval_f1': array([0.625     , 0.81152993, 0.51515152, 0.71910112, 0.6185567 ]), 'eval_runtime': 6.0358, 'eval_samples_per_second': 69.584, 'eval_steps_per_second': 8.781, 'epoch': 5.0}
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_m

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.6095
40,1.5943
60,1.5813
80,1.5407
100,1.5321
120,1.4816
140,1.4606
160,1.4118
180,1.3476
200,1.3308


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 420
  Batch size = 8


Trainer is attempting to log a value of "[0.71428571 0.716      0.76       0.77894737 0.77777778]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.37037037 0.91326531 0.44186047 0.73267327 0.52830189]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.48780488 0.80269058 0.55882353 0.75510204 0.62921348]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavi

              precision    recall  f1-score   support

           0       0.71      0.37      0.49        27
           1       0.72      0.91      0.80       196
           2       0.76      0.44      0.56        43
           3       0.78      0.73      0.76       101
           4       0.78      0.53      0.63        53

    accuracy                           0.74       420
   macro avg       0.75      0.60      0.65       420
weighted avg       0.74      0.74      0.72       420

{'eval_loss': 0.7860565185546875, 'eval_accuracy': 0.7380952380952381, 'eval_precision': array([0.71428571, 0.716     , 0.76      , 0.77894737, 0.77777778]), 'eval_recall': array([0.37037037, 0.91326531, 0.44186047, 0.73267327, 0.52830189]), 'eval_f1': array([0.48780488, 0.80269058, 0.55882353, 0.75510204, 0.62921348]), 'eval_runtime': 6.0642, 'eval_samples_per_second': 69.259, 'eval_steps_per_second': 8.74, 'epoch': 5.0}
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_m

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.6198
40,1.6071
60,1.5774
80,1.5237
100,1.5132
120,1.4833
140,1.478
160,1.3849
180,1.3587
200,1.3243


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 420
  Batch size = 8


Trainer is attempting to log a value of "[0.77777778 0.70850202 0.76       0.7804878  0.54166667]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.51851852 0.88832487 0.44186047 0.64       0.49056604]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.62222222 0.78828829 0.55882353 0.7032967  0.51485149]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavi

              precision    recall  f1-score   support

           0       0.78      0.52      0.62        27
           1       0.71      0.89      0.79       197
           2       0.76      0.44      0.56        43
           3       0.78      0.64      0.70       100
           4       0.54      0.49      0.51        53

    accuracy                           0.71       420
   macro avg       0.71      0.60      0.64       420
weighted avg       0.71      0.71      0.70       420

{'eval_loss': 0.8993726968765259, 'eval_accuracy': 0.7095238095238096, 'eval_precision': array([0.77777778, 0.70850202, 0.76      , 0.7804878 , 0.54166667]), 'eval_recall': array([0.51851852, 0.88832487, 0.44186047, 0.64      , 0.49056604]), 'eval_f1': array([0.62222222, 0.78828829, 0.55882353, 0.7032967 , 0.51485149]), 'eval_runtime': 6.0423, 'eval_samples_per_second': 69.51, 'eval_steps_per_second': 8.772, 'epoch': 5.0}
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_m

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.604
40,1.6087
60,1.5893
80,1.5507
100,1.4944
120,1.4419
140,1.4725
160,1.3878
180,1.3778
200,1.3089


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 419
  Batch size = 8


Trainer is attempting to log a value of "[0.9375     0.7639485  0.83333333 0.7706422  0.67741935]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.55555556 0.9035533  0.58139535 0.84       0.40384615]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.69767442 0.82790698 0.68493151 0.80382775 0.5060241 ]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


              precision    recall  f1-score   support

           0       0.94      0.56      0.70        27
           1       0.76      0.90      0.83       197
           2       0.83      0.58      0.68        43
           3       0.77      0.84      0.80       100
           4       0.68      0.40      0.51        52

    accuracy                           0.77       419
   macro avg       0.80      0.66      0.70       419
weighted avg       0.77      0.77      0.76       419

{'eval_loss': 0.7861971855163574, 'eval_accuracy': 0.7708830548926014, 'eval_precision': array([0.9375    , 0.7639485 , 0.83333333, 0.7706422 , 0.67741935]), 'eval_recall': array([0.55555556, 0.9035533 , 0.58139535, 0.84      , 0.40384615]), 'eval_f1': array([0.69767442, 0.82790698, 0.68493151, 0.80382775, 0.5060241 ]), 'eval_runtime': 6.0435, 'eval_samples_per_second': 69.33, 'eval_steps_per_second': 8.77, 'epoch': 5.0}
{'avg_loss': 0.8417417764663696, 'std_loss': 0.048083166124973466, 'avg_accuracy': 0.7

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_mask', '__index_level_0__'],
    num_rows: 1679
})
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_mask', '__index_level_0__'],
    num_rows: 420
})


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.6136
40,1.6077
60,1.5792
80,1.5348
100,1.5049
120,1.4716
140,1.4481
160,1.3762
180,1.361
200,1.3719


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 420
  Batch size = 8


Trainer is attempting to log a value of "[0.66666667 0.6848249  0.73684211 0.84285714 0.56603774]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.51851852 0.89795918 0.31818182 0.59       0.56603774]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.58333333 0.77704194 0.44444444 0.69411765 0.56603774]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavi

              precision    recall  f1-score   support

           0       0.67      0.52      0.58        27
           1       0.68      0.90      0.78       196
           2       0.74      0.32      0.44        44
           3       0.84      0.59      0.69       100
           4       0.57      0.57      0.57        53

    accuracy                           0.70       420
   macro avg       0.70      0.58      0.61       420
weighted avg       0.71      0.70      0.68       420

{'eval_loss': 0.896431028842926, 'eval_accuracy': 0.6976190476190476, 'eval_precision': array([0.66666667, 0.6848249 , 0.73684211, 0.84285714, 0.56603774]), 'eval_recall': array([0.51851852, 0.89795918, 0.31818182, 0.59      , 0.56603774]), 'eval_f1': array([0.58333333, 0.77704194, 0.44444444, 0.69411765, 0.56603774]), 'eval_runtime': 6.0364, 'eval_samples_per_second': 69.578, 'eval_steps_per_second': 8.78, 'epoch': 5.0}
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_ma

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.6217
40,1.6019
60,1.5781
80,1.5448
100,1.4851
120,1.4821
140,1.458
160,1.3938
180,1.3329
200,1.3373


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 420
  Batch size = 8


Trainer is attempting to log a value of "[0.71428571 0.71484375 0.77272727 0.83116883 0.68181818]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.55555556 0.93367347 0.38636364 0.64       0.56603774]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.625      0.80973451 0.51515152 0.72316384 0.6185567 ]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavi

              precision    recall  f1-score   support

           0       0.71      0.56      0.63        27
           1       0.71      0.93      0.81       196
           2       0.77      0.39      0.52        44
           3       0.83      0.64      0.72       100
           4       0.68      0.57      0.62        53

    accuracy                           0.74       420
   macro avg       0.74      0.62      0.66       420
weighted avg       0.74      0.74      0.72       420

{'eval_loss': 0.8511837124824524, 'eval_accuracy': 0.7357142857142858, 'eval_precision': array([0.71428571, 0.71484375, 0.77272727, 0.83116883, 0.68181818]), 'eval_recall': array([0.55555556, 0.93367347, 0.38636364, 0.64      , 0.56603774]), 'eval_f1': array([0.625     , 0.80973451, 0.51515152, 0.72316384, 0.6185567 ]), 'eval_runtime': 6.0782, 'eval_samples_per_second': 69.099, 'eval_steps_per_second': 8.72, 'epoch': 5.0}
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_m

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.6095
40,1.5943
60,1.5813
80,1.5407
100,1.5321
120,1.4816
140,1.4606
160,1.4118
180,1.3476
200,1.3308


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 420
  Batch size = 8


Trainer is attempting to log a value of "[0.75       0.72177419 0.76       0.78350515 0.76315789]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.33333333 0.91326531 0.44186047 0.75247525 0.54716981]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.46153846 0.80630631 0.55882353 0.76767677 0.63736264]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavi

              precision    recall  f1-score   support

           0       0.75      0.33      0.46        27
           1       0.72      0.91      0.81       196
           2       0.76      0.44      0.56        43
           3       0.78      0.75      0.77       101
           4       0.76      0.55      0.64        53

    accuracy                           0.74       420
   macro avg       0.76      0.60      0.65       420
weighted avg       0.75      0.74      0.73       420

{'eval_loss': 0.7883722186088562, 'eval_accuracy': 0.7428571428571429, 'eval_precision': array([0.75      , 0.72177419, 0.76      , 0.78350515, 0.76315789]), 'eval_recall': array([0.33333333, 0.91326531, 0.44186047, 0.75247525, 0.54716981]), 'eval_f1': array([0.46153846, 0.80630631, 0.55882353, 0.76767677, 0.63736264]), 'eval_runtime': 6.0704, 'eval_samples_per_second': 69.188, 'eval_steps_per_second': 8.731, 'epoch': 5.0}
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.6198
40,1.6071
60,1.5774
80,1.5237
100,1.5132
120,1.4833
140,1.478
160,1.3849
180,1.3587
200,1.3243


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 420
  Batch size = 8


Trainer is attempting to log a value of "[0.77777778 0.70850202 0.76       0.7804878  0.54166667]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.51851852 0.88832487 0.44186047 0.64       0.49056604]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.62222222 0.78828829 0.55882353 0.7032967  0.51485149]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavi

              precision    recall  f1-score   support

           0       0.78      0.52      0.62        27
           1       0.71      0.89      0.79       197
           2       0.76      0.44      0.56        43
           3       0.78      0.64      0.70       100
           4       0.54      0.49      0.51        53

    accuracy                           0.71       420
   macro avg       0.71      0.60      0.64       420
weighted avg       0.71      0.71      0.70       420

{'eval_loss': 0.8994224071502686, 'eval_accuracy': 0.7095238095238096, 'eval_precision': array([0.77777778, 0.70850202, 0.76      , 0.7804878 , 0.54166667]), 'eval_recall': array([0.51851852, 0.88832487, 0.44186047, 0.64      , 0.49056604]), 'eval_f1': array([0.62222222, 0.78828829, 0.55882353, 0.7032967 , 0.51485149]), 'eval_runtime': 6.0609, 'eval_samples_per_second': 69.297, 'eval_steps_per_second': 8.745, 'epoch': 5.0}
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.604
40,1.6087
60,1.5893
80,1.5507
100,1.4944
120,1.4419
140,1.4725
160,1.3878
180,1.3778
200,1.3089


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 419
  Batch size = 8


Trainer is attempting to log a value of "[0.9375     0.76495726 0.83333333 0.77777778 0.67741935]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.55555556 0.90862944 0.58139535 0.84       0.40384615]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.69767442 0.83062645 0.68493151 0.80769231 0.5060241 ]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


              precision    recall  f1-score   support

           0       0.94      0.56      0.70        27
           1       0.76      0.91      0.83       197
           2       0.83      0.58      0.68        43
           3       0.78      0.84      0.81       100
           4       0.68      0.40      0.51        52

    accuracy                           0.77       419
   macro avg       0.80      0.66      0.71       419
weighted avg       0.78      0.77      0.76       419

{'eval_loss': 0.7826442122459412, 'eval_accuracy': 0.7732696897374701, 'eval_precision': array([0.9375    , 0.76495726, 0.83333333, 0.77777778, 0.67741935]), 'eval_recall': array([0.55555556, 0.90862944, 0.58139535, 0.84      , 0.40384615]), 'eval_f1': array([0.69767442, 0.83062645, 0.68493151, 0.80769231, 0.5060241 ]), 'eval_runtime': 6.0608, 'eval_samples_per_second': 69.133, 'eval_steps_per_second': 8.745, 'epoch': 5.0}
{'avg_loss': 0.8436107158660888, 'std_loss': 0.05045886793741185, 'avg_accuracy': 0.

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_mask', '__index_level_0__'],
    num_rows: 1679
})
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_mask', '__index_level_0__'],
    num_rows: 420
})


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.6093
40,1.5867
60,1.5191
80,1.4086
100,1.3888
120,1.3662
140,1.3404
160,1.2406
180,1.198
200,1.1571


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 420
  Batch size = 8


Trainer is attempting to log a value of "[0.6        0.75229358 0.7037037  0.82716049 0.5625    ]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.66666667 0.83673469 0.43181818 0.67       0.67924528]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.63157895 0.79227053 0.53521127 0.74033149 0.61538462]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavi

              precision    recall  f1-score   support

           0       0.60      0.67      0.63        27
           1       0.75      0.84      0.79       196
           2       0.70      0.43      0.54        44
           3       0.83      0.67      0.74       100
           4       0.56      0.68      0.62        53

    accuracy                           0.72       420
   macro avg       0.69      0.66      0.66       420
weighted avg       0.73      0.72      0.72       420

{'eval_loss': 0.8871079087257385, 'eval_accuracy': 0.7238095238095238, 'eval_precision': array([0.6       , 0.75229358, 0.7037037 , 0.82716049, 0.5625    ]), 'eval_recall': array([0.66666667, 0.83673469, 0.43181818, 0.67      , 0.67924528]), 'eval_f1': array([0.63157895, 0.79227053, 0.53521127, 0.74033149, 0.61538462]), 'eval_runtime': 6.0899, 'eval_samples_per_second': 68.967, 'eval_steps_per_second': 8.703, 'epoch': 5.0}
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.6181
40,1.5795
60,1.5243
80,1.4317
100,1.3415
120,1.3806
140,1.3655
160,1.2665
180,1.1628
200,1.1126


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 420
  Batch size = 8


Trainer is attempting to log a value of "[0.7037037  0.75210084 0.75       0.79310345 0.77777778]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.7037037  0.91326531 0.54545455 0.69       0.52830189]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.7037037  0.82488479 0.63157895 0.73796791 0.62921348]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavi

              precision    recall  f1-score   support

           0       0.70      0.70      0.70        27
           1       0.75      0.91      0.82       196
           2       0.75      0.55      0.63        44
           3       0.79      0.69      0.74       100
           4       0.78      0.53      0.63        53

    accuracy                           0.76       420
   macro avg       0.76      0.68      0.71       420
weighted avg       0.76      0.76      0.75       420

{'eval_loss': 0.8085600137710571, 'eval_accuracy': 0.7595238095238095, 'eval_precision': array([0.7037037 , 0.75210084, 0.75      , 0.79310345, 0.77777778]), 'eval_recall': array([0.7037037 , 0.91326531, 0.54545455, 0.69      , 0.52830189]), 'eval_f1': array([0.7037037 , 0.82488479, 0.63157895, 0.73796791, 0.62921348]), 'eval_runtime': 6.0788, 'eval_samples_per_second': 69.093, 'eval_steps_per_second': 8.719, 'epoch': 5.0}
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.606
40,1.5674
60,1.5192
80,1.4239
100,1.4347
120,1.3746
140,1.3757
160,1.2889
180,1.1857
200,1.1088


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 420
  Batch size = 8


Trainer is attempting to log a value of "[0.62857143 0.82198953 0.64864865 0.7755102  0.6779661 ]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.81481481 0.80102041 0.55813953 0.75247525 0.75471698]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.70967742 0.81136951 0.6        0.7638191  0.71428571]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavi

              precision    recall  f1-score   support

           0       0.63      0.81      0.71        27
           1       0.82      0.80      0.81       196
           2       0.65      0.56      0.60        43
           3       0.78      0.75      0.76       101
           4       0.68      0.75      0.71        53

    accuracy                           0.76       420
   macro avg       0.71      0.74      0.72       420
weighted avg       0.76      0.76      0.76       420

{'eval_loss': 0.7229498028755188, 'eval_accuracy': 0.7595238095238095, 'eval_precision': array([0.62857143, 0.82198953, 0.64864865, 0.7755102 , 0.6779661 ]), 'eval_recall': array([0.81481481, 0.80102041, 0.55813953, 0.75247525, 0.75471698]), 'eval_f1': array([0.70967742, 0.81136951, 0.6       , 0.7638191 , 0.71428571]), 'eval_runtime': 6.0748, 'eval_samples_per_second': 69.138, 'eval_steps_per_second': 8.725, 'epoch': 5.0}
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.6159
40,1.5823
60,1.5189
80,1.3908
100,1.4097
120,1.3829
140,1.3912
160,1.2392
180,1.172
200,1.0653


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 420
  Batch size = 8


Trainer is attempting to log a value of "[0.75       0.71428571 0.6875     0.77011494 0.64102564]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.66666667 0.86294416 0.51162791 0.67       0.47169811]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.70588235 0.7816092  0.58666667 0.71657754 0.54347826]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavi

              precision    recall  f1-score   support

           0       0.75      0.67      0.71        27
           1       0.71      0.86      0.78       197
           2       0.69      0.51      0.59        43
           3       0.77      0.67      0.72       100
           4       0.64      0.47      0.54        53

    accuracy                           0.72       420
   macro avg       0.71      0.64      0.67       420
weighted avg       0.72      0.72      0.71       420

{'eval_loss': 0.9416081309318542, 'eval_accuracy': 0.719047619047619, 'eval_precision': array([0.75      , 0.71428571, 0.6875    , 0.77011494, 0.64102564]), 'eval_recall': array([0.66666667, 0.86294416, 0.51162791, 0.67      , 0.47169811]), 'eval_f1': array([0.70588235, 0.7816092 , 0.58666667, 0.71657754, 0.54347826]), 'eval_runtime': 6.0771, 'eval_samples_per_second': 69.112, 'eval_steps_per_second': 8.721, 'epoch': 5.0}
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_m

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.6
40,1.5802
60,1.5363
80,1.4527
100,1.3731
120,1.3103
140,1.3959
160,1.2536
180,1.2113
200,1.0597


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 419
  Batch size = 8


Trainer is attempting to log a value of "[0.82608696 0.76744186 0.86666667 0.73275862 0.65714286]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.7037037  0.83756345 0.60465116 0.85       0.44230769]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.76       0.80097087 0.71232877 0.78703704 0.52873563]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


              precision    recall  f1-score   support

           0       0.83      0.70      0.76        27
           1       0.77      0.84      0.80       197
           2       0.87      0.60      0.71        43
           3       0.73      0.85      0.79       100
           4       0.66      0.44      0.53        52

    accuracy                           0.76       419
   macro avg       0.77      0.69      0.72       419
weighted avg       0.76      0.76      0.75       419

{'eval_loss': 0.8275337815284729, 'eval_accuracy': 0.7589498806682577, 'eval_precision': array([0.82608696, 0.76744186, 0.86666667, 0.73275862, 0.65714286]), 'eval_recall': array([0.7037037 , 0.83756345, 0.60465116, 0.85      , 0.44230769]), 'eval_f1': array([0.76      , 0.80097087, 0.71232877, 0.78703704, 0.52873563]), 'eval_runtime': 6.0598, 'eval_samples_per_second': 69.144, 'eval_steps_per_second': 8.746, 'epoch': 5.0}
{'avg_loss': 0.8375519275665283, 'std_loss': 0.07397030488880738, 'avg_accuracy': 0.

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_mask', '__index_level_0__'],
    num_rows: 1679
})
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_mask', '__index_level_0__'],
    num_rows: 420
})


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.6093
40,1.5867
60,1.5191
80,1.4086
100,1.3888
120,1.3662
140,1.3404
160,1.2406
180,1.198
200,1.1572


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 420
  Batch size = 8


Trainer is attempting to log a value of "[0.6        0.75117371 0.63333333 0.84810127 0.54411765]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.66666667 0.81632653 0.43181818 0.67       0.69811321]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.63157895 0.78239609 0.51351351 0.74860335 0.61157025]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavi

              precision    recall  f1-score   support

           0       0.60      0.67      0.63        27
           1       0.75      0.82      0.78       196
           2       0.63      0.43      0.51        44
           3       0.85      0.67      0.75       100
           4       0.54      0.70      0.61        53

    accuracy                           0.72       420
   macro avg       0.68      0.66      0.66       420
weighted avg       0.73      0.72      0.71       420

{'eval_loss': 0.8943487405776978, 'eval_accuracy': 0.7166666666666667, 'eval_precision': array([0.6       , 0.75117371, 0.63333333, 0.84810127, 0.54411765]), 'eval_recall': array([0.66666667, 0.81632653, 0.43181818, 0.67      , 0.69811321]), 'eval_f1': array([0.63157895, 0.78239609, 0.51351351, 0.74860335, 0.61157025]), 'eval_runtime': 6.0868, 'eval_samples_per_second': 69.002, 'eval_steps_per_second': 8.707, 'epoch': 5.0}
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.6181
40,1.5795
60,1.5243
80,1.4317
100,1.3415
120,1.3806
140,1.3654
160,1.2666
180,1.1627
200,1.1124


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 420
  Batch size = 8


Trainer is attempting to log a value of "[0.7037037  0.75847458 0.77419355 0.80232558 0.75      ]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.7037037  0.91326531 0.54545455 0.69       0.56603774]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.7037037  0.8287037  0.64       0.74193548 0.64516129]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavi

              precision    recall  f1-score   support

           0       0.70      0.70      0.70        27
           1       0.76      0.91      0.83       196
           2       0.77      0.55      0.64        44
           3       0.80      0.69      0.74       100
           4       0.75      0.57      0.65        53

    accuracy                           0.76       420
   macro avg       0.76      0.68      0.71       420
weighted avg       0.77      0.76      0.76       420

{'eval_loss': 0.8057152628898621, 'eval_accuracy': 0.7642857142857142, 'eval_precision': array([0.7037037 , 0.75847458, 0.77419355, 0.80232558, 0.75      ]), 'eval_recall': array([0.7037037 , 0.91326531, 0.54545455, 0.69      , 0.56603774]), 'eval_f1': array([0.7037037 , 0.8287037 , 0.64      , 0.74193548, 0.64516129]), 'eval_runtime': 6.0581, 'eval_samples_per_second': 69.328, 'eval_steps_per_second': 8.749, 'epoch': 5.0}
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.606
40,1.5674
60,1.5192
80,1.4239
100,1.4347
120,1.3746
140,1.3757
160,1.2889
180,1.1851
200,1.1113


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 420
  Batch size = 8


Trainer is attempting to log a value of "[0.76923077 0.8125     0.70588235 0.78217822 0.74509804]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.74074074 0.8622449  0.55813953 0.78217822 0.71698113]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.75471698 0.83663366 0.62337662 0.78217822 0.73076923]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavi

              precision    recall  f1-score   support

           0       0.77      0.74      0.75        27
           1       0.81      0.86      0.84       196
           2       0.71      0.56      0.62        43
           3       0.78      0.78      0.78       101
           4       0.75      0.72      0.73        53

    accuracy                           0.79       420
   macro avg       0.76      0.73      0.75       420
weighted avg       0.78      0.79      0.78       420

{'eval_loss': 0.7131219506263733, 'eval_accuracy': 0.7857142857142857, 'eval_precision': array([0.76923077, 0.8125    , 0.70588235, 0.78217822, 0.74509804]), 'eval_recall': array([0.74074074, 0.8622449 , 0.55813953, 0.78217822, 0.71698113]), 'eval_f1': array([0.75471698, 0.83663366, 0.62337662, 0.78217822, 0.73076923]), 'eval_runtime': 6.0742, 'eval_samples_per_second': 69.145, 'eval_steps_per_second': 8.725, 'epoch': 5.0}
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.6159
40,1.5823
60,1.5189
80,1.3908
100,1.4097
120,1.3829
140,1.3912
160,1.2392
180,1.1719
200,1.0651


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 420
  Batch size = 8


Trainer is attempting to log a value of "[0.6875     0.74885845 0.6875     0.71134021 0.675     ]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.81481481 0.83248731 0.51162791 0.69       0.50943396]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.74576271 0.78846154 0.58666667 0.70050761 0.58064516]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavi

              precision    recall  f1-score   support

           0       0.69      0.81      0.75        27
           1       0.75      0.83      0.79       197
           2       0.69      0.51      0.59        43
           3       0.71      0.69      0.70       100
           4       0.68      0.51      0.58        53

    accuracy                           0.72       420
   macro avg       0.70      0.67      0.68       420
weighted avg       0.72      0.72      0.72       420

{'eval_loss': 0.9594967365264893, 'eval_accuracy': 0.7238095238095238, 'eval_precision': array([0.6875    , 0.74885845, 0.6875    , 0.71134021, 0.675     ]), 'eval_recall': array([0.81481481, 0.83248731, 0.51162791, 0.69      , 0.50943396]), 'eval_f1': array([0.74576271, 0.78846154, 0.58666667, 0.70050761, 0.58064516]), 'eval_runtime': 6.1224, 'eval_samples_per_second': 68.6, 'eval_steps_per_second': 8.657, 'epoch': 5.0}
Dataset({
    features: ['Text', 'label', 'input_ids', 'token_type_ids', 'attention_ma

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": 

Step,Training Loss
20,1.6
40,1.5802
60,1.5363
80,1.4527
100,1.3731
120,1.3103
140,1.3959
160,1.2536
180,1.2113
200,1.0597


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 419
  Batch size = 8


Trainer is attempting to log a value of "[0.76       0.76056338 0.85714286 0.70833333 0.63636364]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.7037037  0.82233503 0.55813953 0.85       0.40384615]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.73076923 0.7902439  0.67605634 0.77272727 0.49411765]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


              precision    recall  f1-score   support

           0       0.76      0.70      0.73        27
           1       0.76      0.82      0.79       197
           2       0.86      0.56      0.68        43
           3       0.71      0.85      0.77       100
           4       0.64      0.40      0.49        52

    accuracy                           0.74       419
   macro avg       0.74      0.67      0.69       419
weighted avg       0.74      0.74      0.73       419

{'eval_loss': 0.8315685987472534, 'eval_accuracy': 0.7422434367541766, 'eval_precision': array([0.76      , 0.76056338, 0.85714286, 0.70833333, 0.63636364]), 'eval_recall': array([0.7037037 , 0.82233503, 0.55813953, 0.85      , 0.40384615]), 'eval_f1': array([0.73076923, 0.7902439 , 0.67605634, 0.77272727, 0.49411765]), 'eval_runtime': 6.0632, 'eval_samples_per_second': 69.105, 'eval_steps_per_second': 8.741, 'epoch': 5.0}
{'avg_loss': 0.8408502578735352, 'std_loss': 0.0831555314928094, 'avg_accuracy': 0.7

In [None]:
#cross-validation results for BERTweet after EDA AUGMENTATION APPROACH


{'epoch': 5, 'learning_rate': 1e-05, 'per_device_train_batch_size': 16, 'per_device_validation_batch_size': 8, 'warmup_steps': 500, 'weight_decay': 0.0001}

{'avg_loss': 0.836749529838562, 'std_loss': 0.02075077741974913, 'avg_accuracy': 0.7359447004608295, 'std_accuracy': 0.008935815405375743, 'avg_f1': 0.6608662137094996, 'std_f1': 0.017969838953794084}

{'epoch': 5, 'learning_rate': 1e-05, 'per_device_train_batch_size': 16, 'per_device_validation_batch_size': 8, 'warmup_steps': 500, 'weight_decay': 0.01}

{'avg_loss': 0.8311222553253174, 'std_loss': 0.019607455106381306, 'avg_accuracy': 0.7345622119815667, 'std_accuracy': 0.009147204258654005, 'avg_f1': 0.6586178401516201, 'std_f1': 0.01348793240955729}

{'epoch': 5, 'learning_rate': 3e-05, 'per_device_train_batch_size': 16, 'per_device_validation_batch_size': 8, 'warmup_steps': 500, 'weight_decay': 0.01}


{'avg_loss': 0.8364081501960754, 'std_loss': 0.04591267743316787, 'avg_accuracy': 0.7497695852534563, 'std_accuracy': 0.017218010776285174, 'avg_f1': 0.7020609259300116, 'std_f1': 0.023128162178396007}



{'epoch': 5, 'learning_rate': 3e-05, 'per_device_train_batch_size': 16, 'per_device_validation_batch_size': 8, 'warmup_steps': 500, 'weight_decay': 0.0001}



{'avg_loss': 0.8197516322135925, 'std_loss': 0.051124447542434746, 'avg_accuracy': 0.7571428571428571, 'std_accuracy': 0.013870891191974566, 'avg_f1': 0.7080092538817573, 'std_f1': 0.0265260270643998}



In [None]:
#test result with the best parameter configuration

best_configuration = {'epoch': 5, 'learning_rate': 3e-05, 'per_device_train_batch_size': 16, 'per_device_validation_batch_size': 8, 'warmup_steps': 500, 'weight_decay': 0.0001}

transformer_fit_predict("vinai/bertweet-base", best_configuration, df_train_multi_five, df_test_multi_five, bertweet_tokenizer, problem = 'multi')



PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label

Step,Training Loss
20,1.6113
40,1.5795
60,1.5007
80,1.4577
100,1.3898
120,1.323
140,1.3548
160,1.293
180,1.2001
200,1.1245


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text, __index_level_0__. If Text, __index_level_0__ are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 460
  Batch size = 8


Trainer is attempting to log a value of "[0.36363636 0.55909091 0.75862069 0.59615385 0.93277311]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.36363636 0.87234043 0.35483871 0.70454545 0.79285714]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.36363636 0.68144044 0.48351648 0.64583333 0.85714286]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


              precision    recall  f1-score   support

           0       0.36      0.36      0.36        11
           1       0.56      0.87      0.68       141
           2       0.76      0.35      0.48       124
           3       0.60      0.70      0.65        44
           4       0.93      0.79      0.86       140

    accuracy                           0.68       460
   macro avg       0.64      0.62      0.61       460
weighted avg       0.73      0.68      0.67       460

{'eval_loss': 1.0978748798370361, 'eval_accuracy': 0.6804347826086956, 'eval_precision': array([0.36363636, 0.55909091, 0.75862069, 0.59615385, 0.93277311]), 'eval_recall': array([0.36363636, 0.87234043, 0.35483871, 0.70454545, 0.79285714]), 'eval_f1': array([0.36363636, 0.68144044, 0.48351648, 0.64583333, 0.85714286]), 'eval_runtime': 6.8626, 'eval_samples_per_second': 67.03, 'eval_steps_per_second': 8.452, 'epoch': 5.0}


(RobertaForSequenceClassification(
   (roberta): RobertaModel(
     (embeddings): RobertaEmbeddings(
       (word_embeddings): Embedding(64001, 768, padding_idx=1)
       (position_embeddings): Embedding(130, 768, padding_idx=1)
       (token_type_embeddings): Embedding(1, 768)
       (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
       (dropout): Dropout(p=0.1, inplace=False)
     )
     (encoder): RobertaEncoder(
       (layer): ModuleList(
         (0): RobertaLayer(
           (attention): RobertaAttention(
             (self): RobertaSelfAttention(
               (query): Linear(in_features=768, out_features=768, bias=True)
               (key): Linear(in_features=768, out_features=768, bias=True)
               (value): Linear(in_features=768, out_features=768, bias=True)
               (dropout): Dropout(p=0.1, inplace=False)
             )
             (output): RobertaSelfOutput(
               (dense): Linear(in_features=768, out_features=768, bias=True)


In [None]:
#testing the augmented dataset with MLP

parameters_mlp = {'hidden_layer_sizes': [(8,16,32),(16,32,64,128),(64,128,256),], 
                 'activation':['relu','logistic','tanh'], 
                 'solver': ['lbfgs','sgd','adam'],
                 'max_iter' : [9000],
                  'early_stopping' : [True]}


from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-MiniLM-L6-v2')

#find lingustic features relevant for classification

def find_how_many(string_list,reference):
  count = 0
  for x in string_list:
    if x == reference:
      count += 1

  return count

def extract_adjs(string):
  tweet_adjs= []
  doc = nlp(string)
  adjs = 0
  for token in doc:
    if token.pos_ == 'ADJ':
     adjs += 1
  return adjs

def linguistic_extraction(dataset):
  linguistic_features = []
  for x in dataset:
    lenght = len(x)
    urls = find_how_many(x.split(),'HTTPURL')
    users = find_how_many(x.split(),'@USER')
    adjs =  extract_adjs(x)
    linguistic_features.append([lenght] + [urls] + [users] + [adjs])
  return linguistic_features

def perform_gridsearch(classifier, param_grid, cv, X_train, y_train, X_test, y_test, scoring = None):

  clf = GridSearchCV(estimator = classifier, param_grid = param_grid, n_jobs = -1, cv = cv, scoring = scoring)
  clf.fit(X_train, y_train)

  best_model = clf.best_estimator_

  predictions = best_model.predict(X_test)

  print('Best configuration', clf.best_params_)
  print('Best mean score for the validation', clf.best_score_)
  print('Std for the best mean score across folds',clf.cv_results_['std_test_score'][clf.best_index_])
  print('******************')
  print('Classification report:')
  print(classification_report(y_test, predictions, digits = 7))
  print('Confusion matrix:')
  cm = confusion_matrix(y_test, predictions)
  print(cm)
  print('******************')
  print(clf.cv_results_['mean_test_score'])
  return best_model

skf = StratifiedKFold(shuffle=True, random_state = 42, n_splits = 5)



Downloading:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [None]:
x_train_multi = [normalizeTweet(i) for i in df_train_multi_five['Text']]
y_train_multi = [i for i in df_train_multi_five['label']]
x_test_multi = [normalizeTweet(i) for i in df_test_multi_five['Text']]
y_test_multi = [i for i in df_test_multi_five['label']]

In [None]:
linguistic_features_train_multi = linguistic_extraction(x_train_multi)
linguistic_features_test_multi = linguistic_extraction(x_test_multi)

vect = CountVectorizer(analyzer=spacy_nlp_tokenizer, min_df=5)  
reset_counter()

X_train_tok_multi = vect.fit_transform(x_train_multi)
reset_counter()

X_test_tok_multi = vect.transform(x_test_multi)


tfidf = TfidfTransformer()  # weighting
tfidf.fit(X_train_tok_multi)
X_train_vec_multi = tfidf.transform(X_train_tok_multi)
X_test_vec_multi = tfidf.transform(X_test_tok_multi)


X_train_vec_multi_arr = X_train_vec_multi.toarray()
X_test_vec_multi_arr = X_test_vec_multi.toarray()

X_train_full_multi = []
X_test_full_multi = []



embeddings_train_multi = model.encode(x_train_multi)
embeddings_test_multi = model.encode(x_test_multi)

for i in range(len(X_train_vec_multi_arr)):
  arr1 = X_train_vec_multi_arr[i]
  arr2 = embeddings_train_multi[i]
  arr3 = linguistic_features_train_multi[i]
  X_train_full_multi.append(np.concatenate((arr1,arr2,arr3), axis =0))

for i in range(len(X_test_vec_multi_arr)):
  arr1 = X_test_vec_multi_arr[i]
  arr2 = embeddings_test_multi[i]
  arr3 = linguistic_features_test_multi[i]
  X_test_full_multi.append(np.concatenate((arr1,arr2,arr3), axis =0))



100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
100
200
300
400


In [None]:
#RESULTS WITH sparse matrix + embedding + linguistic features

perform_gridsearch(MLPClassifier(), parameters_mlp, skf, X_train_full_multi, y_train_multi, X_test_full_multi, y_test_multi, scoring = 'f1_macro')


Best configuration {'activation': 'tanh', 'early_stopping': True, 'hidden_layer_sizes': (64, 128, 256), 'max_iter': 9000, 'solver': 'adam'}
Best mean score for the validation 0.6607431437334649
Std for the best mean score across folds 0.026382555174533463
******************
Classification report:
              precision    recall  f1-score   support

           0  0.2142857 0.2727273 0.2400000        11
           1  0.4363636 0.8510638 0.5769231       141
           2  0.9333333 0.1129032 0.2014388       124
           3  0.3039216 0.7045455 0.4246575        44
           4  0.9629630 0.3714286 0.5360825       140

    accuracy                      0.4782609       460
   macro avg  0.5701734 0.4625337 0.3958204       460
weighted avg  0.7126198 0.4782609 0.4406545       460

Confusion matrix:
[[  3   4   0   4   0]
 [  4 120   0  16   1]
 [  4  63  14  42   1]
 [  0  13   0  31   0]
 [  3  75   1   9  52]]
******************
[0.46080958 0.12901667 0.34981831 0.55917537 0.12847652 0.61

MLPClassifier(activation='tanh', early_stopping=True,
              hidden_layer_sizes=(64, 128, 256), max_iter=9000)

# From 5 labels to 3 labels

In [None]:
df_train_multi

Unnamed: 0,Text,label
0,Please tell me why the bitch next to me in the...,2
1,@USER @USER Bitch shut the fuck up,2
2,"@USER Dear cunt , please shut the fuck up .",2
3,RT @USER : Pls shut the fuck up bitch,2
4,"RT @USER : "" when u gonna get your license "" S...",2
...,...,...
1722,@USER @USER @USER @USER This ugly bitch has a ...,1
1723,@USER Your lady probably is a bitch though . M...,1
1724,"Women are bitches . 1 time , this bitch in BAL...",1
1725,Any woman that likes me is a bitch . All women...,1


In [None]:
#What happens dropping the most under-represented class and merging the other two ?

df_train_multi_three = df_train_multi
df_test_multi_three = df_test_multi

df_train_multi_three = df_train_multi_three[df_train_multi_three.label != label_dict['derailing']]
df_test_multi_three = df_test_multi_three[df_test_multi_three.label != label_dict['derailing']]

df_train_multi_three.label.replace( [label_dict['dominance']],  [ label_dict['stereotype']]   ,  inplace=True)
df_test_multi_three.label.replace( [label_dict['dominance']],  [ label_dict['stereotype']]   ,  inplace=True)

df_train_multi_three = df_train_multi_three.reset_index()
df_test_multi_three = df_test_multi_three.reset_index()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)


In [None]:
set(df_train_multi_three['label'])

{1, 3, 4}

In [None]:
label_dict
#0: discredit, 1: sexual_harassment, 2: stereotype_dominance

{'derailing': 0,
 'discredit': 1,
 'dominance': 2,
 'sexual_harassment': 3,
 'stereotype': 4}

In [None]:
df_train_multi_three['label'].replace([1,3,4], [0,1,2], inplace = True)
df_test_multi_three['label'].replace([1,3,4], [0,1,2], inplace = True)

In [None]:
df_train_multi_three['label'].value_counts()

0    982
1    334
2    321
Name: label, dtype: int64

In [None]:
#extend every label excpet discredit, the majority class

for key, value in label_dict.items():
    if key == 0:
        continue   
    new = enlarge_df(df_train_multi_three, value, 0.5)
    df_train_multi_three = new
    
df_train_multi_three['label'].value_counts()

491
167
160


0    1473
1     501
2     481
Name: label, dtype: int64

In [None]:
#perform again a model selection

results = []

for i in range(len(updated_grid)):

  print('Testing with configuration: \n')
  print(updated_grid[i], end = '\n\n')

  #tuple_result = (avg_loss, std_loss, avg_accuracy, std_accuracy,)
  tuple_result = transformer_crossval("vinai/bertweet-base" , updated_grid[i], df_train_multi_three, bertweet_tokenizer,k=5, problem = 'multi')
  result = (updated_grid[i], tuple_result)
  results.append(result)
  print(tuple_result)


In [None]:
#test result with the best parameter configuration

best_configuration = {'epoch': 5, 'learning_rate': 3e-05, 'per_device_train_batch_size': 16, 'per_device_validation_batch_size': 8, 'warmup_steps': 500, 'weight_decay': 0.0001}

transformer_fit_predict("vinai/bertweet-base", best_configuration, df_train_multi_three, df_test_multi_three, bertweet_tokenizer, problem = 'multi')




PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--vinai--bertweet-base/snapshots/118ab1d567653bec16bbb081eafb6f8942f72108/config.json
Model config RobertaConfig {
  "_name_or_path": "vinai/bertweet-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1":

Step,Training Loss
20,1.0924
40,1.0671
60,1.0042
80,0.9602
100,0.9564
120,0.9011
140,0.8706
160,0.8269
180,0.6919
200,0.6774


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Text. If Text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 449
  Batch size = 8


Trainer is attempting to log a value of "[0.59798995 0.57407407 0.91326531]" of type <class 'numpy.ndarray'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.84397163 0.70454545 0.6780303 ]" of type <class 'numpy.ndarray'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.7        0.63265306 0.77826087]" of type <class 'numpy.ndarray'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


              precision    recall  f1-score   support

           0       0.60      0.84      0.70       141
           1       0.57      0.70      0.63        44
           2       0.91      0.68      0.78       264

    accuracy                           0.73       449
   macro avg       0.70      0.74      0.70       449
weighted avg       0.78      0.73      0.74       449

{'eval_loss': 0.9769389629364014, 'eval_accuracy': 0.732739420935412, 'eval_precision': array([0.59798995, 0.57407407, 0.91326531]), 'eval_recall': array([0.84397163, 0.70454545, 0.6780303 ]), 'eval_f1': array([0.7       , 0.63265306, 0.77826087]), 'eval_runtime': 6.7204, 'eval_samples_per_second': 66.811, 'eval_steps_per_second': 8.482, 'epoch': 5.0}


(RobertaForSequenceClassification(
   (roberta): RobertaModel(
     (embeddings): RobertaEmbeddings(
       (word_embeddings): Embedding(64001, 768, padding_idx=1)
       (position_embeddings): Embedding(130, 768, padding_idx=1)
       (token_type_embeddings): Embedding(1, 768)
       (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
       (dropout): Dropout(p=0.1, inplace=False)
     )
     (encoder): RobertaEncoder(
       (layer): ModuleList(
         (0): RobertaLayer(
           (attention): RobertaAttention(
             (self): RobertaSelfAttention(
               (query): Linear(in_features=768, out_features=768, bias=True)
               (key): Linear(in_features=768, out_features=768, bias=True)
               (value): Linear(in_features=768, out_features=768, bias=True)
               (dropout): Dropout(p=0.1, inplace=False)
             )
             (output): RobertaSelfOutput(
               (dense): Linear(in_features=768, out_features=768, bias=True)
