In [1]:
#Importing necessary libraries

import re
import scipy
import pandas         as pd
import io
import numpy          as np
import copy
import csv

import transformers
from transformers                     import  AutoModel, AutoTokenizer, AdamW, get_linear_schedule_with_warmup
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments

import torch

from gsitk.preprocess import pprocess_twitter, Preprocessor


from tqdm import tqdm


from sklearn.metrics                  import classification_report
from sklearn.feature_extraction.text  import TfidfVectorizer
from sklearn.model_selection import train_test_split


from torch                            import nn, optim
from torch.utils                      import data

#Seeding for deterministic results
RANDOM_SEED = 64
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

if torch.cuda.is_available():
   torch.cuda.manual_seed(RANDOM_SEED)
   torch.cuda.manual_seed_all(RANDOM_SEED) 
   torch.backends.cudnn.deterministic = True  
   torch.backends.cudnn.benchmark = False


CLASS_NAMES = ['sadness', 'joy', 'anger', 'surprise', 'disgust', 'fear', 'others']
MAX_LENGTH = 200
BATCH_SIZE = 16
EPOCHS = 5
MODEL = "cardiffnlp/twitter-xlm-roberta-base"

tokenizer = AutoTokenizer.from_pretrained(MODEL,use_fast=True)


In [2]:
#Converting labels to numbers
def label_sadness(label):
  if label   == 'sadness':
    return 1
  else:
    return 0

def label_joy(label):
  if label   == 'joy':
    return 1
  else:
    return 0

def label_anger(label):
  if label   == 'anger':
    return 1
  else:
    return 0

def label_surprise(label):
  if label   == 'surprise':
    return 1
  else:
    return 0

def label_disgust(label):
  if label   == 'disgust':
    return 1
  else:
    return 0

def label_fear(label):
  if label   == 'fear':
    return 1
  else:
    return 0

def label_others(label):
  if label   == 'others':
    return 1
  else:
    return 0

funs = [label_sadness, label_joy, label_anger, label_surprise, label_disgust, label_fear, label_others]

In [3]:
#Converting labels to numbers
def int_to_label(label):
  if label   == 0:
    return 'sadness'
  elif label == 1:
    return 'joy'
  elif label == 2:
    return 'anger'
  elif label == 3:
    return 'surprise'
  elif label == 4:
    return 'disgust'
  elif label == 5:
    return 'fear'
  elif label == 6:
    return 'others'

In [4]:
#Creates a dataset which will be used to feed to RoBERTa
class EmotionDataset(data.Dataset):

  def __init__(self, id, tweet, labelValue,  tokenizer, max_len):
    self.tweet    = tweet      #First input sequence that will be supplied to RoBERTa
    self.id = id
#     self.extra_feats   = extrafeats     #Extra features
    self.labelValue  = labelValue    #label value for each training example in the dataset
    self.tokenizer   = tokenizer     #tokenizer that will be used to tokenize input sequences (Uses BERT-tokenizer here)
    self.max_len     = max_len       #Maximum length of the tokens from the input sequence that BERT needs to attend to

  def __len__(self):
    return len(self.labelValue)

  def __getitem__(self, item):
    tweet    = str(self.tweet[item])

    
    #Encoding the first and the second sequence to a form accepted by RoBERTa
    #RoBERTa does not use token_type_ids to distinguish the first sequence from the second sequnece.
    encoding = tokenizer.encode_plus(
        tweet,
        max_length = self.max_len,
        add_special_tokens= True,
        truncation = True,
        padding = 'max_length',
        return_attention_mask = True,
        return_tensors = 'pt'
    )

    return {
        'tweet' : tweet,
        'tweet_id': self.id[item],
        'input_ids': encoding['input_ids'].flatten(),
        'attention_mask': encoding['attention_mask'].flatten(),
        'labels'  : torch.tensor(self.labelValue[item], dtype=torch.long)
#         'extra_features' : torch.tensor(self.extra_feats[item]).float()
    }


In [5]:
#Creates a data loader
def createDataLoader(dataframe, tokenizer, max_len, batch_size):
  ds = EmotionDataset(
      tweet    = dataframe.tweet.to_numpy(),
      labelValue  = dataframe.label.to_numpy(),
      tokenizer   = tokenizer,
      max_len     = max_len,
      id = dataframe.id.to_numpy()
  )

  return data.DataLoader(
      ds,
      batch_size  = batch_size,
      shuffle     = False,
      num_workers = 4
  )



In [6]:
from transformers import Trainer
class MultilabelTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fct = torch.nn.CrossEntropyLoss()
        loss = loss_fct(logits,labels)
        return (loss, outputs) if return_outputs else loss

In [7]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted',zero_division=0)
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

In [8]:
#This function gets the predictions from the model after it is trained.
def get_predictions(model, data_loader):

  model = model.eval()

  predictions = []
  prediction_probs = []
  real_values = []
  ids = []

  with torch.no_grad():
    for d in tqdm(data_loader):


      input_ids              = d["input_ids"].to(device)
      attention_mask         = d["attention_mask"].to(device)
      labels                 = d["labels"].to(device)

      #Getting the softmax output from model
      outputs = model(
        input_ids             = input_ids,
        attention_mask        = attention_mask
      )

      _, preds = torch.max(outputs.logits, dim=1)     #Determining the model predictions


      predictions.extend(preds)
      prediction_probs.extend(outputs.logits)
      real_values.extend(labels)
      ids.extend(d['tweet_id'])

  predictions = torch.stack(predictions).cpu()
  prediction_probs = torch.stack(prediction_probs).cpu()
  real_values = torch.stack(real_values).cpu()

  return ids, predictions, prediction_probs, real_values


In [9]:
df_dev = pd.read_csv('data/EmoEvalEs/dev.tsv', sep='\t', quoting=csv.QUOTE_NONE, usecols=['id'])
df_test = pd.read_csv('data/EmoEvalEs/emoevales_test.tsv', sep='\t', quoting=csv.QUOTE_NONE, usecols=['id'])

In [10]:
for emotion, f in zip(CLASS_NAMES, funs):
    
    train = pd.read_csv('data/EmoEvalEs/train.tsv', sep='\t', quoting=csv.QUOTE_NONE)
    dev = pd.read_csv('data/EmoEvalEs/dev.tsv', sep='\t', quoting=csv.QUOTE_NONE)
    test = pd.read_csv('data/EmoEvalEs/emoevales_test.tsv', sep='\t', quoting=csv.QUOTE_NONE)


    train['label'] = train.emotion.apply(f)
    dev['label'] = dev.emotion.apply(f)
    test['label'] = 1
    train.drop(columns=['emotion','event','offensive'], inplace=True)
    dev.drop(columns=['emotion','event','offensive'], inplace=True)
    test.drop(columns=['event','offensive'], inplace=True)


    train['tweet'] = Preprocessor(pprocess_twitter).transform(train.tweet)
    dev['tweet'] = Preprocessor(pprocess_twitter).transform(dev.tweet)
    test['tweet'] = Preprocessor(pprocess_twitter).transform(test.tweet)

    print(train.head())

    #Creating data loader for test data
    devDataLoader         = createDataLoader(dev, tokenizer, MAX_LENGTH, BATCH_SIZE)

    #Creating data loader for test data
    testDataLoader         = createDataLoader(test, tokenizer, MAX_LENGTH, BATCH_SIZE)

    #Creating data loader for training data
    trainDataset        = EmotionDataset(train.id.to_numpy(), train.tweet.to_numpy(), train.label.to_numpy(), tokenizer, MAX_LENGTH)

    #Creating data loader for development data
    developmentDataset  = EmotionDataset(dev.id.to_numpy(),dev.tweet.to_numpy(), dev.label.to_numpy(), tokenizer, MAX_LENGTH)

    #Creating data loader for test data
    testDataset         = EmotionDataset(test.id.to_numpy(),test.tweet.to_numpy(), test.label.to_numpy(), tokenizer, MAX_LENGTH)

    training_args = TrainingArguments(
        output_dir='./resultsEmotion-binary',                   # output directory
        num_train_epochs=EPOCHS,                  # total number of training epochs
        per_device_train_batch_size=BATCH_SIZE,   # batch size per device during training
        per_device_eval_batch_size=BATCH_SIZE,    # batch size for evaluation
        warmup_steps=100,                         # number of warmup steps for learning rate scheduler
        weight_decay=0.01,                        # strength of weight decay
        logging_dir='./logsEmotion-binary',                     # directory for storing logs
        logging_steps=10,                         # when to print log
        load_best_model_at_end=True,
        evaluation_strategy = 'epoch'
    )

    num_labels = len(set(train.label.tolist()))
    print(f'Num labels: {num_labels}')

    model = AutoModelForSequenceClassification.from_pretrained(MODEL, num_labels=num_labels)

    trainer = MultilabelTrainer(
        model=model,                              # the instantiated ðŸ¤— Transformers model to be trained
        args=training_args,                       # training arguments, defined above
        train_dataset=trainDataset,              # training dataset
        eval_dataset=developmentDataset,                 # evaluation dataset
        compute_metrics=compute_metrics 
    )

    trainer.train()

    trainer.save_model(f"./resultsEmotion-preproc/best_model_{emotion}") # save best model

    print(trainer.evaluate())

    m = AutoModelForSequenceClassification.from_pretrained(f"./resultsEmotion-preproc/best_model_{emotion}", num_labels=num_labels)

    device = torch.device("cuda")
    m = m.to(device)
    
    #Getting model predictions on dev dataset
    ids_dev, yHat_dev, predProbs_dev, yTest_dev = get_predictions(
      m,
      devDataLoader
    )
    df_dev[emotion] = yHat_dev
    df_dev[f'probs_{emotion}'] = predProbs_dev

      #Printing classification report for dev dataset (Evaluating the model on Dev set)
    print(classification_report(yTest_dev, yHat_dev, target_names= ['other', emotion]))

    #Getting model predictions on dev dataset
    ids_test, yHat_test, predProbs_test, yTest_test = get_predictions(
      m,
      testDataLoader
    )
    df_test[emotion] = yHat_test
    df_test[f'probs_{emotion}'] = predProbs_test
    del m

                                     id  \
0  a0c1a858-a9b8-4cb1-8a81-1602736ff5b8   
1  9b272817-a231-4f68-bdf4-3350d4919330   
2  4bd5b1e5-4b74-440a-82f4-c2567a241011   
3  0bb9d7c9-d781-4684-890e-a94bfb50acc0   
4  88749098-d539-4500-9209-0bbfae2b109c   

                                               tweet  label  
0  la gran guerra de <hastag> juegodetronos nos h...      0  
1  el golpe de estado en <hastag> venezuela estÃ¡ ...      0  
2  no tengo una foto en la catedral de <hastag> n...      1  
3  <hastag> notredame nunca llegue a visitar tan ...      1  
4  a tomar por culo mi crush ðŸ˜­ðŸ˜­ðŸ˜­ðŸ˜­ðŸ˜­<hastag> juegod...      1  
Num labels: 2


Some weights of the model checkpoint at cardiffnlp/twitter-xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-xlm-roberta-base and are newly initialized: ['classifier.den

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Runtime,Samples Per Second
1,0.1847,0.208436,0.939573,0.940414,0.941491,0.939573,5.5178,152.96
2,0.1036,0.167887,0.944313,0.943962,0.943658,0.944313,5.2179,161.75
3,0.1729,0.201617,0.946682,0.942237,0.945351,0.946682,5.4391,155.172
4,0.0736,0.224297,0.946682,0.94434,0.944189,0.946682,5.2571,160.546
5,0.0244,0.26181,0.940758,0.939183,0.938408,0.940758,5.6,150.714


{'eval_loss': 0.16788703203201294, 'eval_accuracy': 0.9443127962085308, 'eval_f1': 0.9439616544238548, 'eval_precision': 0.9436581478435776, 'eval_recall': 0.9443127962085308, 'eval_runtime': 5.1915, 'eval_samples_per_second': 162.573, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 634880, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 331776, 'eval_mem_gpu_peaked_delta': 131887616}


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 53/53 [00:04<00:00, 10.99it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

              precision    recall  f1-score   support

       other       0.97      0.97      0.97       740
     sadness       0.78      0.76      0.77       104

    accuracy                           0.94       844
   macro avg       0.87      0.86      0.87       844
weighted avg       0.94      0.94      0.94       844



100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 104/104 [00:08<00:00, 12.81it/s]


                                     id  \
0  a0c1a858-a9b8-4cb1-8a81-1602736ff5b8   
1  9b272817-a231-4f68-bdf4-3350d4919330   
2  4bd5b1e5-4b74-440a-82f4-c2567a241011   
3  0bb9d7c9-d781-4684-890e-a94bfb50acc0   
4  88749098-d539-4500-9209-0bbfae2b109c   

                                               tweet  label  
0  la gran guerra de <hastag> juegodetronos nos h...      0  
1  el golpe de estado en <hastag> venezuela estÃ¡ ...      0  
2  no tengo una foto en la catedral de <hastag> n...      0  
3  <hastag> notredame nunca llegue a visitar tan ...      0  
4  a tomar por culo mi crush ðŸ˜­ðŸ˜­ðŸ˜­ðŸ˜­ðŸ˜­<hastag> juegod...      0  
Num labels: 2


Some weights of the model checkpoint at cardiffnlp/twitter-xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-xlm-roberta-base and are newly initialized: ['classifier.den

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Runtime,Samples Per Second
1,0.3843,0.335598,0.847156,0.849857,0.853563,0.847156,5.3417,158.002
2,0.3102,0.316071,0.869668,0.867131,0.865654,0.869668,5.1224,164.767
3,0.1439,0.390573,0.868483,0.870112,0.872245,0.868483,5.126,164.652
4,0.1001,0.515756,0.853081,0.856534,0.861872,0.853081,5.2863,159.658
5,0.0149,0.614253,0.862559,0.863884,0.865506,0.862559,4.9767,169.591


{'eval_loss': 0.3160707950592041, 'eval_accuracy': 0.8696682464454977, 'eval_f1': 0.8671311613342572, 'eval_precision': 0.8656538807520381, 'eval_recall': 0.8696682464454977, 'eval_runtime': 5.0971, 'eval_samples_per_second': 165.584, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 24576, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 69632, 'eval_mem_gpu_peaked_delta': 130413056}


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 53/53 [00:04<00:00, 10.71it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

              precision    recall  f1-score   support

       other       0.91      0.93      0.92       663
         joy       0.72      0.65      0.68       181

    accuracy                           0.87       844
   macro avg       0.81      0.79      0.80       844
weighted avg       0.87      0.87      0.87       844



100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 104/104 [00:08<00:00, 12.88it/s]


                                     id  \
0  a0c1a858-a9b8-4cb1-8a81-1602736ff5b8   
1  9b272817-a231-4f68-bdf4-3350d4919330   
2  4bd5b1e5-4b74-440a-82f4-c2567a241011   
3  0bb9d7c9-d781-4684-890e-a94bfb50acc0   
4  88749098-d539-4500-9209-0bbfae2b109c   

                                               tweet  label  
0  la gran guerra de <hastag> juegodetronos nos h...      0  
1  el golpe de estado en <hastag> venezuela estÃ¡ ...      0  
2  no tengo una foto en la catedral de <hastag> n...      0  
3  <hastag> notredame nunca llegue a visitar tan ...      0  
4  a tomar por culo mi crush ðŸ˜­ðŸ˜­ðŸ˜­ðŸ˜­ðŸ˜­<hastag> juegod...      0  
Num labels: 2


Some weights of the model checkpoint at cardiffnlp/twitter-xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-xlm-roberta-base and are newly initialized: ['classifier.den

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Runtime,Samples Per Second
1,0.277,0.281424,0.925355,0.906914,0.925191,0.925355,5.1474,163.967
2,0.1949,0.201883,0.924171,0.911999,0.914961,0.924171,5.144,164.073
3,0.2182,0.177253,0.936019,0.933029,0.931655,0.936019,5.136,164.329
4,0.0735,0.297928,0.92891,0.925117,0.923296,0.92891,5.393,156.5
5,0.0299,0.330978,0.925355,0.924759,0.924209,0.925355,5.2534,160.658


{'eval_loss': 0.17725278437137604, 'eval_accuracy': 0.9360189573459715, 'eval_f1': 0.9330289358805706, 'eval_precision': 0.931654956805346, 'eval_recall': 0.9360189573459715, 'eval_runtime': 5.3882, 'eval_samples_per_second': 156.637, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 143360, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 40960, 'eval_mem_gpu_peaked_delta': 130413056}


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 53/53 [00:04<00:00, 10.72it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

              precision    recall  f1-score   support

       other       0.95      0.97      0.96       759
       anger       0.72      0.59      0.65        85

    accuracy                           0.94       844
   macro avg       0.84      0.78      0.81       844
weighted avg       0.93      0.94      0.93       844



100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 104/104 [00:08<00:00, 12.78it/s]


                                     id  \
0  a0c1a858-a9b8-4cb1-8a81-1602736ff5b8   
1  9b272817-a231-4f68-bdf4-3350d4919330   
2  4bd5b1e5-4b74-440a-82f4-c2567a241011   
3  0bb9d7c9-d781-4684-890e-a94bfb50acc0   
4  88749098-d539-4500-9209-0bbfae2b109c   

                                               tweet  label  
0  la gran guerra de <hastag> juegodetronos nos h...      0  
1  el golpe de estado en <hastag> venezuela estÃ¡ ...      0  
2  no tengo una foto en la catedral de <hastag> n...      0  
3  <hastag> notredame nunca llegue a visitar tan ...      0  
4  a tomar por culo mi crush ðŸ˜­ðŸ˜­ðŸ˜­ðŸ˜­ðŸ˜­<hastag> juegod...      0  
Num labels: 2


Some weights of the model checkpoint at cardiffnlp/twitter-xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-xlm-roberta-base and are newly initialized: ['classifier.den

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Runtime,Samples Per Second
1,0.283,0.157863,0.958531,0.938235,0.918781,0.958531,5.3166,158.747
2,0.1304,0.168182,0.958531,0.938235,0.918781,0.958531,5.2036,162.194
3,0.1435,0.16142,0.958531,0.938235,0.918781,0.958531,5.7252,147.42
4,0.0711,0.161543,0.959716,0.948132,0.947131,0.959716,5.7988,145.546
5,0.035,0.167508,0.957346,0.949076,0.945508,0.957346,5.6916,148.29


{'eval_loss': 0.15786345303058624, 'eval_accuracy': 0.9585308056872038, 'eval_f1': 0.9382352350888661, 'eval_precision': 0.91878130545136, 'eval_recall': 0.9585308056872038, 'eval_runtime': 5.7197, 'eval_samples_per_second': 147.561, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 24576, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 65536, 'eval_mem_gpu_peaked_delta': 130413056}


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 53/53 [00:04<00:00, 10.81it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  0%|          | 0/104 [00:00<?, ?it/s]

              precision    recall  f1-score   support

       other       0.96      1.00      0.98       809
    surprise       0.00      0.00      0.00        35

    accuracy                           0.96       844
   macro avg       0.48      0.50      0.49       844
weighted avg       0.92      0.96      0.94       844



100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 104/104 [00:08<00:00, 12.82it/s]


                                     id  \
0  a0c1a858-a9b8-4cb1-8a81-1602736ff5b8   
1  9b272817-a231-4f68-bdf4-3350d4919330   
2  4bd5b1e5-4b74-440a-82f4-c2567a241011   
3  0bb9d7c9-d781-4684-890e-a94bfb50acc0   
4  88749098-d539-4500-9209-0bbfae2b109c   

                                               tweet  label  
0  la gran guerra de <hastag> juegodetronos nos h...      0  
1  el golpe de estado en <hastag> venezuela estÃ¡ ...      0  
2  no tengo una foto en la catedral de <hastag> n...      0  
3  <hastag> notredame nunca llegue a visitar tan ...      0  
4  a tomar por culo mi crush ðŸ˜­ðŸ˜­ðŸ˜­ðŸ˜­ðŸ˜­<hastag> juegod...      0  
Num labels: 2


Some weights of the model checkpoint at cardiffnlp/twitter-xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-xlm-roberta-base and are newly initialized: ['classifier.den

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Runtime,Samples Per Second
1,0.0613,0.102294,0.981043,0.971655,0.962445,0.981043,5.6117,150.4
2,0.1086,0.0991,0.981043,0.971655,0.962445,0.981043,5.4712,154.261
3,0.0846,0.089413,0.981043,0.971655,0.962445,0.981043,5.634,149.806
4,0.012,0.138727,0.979858,0.971062,0.962423,0.979858,5.939,142.111
5,0.0212,0.146756,0.977488,0.969874,0.962378,0.977488,5.7706,146.259


{'eval_loss': 0.08941256254911423, 'eval_accuracy': 0.981042654028436, 'eval_f1': 0.9716546860473027, 'eval_precision': 0.9624446890231576, 'eval_recall': 0.981042654028436, 'eval_runtime': 5.4962, 'eval_samples_per_second': 153.561, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 94208, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 61440, 'eval_mem_gpu_peaked_delta': 130413056}


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 53/53 [00:04<00:00, 10.89it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  0%|          | 0/104 [00:00<?, ?it/s]

              precision    recall  f1-score   support

       other       0.98      1.00      0.99       828
     disgust       0.00      0.00      0.00        16

    accuracy                           0.98       844
   macro avg       0.49      0.50      0.50       844
weighted avg       0.96      0.98      0.97       844



100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 104/104 [00:08<00:00, 12.51it/s]


                                     id  \
0  a0c1a858-a9b8-4cb1-8a81-1602736ff5b8   
1  9b272817-a231-4f68-bdf4-3350d4919330   
2  4bd5b1e5-4b74-440a-82f4-c2567a241011   
3  0bb9d7c9-d781-4684-890e-a94bfb50acc0   
4  88749098-d539-4500-9209-0bbfae2b109c   

                                               tweet  label  
0  la gran guerra de <hastag> juegodetronos nos h...      0  
1  el golpe de estado en <hastag> venezuela estÃ¡ ...      0  
2  no tengo una foto en la catedral de <hastag> n...      0  
3  <hastag> notredame nunca llegue a visitar tan ...      0  
4  a tomar por culo mi crush ðŸ˜­ðŸ˜­ðŸ˜­ðŸ˜­ðŸ˜­<hastag> juegod...      0  
Num labels: 2


Some weights of the model checkpoint at cardiffnlp/twitter-xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-xlm-roberta-base and are newly initialized: ['classifier.den

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Runtime,Samples Per Second
1,0.0365,0.058634,0.989336,0.984033,0.978787,0.989336,4.9353,171.015
2,0.0532,0.057325,0.989336,0.984033,0.978787,0.989336,4.9894,169.16
3,0.055,0.059601,0.989336,0.984033,0.978787,0.989336,4.9869,169.245
4,0.1011,0.044655,0.989336,0.984033,0.978787,0.989336,5.1345,164.378
5,0.0418,0.032036,0.991706,0.989085,0.991775,0.991706,5.1624,163.49


{'eval_loss': 0.032036133110523224, 'eval_accuracy': 0.9917061611374408, 'eval_f1': 0.9890845221496604, 'eval_precision': 0.9917751122918801, 'eval_recall': 0.9917061611374408, 'eval_runtime': 5.0602, 'eval_samples_per_second': 166.792, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 8192, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 73728, 'eval_mem_gpu_peaked_delta': 130413056}


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 53/53 [00:04<00:00, 10.64it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

              precision    recall  f1-score   support

       other       0.99      1.00      1.00       835
        fear       1.00      0.22      0.36         9

    accuracy                           0.99       844
   macro avg       1.00      0.61      0.68       844
weighted avg       0.99      0.99      0.99       844



100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 104/104 [00:08<00:00, 12.73it/s]


                                     id  \
0  a0c1a858-a9b8-4cb1-8a81-1602736ff5b8   
1  9b272817-a231-4f68-bdf4-3350d4919330   
2  4bd5b1e5-4b74-440a-82f4-c2567a241011   
3  0bb9d7c9-d781-4684-890e-a94bfb50acc0   
4  88749098-d539-4500-9209-0bbfae2b109c   

                                               tweet  label  
0  la gran guerra de <hastag> juegodetronos nos h...      1  
1  el golpe de estado en <hastag> venezuela estÃ¡ ...      1  
2  no tengo una foto en la catedral de <hastag> n...      0  
3  <hastag> notredame nunca llegue a visitar tan ...      0  
4  a tomar por culo mi crush ðŸ˜­ðŸ˜­ðŸ˜­ðŸ˜­ðŸ˜­<hastag> juegod...      0  
Num labels: 2


Some weights of the model checkpoint at cardiffnlp/twitter-xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-xlm-roberta-base and are newly initialized: ['classifier.den

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Runtime,Samples Per Second
1,0.5244,0.538125,0.753555,0.749637,0.774651,0.753555,5.2145,161.856
2,0.4429,0.485608,0.768957,0.76897,0.769348,0.768957,5.0494,167.148
3,0.2565,0.578839,0.785545,0.784952,0.787405,0.785545,5.0399,167.462
4,0.175,0.700598,0.774882,0.774288,0.776536,0.774882,5.0778,166.214
5,0.0824,0.841751,0.764218,0.763734,0.765325,0.764218,5.27,160.152


{'eval_loss': 0.48560822010040283, 'eval_accuracy': 0.768957345971564, 'eval_f1': 0.7689699955882527, 'eval_precision': 0.7693475662215269, 'eval_recall': 0.768957345971564, 'eval_runtime': 5.1817, 'eval_samples_per_second': 162.882, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 106496, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 20480, 'eval_mem_gpu_peaked_delta': 130413056}


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 53/53 [00:04<00:00, 10.81it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

              precision    recall  f1-score   support

       other       0.78      0.76      0.77       430
      others       0.76      0.78      0.77       414

    accuracy                           0.77       844
   macro avg       0.77      0.77      0.77       844
weighted avg       0.77      0.77      0.77       844



100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 104/104 [00:08<00:00, 12.87it/s]


In [12]:
df_dev.head()

Unnamed: 0,id,sadness,probs_sadness,joy,probs_joy,anger,probs_anger,surprise,probs_surprise,disgust,probs_disgust,fear,probs_fear,others,probs_others
0,d23cfa8a-dad1-45b6-90eb-b786cd21e7d3,0,"(tensor(2.1775), tensor(-2.6061))",0,"(tensor(0.8122), tensor(-0.8980))",0,"(tensor(3.3597), tensor(-3.2102))",0,"(tensor(2.0922), tensor(-1.7751))",0,"(tensor(2.0833), tensor(-1.9457))",0,"(tensor(3.2390), tensor(-3.1019))",0,"(tensor(0.3610), tensor(-0.0371))"
1,5192574e-af5e-4ccb-aa1d-b801a9395b7f,1,"(tensor(-1.2853), tensor(1.2862))",0,"(tensor(1.5347), tensor(-1.4379))",0,"(tensor(0.6478), tensor(-0.8687))",0,"(tensor(2.0227), tensor(-1.6946))",0,"(tensor(2.3554), tensor(-2.1683))",0,"(tensor(3.2265), tensor(-3.1065))",0,"(tensor(1.0557), tensor(-0.6814))"
2,86a2042d-4964-4e07-a02b-aa2953a86ced,0,"(tensor(2.3103), tensor(-2.8410))",0,"(tensor(2.1206), tensor(-1.8977))",1,"(tensor(-0.7081), tensor(0.5931))",0,"(tensor(1.9138), tensor(-1.6044))",0,"(tensor(2.0155), tensor(-1.8536))",0,"(tensor(3.2155), tensor(-3.1082))",1,"(tensor(-0.4797), tensor(0.7404))"
3,067c0c3e-459e-4b36-8223-22d8ce7f9cd9,0,"(tensor(2.3598), tensor(-2.8128))",1,"(tensor(-0.1507), tensor(-0.1341))",0,"(tensor(3.1925), tensor(-3.0603))",0,"(tensor(2.0707), tensor(-1.7401))",0,"(tensor(2.1409), tensor(-2.0117))",0,"(tensor(3.2346), tensor(-3.0895))",0,"(tensor(0.4162), tensor(0.0151))"
4,5243fe33-bcea-4300-8f2e-b79e63557673,0,"(tensor(2.2474), tensor(-2.6807))",0,"(tensor(2.0996), tensor(-1.8899))",0,"(tensor(1.9631), tensor(-2.0919))",0,"(tensor(1.9930), tensor(-1.6738))",0,"(tensor(1.8055), tensor(-1.6735))",0,"(tensor(3.2330), tensor(-3.1128))",1,"(tensor(-0.6910), tensor(0.8079))"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
839,58a876f1-b866-4555-88d9-8bd1bf237074,0,"(tensor(2.8300), tensor(-3.2035))",0,"(tensor(0.8433), tensor(-0.9076))",0,"(tensor(3.3203), tensor(-3.1399))",0,"(tensor(2.0918), tensor(-1.7284))",0,"(tensor(2.1324), tensor(-1.9383))",0,"(tensor(3.2467), tensor(-3.0958))",1,"(tensor(-1.0118), tensor(0.9327))"
840,74590933-6e77-414e-b06c-6c51eff61452,0,"(tensor(2.5368), tensor(-2.9950))",1,"(tensor(-1.0199), tensor(0.6685))",0,"(tensor(2.9915), tensor(-2.9174))",0,"(tensor(2.1637), tensor(-1.8366))",0,"(tensor(2.0667), tensor(-1.8883))",0,"(tensor(3.2433), tensor(-3.1019))",0,"(tensor(1.2977), tensor(-0.8392))"
841,6c7c0d00-7834-494c-be41-581883c6d241,1,"(tensor(-2.2629), tensor(2.3998))",0,"(tensor(2.1960), tensor(-1.9443))",0,"(tensor(2.9317), tensor(-2.8464))",0,"(tensor(1.9453), tensor(-1.6147))",0,"(tensor(2.1876), tensor(-1.9977))",0,"(tensor(3.2326), tensor(-3.0848))",0,"(tensor(1.9541), tensor(-1.5800))"
842,2f06b007-bfbf-45f7-b720-e8d19e9f2751,1,"(tensor(-1.2638), tensor(1.3494))",0,"(tensor(2.2329), tensor(-1.9811))",0,"(tensor(1.5373), tensor(-1.7167))",0,"(tensor(2.2931), tensor(-1.9370))",0,"(tensor(2.2516), tensor(-2.0682))",0,"(tensor(2.6328), tensor(-2.2371))",0,"(tensor(1.1728), tensor(-0.7890))"


In [83]:
dev_total = pd.read_pickle('preds_dev/xlmroberta.pck')
test_total = pd.read_csv('preds_test/submission-roberta-final.tsv',sep='\t', quoting=csv.QUOTE_NONE, names=['id','emotion'])
dev = pd.read_csv('data/EmoEvalEs/dev.tsv', sep='\t', quoting=csv.QUOTE_NONE)
test = pd.read_csv('data/EmoEvalEs/emoevales_test.tsv', sep='\t', quoting=csv.QUOTE_NONE)

In [84]:
test_total.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1656 entries, 0 to 1655
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   id       1656 non-null   object
 1   emotion  1656 non-null   object
dtypes: object(2)
memory usage: 26.0+ KB


In [85]:
dev_total = dev_total.merge(df_dev, on='id')
dev_total = dev_total.merge(dev[['id','emotion']], on='id')
test_total = test_total.merge(df_test, on='id')


In [86]:
probs_cols = ['probs_sadness','probs_joy','probs_anger', 'probs_surprise', 'probs_disgust', 'probs_fear', 'probs_others']

In [87]:
for col in probs_cols:
    dev_total[col] = dev_total[col].apply(convertTensors)
    test_total[col] = test_total[col].apply(convertTensors)

In [69]:
dev_total['emotion_probs'] = dev_total[probs_cols].idxmax(axis=1)
test_total['emotion_probs'] = test_total[probs_cols].idxmax(axis=1)

In [70]:
test_total.head()

Unnamed: 0,id,emotion,sadness,probs_sadness,joy,probs_joy,anger,probs_anger,surprise,probs_surprise,disgust,probs_disgust,fear,probs_fear,others,probs_others,emotion_probs
0,76ae8555-1140-4299-a06d-fe8363ac5300,others,0,0.003295,0,0.075722,0,0.001402,0,0.045164,0,0.016391,0,0.003537,1,0.862014,probs_others
1,94627594-947a-4a9f-a98f-a90b8b46cc97,sadness,1,0.992938,0,0.01094,0,0.007113,0,0.020962,0,0.011975,0,0.001772,0,0.027283,probs_sadness
2,540262fe-224f-42a0-8899-5d0131d4253d,joy,0,0.004933,1,0.870784,0,0.003013,0,0.015577,0,0.01276,0,0.003454,0,0.032905,probs_joy
3,d4576b28-1b72-448d-8514-992310515f3c,others,0,0.003918,0,0.017657,0,0.004151,0,0.026145,0,0.019293,0,0.00177,1,0.84373,probs_others
4,4a521987-1d36-4c67-90aa-198bc5980924,joy,0,0.003315,1,0.649997,0,0.003101,0,0.019518,0,0.025117,0,0.001733,0,0.249809,probs_joy


In [57]:
dev_total['emotion_probs'] = dev_total['emotion_probs'].apply(lambda x: x.split('_')[1])
dev_total

Unnamed: 0,id,emotion_x,sadness,probs_sadness,joy,probs_joy,anger,probs_anger,surprise,probs_surprise,disgust,probs_disgust,fear,probs_fear,others,probs_others,emotion_y,emotion_probs
0,b8edb708-ff27-4472-82d2-75a4858c5c80,sadness,1,0.992747,0,0.014215,0,0.001466,0,0.015671,0,0.010838,0,0.001725,0,0.025844,sadness,sadness
1,10b4fb5f-1ec6-457c-8f11-5236c2e7b984,sadness,0,0.228827,0,0.022623,0,0.007079,0,0.020036,0,0.018854,0,0.001780,0,0.432726,others,others
2,eb1b1a68-93da-42a5-8379-56a8ce37c2fd,anger,0,0.007660,0,0.029938,0,0.022477,0,0.172670,0,0.018518,0,0.001892,0,0.316669,anger,others
3,88627d8a-bdda-4a9b-9a64-7a3e60ba62f2,anger,0,0.005049,0,0.037504,0,0.196453,0,0.028873,0,0.019335,0,0.001795,0,0.391174,others,others
4,0a7d24b7-9823-4e7b-aac2-3a7441ddf1ca,others,0,0.003526,0,0.050385,0,0.001393,0,0.176855,0,0.011173,0,0.002016,1,0.856378,others,others
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
839,aebc4c10-4af5-4304-872e-b9519d8a775c,others,0,0.004527,0,0.123410,0,0.002391,0,0.167884,0,0.010719,0,0.001740,1,0.857611,others,others
840,067b466c-c61b-486f-b848-a4cbb89c1f3f,sadness,1,0.698071,0,0.019504,0,0.008235,0,0.025346,0,0.017259,0,0.001780,0,0.108134,sadness,sadness
841,8d9b7731-0d55-45bd-9706-cfa4e93bb2fb,joy,0,0.003782,1,0.844622,0,0.001453,0,0.018233,0,0.013037,0,0.001766,0,0.150371,joy,joy
842,cf8d3cc8-2249-44e3-9417-2f51fe14bf1b,others,0,0.003384,0,0.031349,0,0.008158,0,0.022599,0,0.027991,0,0.002347,1,0.828457,others,others


In [72]:
dev_total[probs_cols+['id','emotion_x','emotion_y','emotion_probs']].to_csv('binary-dev.csv',index=False)

In [88]:
test_total[probs_cols+['id','emotion']].to_csv('binary-test.csv',index=False)

In [74]:
test_total.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1626 entries, 0 to 1625
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              1626 non-null   object 
 1   emotion         1626 non-null   object 
 2   sadness         1626 non-null   int64  
 3   probs_sadness   1626 non-null   float64
 4   joy             1626 non-null   int64  
 5   probs_joy       1626 non-null   float64
 6   anger           1626 non-null   int64  
 7   probs_anger     1626 non-null   float64
 8   surprise        1626 non-null   int64  
 9   probs_surprise  1626 non-null   float64
 10  disgust         1626 non-null   int64  
 11  probs_disgust   1626 non-null   float64
 12  fear            1626 non-null   int64  
 13  probs_fear      1626 non-null   float64
 14  others          1626 non-null   int64  
 15  probs_others    1626 non-null   float64
 16  emotion_probs   1626 non-null   object 
dtypes: float64(7), int64(7), object(3

In [58]:
#Converting labels to numbers
def label_to_int(label):
  if label   == 'sadness':
    return 0
  elif label == 'joy':
    return 1
  elif label == 'anger':
    return 2
  elif label == 'surprise':
    return 3
  elif label == 'disgust':
    return 4
  elif label == 'fear':
    return 5
  elif label == 'others':
    return 6

In [None]:
dev_total['emotion_x'] = dev_total['emotion_x'].apply(label_to_int)

In [61]:
# emotion_y is the true label for the dev dataset
# emotion_x is the predicted label of the multi-label classifier
print(classification_report(dev_total['emotion_y'], dev_total['emotion_x'], target_names= CLASS_NAMES, digits=4))

              precision    recall  f1-score   support

     sadness       0.70      0.68      0.69        85
         joy       0.00      0.00      0.00        16
       anger       1.00      0.44      0.62         9
    surprise       0.70      0.66      0.68       181
     disgust       0.73      0.85      0.78       414
        fear       0.80      0.80      0.80       104
      others       0.60      0.09      0.15        35

    accuracy                           0.73       844
   macro avg       0.65      0.50      0.53       844
weighted avg       0.71      0.73      0.71       844



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [60]:
# emotion_probs is the selected label from the binary classifiers
print(classification_report(dev_total['emotion_y'], dev_total['emotion_probs'], target_names= CLASS_NAMES, digits=4))

              precision    recall  f1-score   support

     sadness       0.69      0.62      0.65        85
         joy       0.00      0.00      0.00        16
       anger       0.71      0.56      0.63         9
    surprise       0.71      0.61      0.66       181
     disgust       0.72      0.86      0.78       414
        fear       0.82      0.81      0.81       104
      others       0.33      0.03      0.05        35

    accuracy                           0.72       844
   macro avg       0.57      0.50      0.51       844
weighted avg       0.70      0.72      0.70       844



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
