In [1]:
! pip install simpletransformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting simpletransformers
  Downloading simpletransformers-0.63.11-py3-none-any.whl (250 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.7/250.7 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-2.12.0-py3-none-any.whl (474 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m474.6/474.6 kB[0m [31m37.9 MB/s[0m eta [36m0:00:00[0m
Collecting streamlit
  Downloading streamlit-1.22.0-py2.py3-none-any.whl (8.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m68.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tokenizers
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m67.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting seqeval
  Downl

In [None]:
# Mount Google Drive at /content/drive; the dataset CSV paths used later in this
# notebook live under '/content/drive/My Drive/...'.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from simpletransformers.classification import ClassificationModel
import pandas as pd
import logging
import sklearn

# Locations of the EmoEvent train/test CSV files on the mounted Google Drive.
emoevent_train_csv_path = '/content/drive/My Drive/Colab Notebooks/datasets/emoevent_train.csv'
emoevent_test_csv_path = '/content/drive/My Drive/Colab Notebooks/datasets/emoevent_test.csv'

emoevent_train = pd.read_csv(emoevent_train_csv_path)
emoevent_test = pd.read_csv(emoevent_test_csv_path)

# A notebook cell only renders its last bare expression, so a bare
# `emoevent_train.head(10)` followed by another expression is silently discarded.
# `display` (provided by the IPython kernel) shows both previews.
display(emoevent_train.head(10))
display(emoevent_test.head(10))

In [None]:
# Build the training/evaluation frames from explicit copies of the loaded data.
# pd.DataFrame(existing_frame) does not copy a DataFrame input by default, so
# renaming columns on such a wrapper can leak back into the raw frame; .copy()
# keeps emoevent_train / emoevent_test untouched and makes this cell idempotent.
# The two-name assignment raises if a CSV does not have exactly two columns.
# NOTE(review): "text"/"labels" are presumably the column names the
# simpletransformers ClassificationModel expects — confirm against its docs.
train_df = emoevent_train.copy()
train_df.columns = ["text", "labels"]

eval_df = emoevent_test.copy()
eval_df.columns = ["text", "labels"]

In [None]:
import torch
from torch import cuda
import gc

def empty_cache(model, optimizer):
  """Run Python garbage collection and release cached CUDA memory.

  This function is passed as the 'custom_callback' entry of the model args
  later in the notebook.

  Args:
    model: Unused. NOTE(review): presumably required by the caller's callback
      signature — confirm.
    optimizer: Unused. Same caveat as `model`.

  Returns:
    None.
  """
  # Collect garbage first so tensors that have just become unreachable are
  # freed before the CUDA caching allocator is asked to give back unused blocks.
  gc.collect()
  # Only touch the CUDA allocator when a GPU is actually present.
  if cuda.is_available():
    torch.cuda.empty_cache()

In [None]:
from sklearn.metrics import f1_score, accuracy_score, precision_score, precision_score, recall_score

def f1_multiclass(labels, preds):
  """Return the macro-averaged F1 score of `preds` measured against `labels`."""
  macro_f1 = f1_score(y_true=labels, y_pred=preds, average='macro')
  return macro_f1

def precision_multiclass(labels, preds):
  """Return the macro-averaged precision of `preds` measured against `labels`."""
  macro_precision = precision_score(y_true=labels, y_pred=preds, average='macro')
  return macro_precision

def recall_multiclass(labels, preds):
  """Return the macro-averaged recall of `preds` measured against `labels`."""
  macro_recall = recall_score(y_true=labels, y_pred=preds, average='macro')
  return macro_recall

In [None]:
import itertools
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

def plot_confusion_matrix(cm, classes,
                          cmap=plt.cm.Blues,
                          title=None):
    """Draw a confusion matrix as an annotated heat map on the current pyplot figure.

    Args:
        cm: 2-D confusion matrix (array-like). It is converted to a float array
            and every cell is annotated with its value to two decimals.
        classes: Sequence of class names used as tick labels on both axes.
        cmap: Matplotlib colormap for the heat map.
        title: Optional figure title; nothing is drawn when left as None.

    Returns:
        None. Drawing happens through the pyplot state machine.
    """
    # Imported locally because this cell's own imports do not include numpy,
    # so the function would otherwise depend on another cell having run first.
    import numpy as np

    cm = np.asarray(cm, dtype=float)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    if title is not None:
        plt.title(title, fontsize=15)
    plt.colorbar()

    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45, fontsize=15)
    plt.yticks(tick_marks, classes, fontsize=15)

    fmt = '.2f'
    # Values above half of the maximum are written in white, the rest in black.
    # An empty matrix has no maximum, so fall back to 0 instead of raising.
    thresh = cm.max() / 2. if cm.size else 0.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label', fontsize=15)
    plt.xlabel('Predicted label', fontsize=15)



In [None]:
import numpy as np

# One dict of evaluation metrics per repetition.
results = []

# Number of independent train/evaluate repetitions.
reps = 1
for rep in range(reps):

  # A fresh model is created on every repetition.
  model = ClassificationModel(
    "roberta",
    "roberta-base",
    num_labels=7,
    args={'num_train_epochs':2,
          'max_seq_length':256,
          'learning_rate':1e-5,
          'overwrite_output_dir': True,
          'custom_callback': empty_cache
          }
  )

  model.train_model(train_df)

  # Each keyword argument is a metric callable; its value is read back from
  # `result` under the same key below.
  result, model_outputs, wrong_predictions = model.eval_model(eval_df,
                                                              acc=sklearn.metrics.accuracy_score,
                                                              f1=f1_multiclass,
                                                              precision=precision_multiclass,
                                                              recall=recall_multiclass,
                                                              )

  results.append({'accuracy': result['acc'], 'f1': result['f1'], 'precision': result['precision'], 'recall': result['recall']})

  # Reuse the outputs returned by the evaluation above instead of running a
  # second full eval_model pass over eval_df just to obtain them again.
  predicted_labels = np.argmax(model_outputs, axis=1)

  # Plot the row-normalised confusion matrix for this repetition.
  fig, ax = plt.subplots(figsize=(6, 6))
  cm = confusion_matrix(eval_df.labels.values, predicted_labels, labels=[0, 1, 2, 3, 4, 5, 6], normalize='true')
  plot_confusion_matrix(cm, ['anger', 'disgust', 'fear', 'joy', 'others', 'sadness', 'surprise'])


print('accuracy, f1, precision, recall:')
for scores in results:
  print(scores['accuracy'], scores['f1'], scores['precision'], scores['recall'])