In [1]:
!pip install simpletransformers
!mkdir -p 'drive/MyDrive/emotion_classifier'

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting simpletransformers
  Downloading simpletransformers-0.63.7-py3-none-any.whl (249 kB)
[K     |████████████████████████████████| 249 kB 6.8 MB/s 
Collecting transformers>=4.6.0
  Downloading transformers-4.19.2-py3-none-any.whl (4.2 MB)
[K     |████████████████████████████████| 4.2 MB 66.1 MB/s 
[?25hCollecting sentencepiece
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 51.9 MB/s 
Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[K     |████████████████████████████████| 43 kB 2.6 MB/s 
Collecting tokenizers
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 42.8 MB/s 
[?25hCollecting wandb>=0.10.32
  Downloading wandb-0.12.17-py2.py3-none-any.whl (1.8 MB)
[K   

In [1]:
import pandas as pd
import torch
import pickle
from torch.utils.data import Dataset, DataLoader # for the dataloader
from simpletransformers.classification import ClassificationModel, ClassificationArgs
from transformers import Trainer, TrainingArguments
from sklearn.metrics import classification_report, confusion_matrix, f1_score


# DataLoader

In [2]:
# Convert dataframe into dictionary of text and labels
def reader(df):
    texts = df['text'].values.tolist()
    labels = df['labels'].values.tolist()

    return {'texts':texts, 'labels':labels}

In [3]:
# DataLoader
class OlidDataset(Dataset):
  def __init__(self, tokenizer, input_set):
    # input_set: dictionary version of the df
    self.texts = input_set['texts']
    self.labels = input_set['labels']
    self.tokenizer = tokenizer

  def collate_fn(self, batch):
    texts = []
    labels = []

    for b in batch:
      texts.append(str(b['text']))
      labels.append(b['label'])

    print(texts)
    print(labels)
    encodings = self.tokenizer(
      texts,                        # what to encode
      return_tensors = 'pt',        # return pytorch tensors
      add_special_tokens = True,    # incld tokens like [SEP], [CLS]
      padding = "max_length",       # pad to max sentence length
      truncation = True,            # truncate if too long
      max_length= 128)              

    encodings['labels'] = torch.tensor(labels)
    return encodings

  def __len__(self):
    return len(self.texts)

  def __getitem__(self, idx):
    item = {'text': self.texts[idx], 'label': self.labels[idx]}

    return item


# Train Model

In [7]:
def train_model(model_name, best_model_dir, train_df, eval_df):
  optimizer = 'AdamW' 
  learning_rate = 4e-05
  epochs = 1
  
  model_args = ClassificationArgs(num_train_epochs=epochs,        # number of epochs
                                  best_model_dir=best_model_dir,  # directory to save best model
                                  evaluate_during_training=True,  # best model determined by validation set performance
                                  no_cache=True,                  
                                  save_steps=-1,                  
                                  save_model_every_epoch=False,
                                  overwrite_output_dir=True,
                                  learning_rate=learning_rate,    # learning rate
                                  optimizer=optimizer)            # optimizer

  model = ClassificationModel(model_type="xlmroberta",  # tried xlmroberta, bert
                            model_name=model_name,      # tried bert-base-chinese, xlm-roberta-base, bert-base-multilingual-cased (mBert), microsoft/infoxlm-base
                            args = model_args,          # see above
                            num_labels=4,               # 4 labels - sad, happy, fear, anger
                            use_cuda=cuda_available)    # use GPU
  
  if model_name != 'xlm-roberta-base':
    print('Sanity Check, make sure training correct model.')
    evaluate(model, df_val_EP)

  model.train_model(train_df = train_df,                # training dataset
                    eval_df = eval_df)                  # evaluation dataset
  
  return model


# Evaluate

In [5]:
def evaluate(model, df_dataset):
  y_pred, _ = model.predict(df_dataset.text.tolist())
  y_true = df_dataset['labels']

  print("Classification Report", classification_report(y_true, y_pred))
  print("Confusion Matrix", confusion_matrix(y_true, y_pred))
  print("F1-Score", f1_score(y_true, y_pred,average='weighted'))

# Run Code

In [12]:
# Run
GPU = True
if GPU:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
else:
    device = torch.device("cpu")
print(f"Using {device}")

cuda_available = torch.cuda.is_available()

## Datasets
# Emotion (Twitter) Dataset (First Tune)
df_train_twitter = pd.read_csv('/content/drive/MyDrive/emotion_classifier/twitter_full.csv')

# EmpatheticPersonas (EP) Dataset (Second Tune)
df_train_EP = pd.read_csv('/content/drive/MyDrive/emotion_classifier/emotionlabeled_train.csv')
df_val_EP = pd.read_csv('/content/drive/MyDrive/emotion_classifier/emotionlabeled_val.csv')
df_test_EP = pd.read_csv('/content/drive/MyDrive/emotion_classifier/emotionlabeled_val.csv')

## Begin Finetune
# First Finetune 
model = train_model(model_name = "xlm-roberta-base",
                    best_model_dir = "/content/drive/MyDrive/emotion_classifier/best_finetuned_1/",
                    train_df = df_train_twitter[['text','labels']],
                    eval_df = df_val_EP[['text','labels']])

# Evaluate first finetune
print('Performance after First Finetune (Twitter) on Validation Set')
evaluate(model, df_val_EP)

# Second Finetune
model = train_model(model_name = "/content/drive/MyDrive/emotion_classifier/best_finetuned_1",
                    best_model_dir = "/content/drive/MyDrive/emotion_classifier/best_finetuned_2/",
                    train_df = df_train_EP[['text','labels']],
                    eval_df = df_val_EP[['text','labels']])

# Evaluate second finetune
print('Performance after Second Finetune (EP) on Validation Set')
evaluate(model, df_val_EP)

# Final Test results
print('Performance on Held-Out Test Set')
evaluate(model, df_test_EP)


Using cuda


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'lm_head.bias', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 0 of 1:   0%|          | 0/23856 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/118 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/118 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/118 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/118 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/118 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/118 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/118 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/118 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/118 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/118 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/118 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/118 [00:00<?, ?it/s]

Performance after First Finetune (Twitter) on Validation Set
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Classification Report               precision    recall  f1-score   support

           0       0.80      0.30      0.43        27
           1       0.61      0.91      0.73        34
           2       0.84      0.87      0.85        30
           3       0.77      0.74      0.75        27

    accuracy                           0.72       118
   macro avg       0.75      0.70      0.69       118
weighted avg       0.75      0.72      0.70       118

Confusion Matrix [[ 8 11  3  5]
 [ 1 31  1  1]
 [ 0  4 26  0]
 [ 1  5  1 20]]
F1-Score 0.698532242864788


  f"use_multiprocessing automatically disabled as {model_type}"


Sanity Check, make sure training correct model.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Classification Report               precision    recall  f1-score   support

           0       0.78      0.26      0.39        27
           1       0.50      0.94      0.65        34
           2       0.91      0.97      0.94        30
           3       0.92      0.44      0.60        27

    accuracy                           0.68       118
   macro avg       0.78      0.65      0.64       118
weighted avg       0.76      0.68      0.65       118

Confusion Matrix [[ 7 18  1  1]
 [ 1 32  1  0]
 [ 0  1 29  0]
 [ 1 13  1 12]]
F1-Score 0.6522762522176722




Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 0 of 1:   0%|          | 0/236 [00:00<?, ?it/s]



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/118 [00:00<?, ?it/s]

Performance after Second Finetune (EP) on Validation Set
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Classification Report               precision    recall  f1-score   support

           0       0.73      0.89      0.80        27
           1       0.90      0.79      0.84        34
           2       1.00      0.97      0.98        30
           3       0.88      0.85      0.87        27

    accuracy                           0.87       118
   macro avg       0.88      0.88      0.87       118
weighted avg       0.88      0.87      0.87       118

Confusion Matrix [[24  2  0  1]
 [ 5 27  0  2]
 [ 1  0 29  0]
 [ 3  1  0 23]]
F1-Score 0.8746863363379641
Performance on Held-Out Test Set
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Classification Report               precision    recall  f1-score   support

           0       0.73      0.89      0.80        27
           1       0.90      0.79      0.84        34
           2       1.00      0.97      0.98        30
           3       0.88      0.85      0.87        27

    accuracy                           0.87       118
   macro avg       0.88      0.88      0.87       118
weighted avg       0.88      0.87      0.87       118

Confusion Matrix [[24  2  0  1]
 [ 5 27  0  2]
 [ 1  0 29  0]
 [ 3  1  0 23]]
F1-Score 0.8746863363379641


In [None]:
# Load the model
model_args = ClassificationArgs(num_train_epochs=1, 
                                  best_model_dir = '/content/drive/MyDrive/emotion_classifier/best_finetuned_model/',
                                  evaluate_during_training=True,
                                  no_cache=True, 
                                  save_steps=-1,
                                  save_model_every_epoch=False,
                                  overwrite_output_dir=True,
                                  learning_rate=4e-05,
                                  optimizer='AdamW')

model = ClassificationModel(model_type="xlmroberta",      # tried xlmroberta, bert
                            model_name="/content/drive/MyDrive/emotion_classifier/best_finetuned_model",  # local saved model
                            args = model_args, 
                            num_labels=4,
                            use_cuda=cuda_available)

# check if same results obtained
print('loaded model')
y_pred, _ = model.predict(df_val.text.tolist())
y_true = df_val['labels']

print("Classification Report")
print(f1_score(y_true, y_pred,average='weighted'))
print(classification_report(y_true, y_pred))