### Train a text classifier (BERT-based) that predicts the underlying explained emotion.
- This notebook requires that you have installed __simpletransformers__
    - e.g., _pip install simpletransformers_
- On 4 GPUs, training can take several hours (e.g., 18 hours)
- Parameters are the same as in the paper (of course)

In [2]:
import sklearn
import logging
import pandas as pd
import os.path as osp
from simpletransformers.classification import ClassificationModel

from artemis.emotions import ARTEMIS_EMOTIONS, IDX_TO_EMOTION, positive_negative_else
from artemis.in_out.basics import create_dir

In [2]:
# Number of target classes of the classifier: one per entry of ARTEMIS_EMOTIONS.
num_labels = len(ARTEMIS_EMOTIONS)
# Checkpoint to start from; replaced by best_model_dir further below when load_best_model is True.
model_name =  'bert-base-uncased'
load_best_model = False # set to True if training was already done and you want to train more, or just evaluate
# Gates the training cell. NOTE: the name is misspelled, but the training cell reads it verbatim.
do_trainining = True

# Forwarded to simpletransformers as 'num_train_epochs'.
max_train_epochs = 50
subsample_data = False  # set to True to work on a random subsample of each split (quick speed tests etc.)

In [3]:
# Where the output model, logger etc. are or will be.
# NOTE: machine-specific absolute path -- point it to a directory on your own system.
my_out_dir = '/home/optas/DATA/OUT/artemis/neural_nets/txt_to_emotion/bert_based'
create_dir(my_out_dir)
# Directory handed to simpletransformers as 'best_model_dir' and re-loaded later for the final evaluation.
best_model_dir = osp.join(my_out_dir, 'outputs', 'best_model')
create_dir(best_model_dir)

'/home/optas/DATA/OUT/artemis/neural_nets/txt_to_emotion/bert_based/outputs/best_model'

In [4]:
# Resume from the previously saved best checkpoint when requested; otherwise keep the stock pretrained name.
model_name = best_model_dir if load_best_model else model_name

#### In the cell below you need to use YOUR PATH.
- I will use the pre-processed ArtEmis dataset; as prepared by the script __preprocess_artemis_data.py --preprocess-for-deep-nets True__ (see STEP.1 at top-README) 
- Specifically, this way I can use the same train/test/val splits across all my neural-based experiments.

In [5]:
# CSV written by the preprocessing script -- machine-specific absolute path, replace with YOUR path.
preprocessed_artemis = '/home/optas/DATA/OUT/artemis/preprocessed_data/for_neural_nets/artemis_preprocessed.csv'
# The cells below read the 'split', 'utterance_spelled' and 'emotion_label' columns of this frame.
df = pd.read_csv(preprocessed_artemis)

In [6]:
#
# Convert the data to the format expected by simpletransformers: 1) input-text, 2) label
#
subsample_size = 1000  # rows kept per split when subsample_data is True
subsample_seed = 0     # fixed seed, so that subsampled (debugging) runs are reproducible

data_splits = dict()
for split in ['train', 'test', 'val']:
    mask = (df['split'] == split)
    # note that here I am not using my artemis.utils.vocabulary. Instead I rely on the default tokenization etc. of simpletransformers
    sub_df = df.loc[mask, ['utterance_spelled', 'emotion_label']]
    sub_df = sub_df.rename(columns={'utterance_spelled': 'text', 'emotion_label': 'labels'})
    if subsample_data:
        # Cap the request at the split size: sampling without replacement raises
        # when asked for more rows than the frame holds.
        sub_df = sub_df.sample(n=min(subsample_size, len(sub_df)), random_state=subsample_seed)
    # Fresh 0..n-1 index, so that later positional comparisons against predictions line up.
    data_splits[split] = sub_df.reset_index(drop=True)

In [7]:
# Root logger at INFO, so that the simpletransformers progress messages are shown.
logging.basicConfig(level=logging.INFO)
# Loggers are hierarchical by dotted name: raising the level on "transformers" applies to
# every "transformers.*" module logger. (The previous name "transformers-last" is not part of
# that hierarchy, so setting its level did not silence anything.)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# Options handed to simpletransformers when constructing the ClassificationModel.
# NOTE(review): 'min_frequency' looks like a tokenizer-training option -- confirm it has any effect here.
args = {
    'reprocess_input_data': True,
    'overwrite_output_dir': True,
    'fp16': False,
    'n_gpu': 4,
    'save_model_every_epoch': False,
    'evaluate_during_training': True,
    'num_train_epochs': max_train_epochs,
    'min_frequency': 5,
    'train_batch_size': 128,
}
# Outputs, feature cache and tensorboard logs all share the same root directory.
for dir_key in ('output_dir', 'cache_dir', 'tensorboard_dir'):
    args[dir_key] = my_out_dir
args['best_model_dir'] = best_model_dir

# Create the classifier, starting from `model_name` (pretrained name or a saved checkpoint directory).
model = ClassificationModel(
    'bert',
    model_name=model_name,
    num_labels=num_labels,
    args=args,
)

In [8]:
# Train the model on the train split, passing the val split as eval_df
# (args sets 'evaluate_during_training': True). Skipped entirely when do_trainining is False.
if do_trainining: 
    model.train_model(data_splits['train'], eval_df=data_splits['val'])

In [None]:
# Evaluate the (last trained) model on the test split; go further below to evaluate
# the checkpoint stored in best_model_dir instead.
# NOTE(review): only `import sklearn` appears at the top of the notebook; accessing
# `sklearn.metrics` presumably works because another import already loaded that submodule -- confirm.
result, model_outputs, wrong_predictions = model.eval_model(data_splits['test'],
                                                            acc=sklearn.metrics.accuracy_score)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


HBox(children=(FloatProgress(value=0.0, max=39850.0), HTML(value='')))




INFO:simpletransformers.classification.classification_utils: Saving features into cached file /home/optas/DATA/OUT/artemis/neural_nets/txt_to_emotion/bert_based/cached_dev_bert_128_9_2


HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=4982.0, style=ProgressStyle(desc…



In [None]:
# Show the metrics returned by the evaluation of the last trained model.
print(result)

In [16]:
# Load the best (trained) model from best_model_dir to do some evaluations.
# This rebinds `model`, so every cell below works with the reloaded checkpoint.
model = ClassificationModel('bert', model_name=best_model_dir, num_labels=num_labels, args=args)

In [17]:
# Evaluate the model in terms of overall test accuracy.
# `model_outputs` from this call is what the prediction cells below consume.
# NOTE(review): `sklearn.metrics` is reached through a bare `import sklearn` -- confirm the submodule is loaded.
result, model_outputs, wrong_predictions = model.eval_model(data_splits['test'], acc=sklearn.metrics.accuracy_score)
print(result)

In [14]:
# Convert the BERT output to the predicted maximizer: argmax along axis 1.
# assumes model_outputs is (num_examples, num_labels), one score per emotion -- TODO confirm
predictions = model_outputs.argmax(1)

In [15]:
## Test accuracy when emotions are grouped as positive vs. negative (vs. something-else).
def _to_pne(emotion_idx):
    """Map an emotion index to its positive/negative/else group label."""
    return positive_negative_else(IDX_TO_EMOTION[emotion_idx])


dataset = data_splits['test']
gt = dataset.labels

# pos-neg-else conversion of the ground-truth and of the predicted labels
gt_pne = gt.apply(_to_pne)
predictions_pne = pd.Series(predictions).apply(_to_pne)
ternary_hits = (gt_pne == predictions_pne)
print('Ternary prediction accuracy:', ternary_hits.mean())

# now binary: drop every example whose ground-truth group is something-else
se_label = positive_negative_else('something else')
is_pos_or_neg = (gt_pne != se_label)
gt_pn = gt_pne[is_pos_or_neg].reset_index(drop=True)
# positional (boolean array) selection, so predictions are filtered by the ground-truth mask
pred_pn = predictions_pne[is_pos_or_neg.values].reset_index(drop=True)

print('Pos-Neg (binary) prediction accuracy', (gt_pn == pred_pn).mean())

Ternary prediction accuracy: 0.8759849435382685
Pos-Neg (binary) prediction accuracy 0.91516403883715
