#LREC Paper
##Text Classification
###[Simple Transformers](https://github.com/ThilinaRajapakse/simpletransformers/blob/master/examples/text_classification/multilabel_classification.py)

# Data Extraction

In [None]:
#Installing libraries
!pip install simpletransformers

In [1]:
#Importing Libraries
import json
from pprint import pprint
import pandas as pd
import numpy as np
from sklearn.metrics import (hamming_loss, 
                             label_ranking_average_precision_score,
                             f1_score,
                             accuracy_score,
                             multilabel_confusion_matrix)
from simpletransformers.classification import MultiLabelClassificationModel, MultiLabelClassificationArgs
import torch

# Data Pre-Processing

In [2]:
 #Creating Python Object
 with open('rationales_forced_labour_dataset.json', 'r') as json_file:
   dataset = json.load(json_file)

In [3]:
#Creating DataFrame
dataset_df = pd.DataFrame(dataset)
print('Dataset Shape:', dataset_df.shape)

Dataset Shape: (989, 6)


In [4]:
#Splitting Examples with No Labels
dataset_df['n_labels'] = dataset_df['labels'].apply(np.sum, axis=0)
yes_labels_df = dataset_df[dataset_df['n_labels'] > 0]
no_labels_df = dataset_df[dataset_df['n_labels'] == 0]

In [5]:
#Sampling Examples No Labels
no_labels_fraction = 1.0
no_labels_df = no_labels_df.sample(frac=no_labels_fraction, random_state=0)
dataset_df = pd.concat([yes_labels_df, no_labels_df]).drop(['n_labels'], axis=1)
print('Dataset Shape:', dataset_df.shape)

Dataset Shape: (989, 6)


In [15]:
#Creating Train and Test Set
train_df = dataset_df.loc[(dataset_df['set']=='train') | (dataset_df['set']=='dev'), ['content', 'labels']]
eval_df = dataset_df.loc[dataset_df['set']=='test', ['content', 'labels']]
print('Train Dataset Shape:', train_df.shape)
print('Evaluation Dataset Shape:', eval_df.shape)

Train Dataset Shape: (785, 2)
Evaluation Dataset Shape: (204, 2)


# Text Classification

In [16]:
#Hyperparameter Setting
model_args = MultiLabelClassificationArgs()
model_args.reprocess_input_data = True
model_args.overwrite_output_dir = True
model_args.num_train_epochs = 10
model_args.overwrite_output_dir = True
model_args.adafactor_relative_step = False
model_args.adafactor_warmup_init = False

#Best Hyperparameters (Hyperparameter Tuning Outcome)
model_args.learning_rate = 0.001001
model_args.optimizer = "Adafactor"
model_args.train_batch_size = 2
model_args.threshold = 0.20

In [17]:
#Create a MultiClassificationModel
model = MultiLabelClassificationModel(
    "roberta", #model_type
    "roberta-base", #model_name
    num_labels=11,
    use_cuda=torch.cuda.is_available(),
    args= model_args
)

Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/478M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForMultiLabelSequenceClassification: ['lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForMultiLabelSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForMultiLabelSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForMultiLabelSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias',

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

In [18]:
#Train the model
model.train_model(train_df)

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/785 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/393 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/393 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/393 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/393 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/393 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/393 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/393 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/393 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/393 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/393 [00:00<?, ?it/s]

(3930, 0.18965011503353107)

In [19]:
#Evaluate the model
results, model_outputs, wrong_predictions = model.eval_model(eval_df)

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/204 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/26 [00:00<?, ?it/s]

# Evaluation

In [20]:
#Evaluation Function
def eval_multi_label_model(y, y_pred, threshold):

  #Label Ranking Average Precision Score
  print('LRAP', round(label_ranking_average_precision_score(y, y_pred),4))

  #Hamming Loss
  print('\nHamming Loss:', round(hamming_loss(y, y_pred > threshold),4))

  #F1 Score (Micro)
  print('\nF1 Score (micro):', round(f1_score(y, y_pred > threshold, average='micro'),4))

  #F1 Score (Macro)
  print('\nF1 Score (macro):', round(f1_score(y, y_pred > threshold, average='macro'),4))

  #F1 Score (Weighted)
  print('\nF1 Score (weighted):', round(f1_score(y, y_pred > threshold, average='weighted'),4))

  #F1 Score (Weighted)
  print('\nF1 Score (each label):', f1_score(y, y_pred > threshold, average=None))

  #Exact Match Ratio
  print('\nEMR:', round(accuracy_score(y, y_pred > threshold),4))

In [21]:
#Creating Target Vector (Evaluation Set)
y_eval = np.array(eval_df['labels'].tolist())

In [22]:
#Creating Target Vector (Model Prediction)
y_eval_pred = model_outputs
print('Evaluation Target Vector Shape:', y_eval_pred.shape)

Evaluation Target Vector Shape: (204, 11)


In [23]:
#Evaluation (Validation Set)
eval_multi_label_model(y_eval, y_eval_pred, threshold=0.40)

LRAP 0.8779

Hamming Loss: 0.098

F1 Score (micro): 0.3678

F1 Score (macro): 0.2804

F1 Score (weighted): 0.3506

F1 Score (each label): [0.25       0.46464646 0.3        0.14285714 0.48484848 0.375
 0.4        0.5        0.         0.         0.16666667]

EMR: 0.4951
