# 🤗 SemEval 2018 Task 1
This dataset contains multilabel classification task. Note that multilabel is different with multiclass task, since one record of data could have more than 1 classes/labels.
<br><br><br>
Here is the example:
<br><br>
*“Worry is a down payment on a problem you may never have'. \xa0Joyce Meyer.  #motivation #leadership #worry*
<br><br>
**Labels:**
<br>
anger: `False`<br>
anticipation: `True`<br>
disgust: `False`<br>
fear: `False`<br>
joy: `False`<br>
love: `False`<br>
optimism: `True`<br>
pessimism: `False`<br>
sadness: `False`<br>
surprise: `False`<br>
trust: `True`

### Import Libraries

In [1]:
import numpy as np
import torch

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import Trainer, TrainingArguments, EvalPrediction

from sklearn.metrics import f1_score, roc_auc_score, accuracy_score

#### Some Default Configs

In [2]:
MODEL_CKPT = 'distilbert-base-uncased'
LR = 2e-5
BATCH_SIZE = 4
WEIGHT_DECAY = 0.01
METRIC_NAME = 'f1'

### Dataset Preparation

In [3]:
dataset = load_dataset('sem_eval_2018_task_1', 'subtask5.english')
dataset

Reusing dataset sem_eval_2018_task_1 (C:\Users\fahrizain\.cache\huggingface\datasets\sem_eval_2018_task_1\subtask5.english\1.1.0\a7c0de8b805f1988b118882fb289ccfbbeb9085c7820b6f046b5887e234af182)


  0%|          | 0/3 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['ID', 'Tweet', 'anger', 'anticipation', 'disgust', 'fear', 'joy', 'love', 'optimism', 'pessimism', 'sadness', 'surprise', 'trust'],
        num_rows: 6838
    })
    test: Dataset({
        features: ['ID', 'Tweet', 'anger', 'anticipation', 'disgust', 'fear', 'joy', 'love', 'optimism', 'pessimism', 'sadness', 'surprise', 'trust'],
        num_rows: 3259
    })
    validation: Dataset({
        features: ['ID', 'Tweet', 'anger', 'anticipation', 'disgust', 'fear', 'joy', 'love', 'optimism', 'pessimism', 'sadness', 'surprise', 'trust'],
        num_rows: 886
    })
})

In [4]:
sample = dataset['train'][0]
sample

{'ID': '2017-En-21441',
 'Tweet': "“Worry is a down payment on a problem you may never have'. \xa0Joyce Meyer.  #motivation #leadership #worry",
 'anger': False,
 'anticipation': True,
 'disgust': False,
 'fear': False,
 'joy': False,
 'love': False,
 'optimism': True,
 'pessimism': False,
 'sadness': False,
 'surprise': False,
 'trust': True}

In [5]:
# create label mapper
labels = [label for label in dataset['train'].features.keys() if label not in ['ID', 'Tweet']]
id2label = {idx:label for idx, label in enumerate(labels)}
label2id = {label:idx for idx, label in enumerate(labels)}
labels

['anger',
 'anticipation',
 'disgust',
 'fear',
 'joy',
 'love',
 'optimism',
 'pessimism',
 'sadness',
 'surprise',
 'trust']

In [6]:
sample = dataset['train'][:3]
sample

{'ID': ['2017-En-21441', '2017-En-31535', '2017-En-21068'],
 'Tweet': ["“Worry is a down payment on a problem you may never have'. \xa0Joyce Meyer.  #motivation #leadership #worry",
  'Whatever you decide to do make sure it makes you #happy.',
  "@Max_Kellerman  it also helps that the majority of NFL coaching is inept. Some of Bill O'Brien's play calling was wow, ! #GOPATS"],
 'anger': [False, False, True],
 'anticipation': [True, False, False],
 'disgust': [False, False, True],
 'fear': [False, False, False],
 'joy': [False, True, True],
 'love': [False, True, False],
 'optimism': [True, True, True],
 'pessimism': [False, False, False],
 'sadness': [False, False, False],
 'surprise': [False, False, False],
 'trust': [True, False, False]}

In [7]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_CKPT)

In [8]:
encoding = tokenizer(sample['Tweet'], padding='max_length', truncation=True, max_length=128)
labels = {k: sample[k] for k in sample.keys() if k in labels}
labels_matrix = np.zeros((3, len(labels)))
for idx, label in enumerate(labels):
    labels_matrix[:, idx] = labels[label]

labels_matrix

array([[0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0.],
       [1., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0.]])

In [9]:
def preprocess_data(batch):
    # take a batch of text
    text = batch['Tweet']
    # encode them
    encoding = tokenizer(text, padding='max_length', truncation=True, max_length=128)
    # add labels
    labels_batch = {k: batch[k] for k in batch.keys() if k in labels}
    # create numpy array of shape (batch_size, num_labels)
    labels_matrix = np.zeros((len(text), len(labels)))
    # fill numpy array
    for idx, label in enumerate(labels):
        labels_matrix[:, idx] = labels_batch[label]

    encoding['labels'] = labels_matrix.tolist()

    return encoding

In [10]:
encoded_dataset = dataset.map(preprocess_data, batched=True, remove_columns=dataset['train'].column_names)
encoded_dataset



  0%|          | 0/7 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 6838
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 3259
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 886
    })
})

In [11]:
sample = encoded_dataset['train'][0]
[id2label[idx] for idx, label in enumerate(sample['labels']) if label == 1.0]

['anticipation', 'optimism', 'trust']

In [12]:
encoded_dataset.set_format('torch')

### Define Model

In [13]:
model = AutoModelForSequenceClassification.from_pretrained(MODEL_CKPT,
                                                           problem_type='multi_label_classification',
                                                           num_labels=len(labels),
                                                           id2label=id2label,
                                                           label2id=label2id)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.weight', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

In [14]:
args = TrainingArguments(
    f'bert-finetuned-sem_eval-english',
    evaluation_strategy='epoch',
    save_strategy='epoch',
    learning_rate=LR,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=5,
    weight_decay=WEIGHT_DECAY,
    load_best_model_at_end=True,
    metric_for_best_model=METRIC_NAME
)

In [15]:
def multilabel_metrics(predictions, labels, threshold=0.5):
    # first, apply sigmoid on predictions which are of shape (batch_size, num_labels)
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(torch.Tensor(predictions))
    # next, use threshold to turn them into integer predictions
    y_pred = np.zeros(probs.shape)
    y_pred[np.where(probs >= threshold)] = 1
    # finally, compute metrics
    y_true = labels
    f1_micro_avg = f1_score(y_true, y_pred, average='micro')
    roc_auc = roc_auc_score(y_true, y_pred, average='micro')
    accuracy = accuracy_score(y_true, y_pred)
    # return as dictionary
    metrics = {'f1': f1_micro_avg,
                'roc_auc': roc_auc,
                'accuracy': accuracy}

    return metrics

def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    result = multilabel_metrics(predictions=preds, labels=p.label_ids)
    return result

In [16]:
# forward pass on a batch verification
outputs = model(encoded_dataset['train']['input_ids'][0].unsqueeze(0), labels=encoded_dataset['train'][0]['labels'].unsqueeze(0))
outputs

SequenceClassifierOutput(loss=tensor(0.6860, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), logits=tensor([[-0.0598,  0.0766, -0.0128,  0.0214, -0.0474, -0.0983, -0.0803,  0.1194,
         -0.0747, -0.1168, -0.0914]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [17]:
trainer = Trainer(
    model,
    args,
    train_dataset=encoded_dataset['train'],
    eval_dataset=encoded_dataset['validation'],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

In [18]:
trainer.train()

***** Running training *****
  Num examples = 6838
  Num Epochs = 5
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 1
  Total optimization steps = 8550


  0%|          | 0/8550 [00:00<?, ?it/s]

{'loss': 0.4377, 'learning_rate': 1.8830409356725147e-05, 'epoch': 0.29}
{'loss': 0.3522, 'learning_rate': 1.7660818713450293e-05, 'epoch': 0.58}
{'loss': 0.3276, 'learning_rate': 1.649122807017544e-05, 'epoch': 0.88}


***** Running Evaluation *****
  Num examples = 886
  Batch size = 4


  0%|          | 0/222 [00:00<?, ?it/s]

Saving model checkpoint to bert-finetuned-sem_eval-english\checkpoint-1710
Configuration saved in bert-finetuned-sem_eval-english\checkpoint-1710\config.json


{'eval_loss': 0.323986679315567, 'eval_f1': 0.6661466458658345, 'eval_roc_auc': 0.7697590503676215, 'eval_accuracy': 0.2595936794582393, 'eval_runtime': 37.3934, 'eval_samples_per_second': 23.694, 'eval_steps_per_second': 5.937, 'epoch': 1.0}


Model weights saved in bert-finetuned-sem_eval-english\checkpoint-1710\pytorch_model.bin
tokenizer config file saved in bert-finetuned-sem_eval-english\checkpoint-1710\tokenizer_config.json
Special tokens file saved in bert-finetuned-sem_eval-english\checkpoint-1710\special_tokens_map.json


{'loss': 0.3051, 'learning_rate': 1.5321637426900587e-05, 'epoch': 1.17}
{'loss': 0.2804, 'learning_rate': 1.4152046783625733e-05, 'epoch': 1.46}
{'loss': 0.2798, 'learning_rate': 1.2982456140350879e-05, 'epoch': 1.75}


***** Running Evaluation *****
  Num examples = 886
  Batch size = 4


  0%|          | 0/222 [00:00<?, ?it/s]

Saving model checkpoint to bert-finetuned-sem_eval-english\checkpoint-3420
Configuration saved in bert-finetuned-sem_eval-english\checkpoint-3420\config.json


{'eval_loss': 0.3148577809333801, 'eval_f1': 0.6947689182326053, 'eval_roc_auc': 0.7895590221207314, 'eval_accuracy': 0.2866817155756208, 'eval_runtime': 32.7168, 'eval_samples_per_second': 27.081, 'eval_steps_per_second': 6.786, 'epoch': 2.0}


Model weights saved in bert-finetuned-sem_eval-english\checkpoint-3420\pytorch_model.bin
tokenizer config file saved in bert-finetuned-sem_eval-english\checkpoint-3420\tokenizer_config.json
Special tokens file saved in bert-finetuned-sem_eval-english\checkpoint-3420\special_tokens_map.json


{'loss': 0.2703, 'learning_rate': 1.1812865497076024e-05, 'epoch': 2.05}
{'loss': 0.238, 'learning_rate': 1.0643274853801172e-05, 'epoch': 2.34}
{'loss': 0.2426, 'learning_rate': 9.473684210526315e-06, 'epoch': 2.63}
{'loss': 0.2368, 'learning_rate': 8.304093567251463e-06, 'epoch': 2.92}


***** Running Evaluation *****
  Num examples = 886
  Batch size = 4


  0%|          | 0/222 [00:00<?, ?it/s]

Saving model checkpoint to bert-finetuned-sem_eval-english\checkpoint-5130
Configuration saved in bert-finetuned-sem_eval-english\checkpoint-5130\config.json


{'eval_loss': 0.32266080379486084, 'eval_f1': 0.6887383997993478, 'eval_roc_auc': 0.787815432502287, 'eval_accuracy': 0.2652370203160271, 'eval_runtime': 38.6615, 'eval_samples_per_second': 22.917, 'eval_steps_per_second': 5.742, 'epoch': 3.0}


Model weights saved in bert-finetuned-sem_eval-english\checkpoint-5130\pytorch_model.bin
tokenizer config file saved in bert-finetuned-sem_eval-english\checkpoint-5130\tokenizer_config.json
Special tokens file saved in bert-finetuned-sem_eval-english\checkpoint-5130\special_tokens_map.json


{'loss': 0.2158, 'learning_rate': 7.134502923976608e-06, 'epoch': 3.22}
{'loss': 0.2068, 'learning_rate': 5.964912280701755e-06, 'epoch': 3.51}
{'loss': 0.1996, 'learning_rate': 4.7953216374269005e-06, 'epoch': 3.8}


***** Running Evaluation *****
  Num examples = 886
  Batch size = 4


  0%|          | 0/222 [00:00<?, ?it/s]

Saving model checkpoint to bert-finetuned-sem_eval-english\checkpoint-6840
Configuration saved in bert-finetuned-sem_eval-english\checkpoint-6840\config.json


{'eval_loss': 0.33189594745635986, 'eval_f1': 0.6885083147182923, 'eval_roc_auc': 0.7892089254071624, 'eval_accuracy': 0.26749435665914223, 'eval_runtime': 35.9169, 'eval_samples_per_second': 24.668, 'eval_steps_per_second': 6.181, 'epoch': 4.0}


Model weights saved in bert-finetuned-sem_eval-english\checkpoint-6840\pytorch_model.bin
tokenizer config file saved in bert-finetuned-sem_eval-english\checkpoint-6840\tokenizer_config.json
Special tokens file saved in bert-finetuned-sem_eval-english\checkpoint-6840\special_tokens_map.json


{'loss': 0.197, 'learning_rate': 3.625730994152047e-06, 'epoch': 4.09}
{'loss': 0.1847, 'learning_rate': 2.456140350877193e-06, 'epoch': 4.39}
{'loss': 0.1825, 'learning_rate': 1.2865497076023392e-06, 'epoch': 4.68}
{'loss': 0.1827, 'learning_rate': 1.1695906432748539e-07, 'epoch': 4.97}


***** Running Evaluation *****
  Num examples = 886
  Batch size = 4


  0%|          | 0/222 [00:00<?, ?it/s]

Saving model checkpoint to bert-finetuned-sem_eval-english\checkpoint-8550
Configuration saved in bert-finetuned-sem_eval-english\checkpoint-8550\config.json


{'eval_loss': 0.3367431163787842, 'eval_f1': 0.6896380763510164, 'eval_roc_auc': 0.7900685032839297, 'eval_accuracy': 0.2708803611738149, 'eval_runtime': 37.5363, 'eval_samples_per_second': 23.604, 'eval_steps_per_second': 5.914, 'epoch': 5.0}


Model weights saved in bert-finetuned-sem_eval-english\checkpoint-8550\pytorch_model.bin
tokenizer config file saved in bert-finetuned-sem_eval-english\checkpoint-8550\tokenizer_config.json
Special tokens file saved in bert-finetuned-sem_eval-english\checkpoint-8550\special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from bert-finetuned-sem_eval-english\checkpoint-3420 (score: 0.6947689182326053).


{'train_runtime': 5118.7044, 'train_samples_per_second': 6.679, 'train_steps_per_second': 1.67, 'train_loss': 0.25487864120661863, 'epoch': 5.0}


TrainOutput(global_step=8550, training_loss=0.25487864120661863, metrics={'train_runtime': 5118.7044, 'train_samples_per_second': 6.679, 'train_steps_per_second': 1.67, 'train_loss': 0.25487864120661863, 'epoch': 5.0})

In [19]:
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 886
  Batch size = 4


  0%|          | 0/222 [00:00<?, ?it/s]

{'eval_loss': 0.3148577809333801,
 'eval_f1': 0.6947689182326053,
 'eval_roc_auc': 0.7895590221207314,
 'eval_accuracy': 0.2866817155756208,
 'eval_runtime': 22.9199,
 'eval_samples_per_second': 38.656,
 'eval_steps_per_second': 9.686,
 'epoch': 5.0}

In [20]:
text = 'I\'m happy I can finally train a model for multi-label classification'

encoding = tokenizer(text, return_tensors='pt')
encoding = {k:v.to(trainer.model.device) for k, v in encoding.items()}

outputs = trainer.model(**encoding)

In [21]:
logits = outputs.logits
logits.shape

torch.Size([1, 11])

In [30]:
torch.nn.Sigmoid()(logits)

tensor([[0.0140, 0.2553, 0.0111, 0.0136, 0.9834, 0.3085, 0.8170, 0.0085, 0.0164,
         0.0655, 0.1638]], device='cuda:0', grad_fn=<SigmoidBackward0>)

In [32]:
id2label[4], id2label[6]

('joy', 'optimism')

In [29]:
probs = torch.nn.Sigmoid()(logits.squeeze().cpu())
predictions = np.zeros(probs.shape)
predictions[np.where(probs >= 0.5)] = 1
predicted_labels = [id2label[idx] for idx, label in enumerate(predictions) if label == 1.0]
print(predicted_labels)

['joy', 'optimism']
