# DistilBERT - Emotion Recognition Tasks
## Dataset used: A dataset of English Twitter messages with six basic emotions: anger, fear, joy, love, sadness, and surprise.

# 1. Loading dataset

In [None]:
!pip install datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from datasets import load_dataset
# Load the "emotion" dataset through the `datasets` library
emotions = load_dataset('emotion')
# The result is a DatasetDict that is already split into train / validation / test
emotions



  0%|          | 0/3 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
})

# 2. Load Model and Tokenize Data

In [None]:
# install transformer library via pip
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# import model and tokenizer classes
from transformers import DistilBertModel, DistilBertTokenizer
from transformers import pipeline
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

In [None]:
# Name of the pretrained checkpoint to load
model_name = "distilbert-base-uncased"

# Download a pretrained version of DistilBERT (uncased) together with its tokenizer.
# "uncased" means the model does not distinguish between "english" and "English".
bert_model = DistilBertModel.from_pretrained(model_name)
tokenizer = DistilBertTokenizer.from_pretrained(model_name)


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


### Set data into dataframes

In [None]:
# Switch the output format of every split to pandas so that slicing returns a DataFrame
emotions.set_format('pandas')
# Materialise the whole training split as a DataFrame with 'text' and 'label' columns
train_df = emotions['train'][:]
train_df.tail()

Unnamed: 0,text,label
15995,i just had a very brief time in the beanbag an...,0
15996,i am now turning and i feel pathetic that i am...,0
15997,i feel strong and good overall,1
15998,i feel like this was such a rude comment and i...,3
15999,i know a lot but i feel so stupid because i ca...,0


In [None]:
# Materialise the whole test split as a DataFrame
test_df = emotions['test'][:]
test_df.tail()

Unnamed: 0,text,label
1995,i just keep feeling like someone is being unki...,3
1996,im feeling a little cranky negative after this...,3
1997,i feel that i am useful to my people and that ...,1
1998,im feeling more comfortable with derby i feel ...,1
1999,i feel all weird when i have to meet w people ...,4


In [None]:
# Materialise the whole validation split as a DataFrame
val_df = emotions['validation'][:]
val_df.tail()

Unnamed: 0,text,label
1995,im having ssa examination tomorrow in the morn...,0
1996,i constantly worry about their fight against n...,1
1997,i feel its important to share this info for th...,1
1998,i truly feel that if you are passionate enough...,1
1999,i feel like i just wanna buy any cute make up ...,1


In [None]:
# Apply the tokenizer to the training text data and save the resulting dict.
# Texts are truncated to at most 128 tokens, padded, and returned as PyTorch
# tensors under the keys 'input_ids' and 'attention_mask'.

tokenized_train_data = tokenizer(list(train_df["text"]), return_tensors='pt', truncation=True, padding=True, max_length=128)
tokenized_train_data

{'input_ids': tensor([[  101,  1045,  2134,  ...,     0,     0,     0],
        [  101,  1045,  2064,  ...,     0,     0,     0],
        [  101, 10047,  9775,  ...,     0,     0,     0],
        ...,
        [  101,  1045,  2514,  ...,     0,     0,     0],
        [  101,  1045,  2514,  ...,     0,     0,     0],
        [  101,  1045,  2113,  ...,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])}

In [None]:
# Tokenize the test texts with the same settings as the training texts
tokenized_test_data = tokenizer(list(test_df["text"]), return_tensors='pt', truncation=True, padding=True, max_length=128)
tokenized_test_data

{'input_ids': tensor([[  101, 10047,  3110,  ...,     0,     0,     0],
        [  101, 10047,  2039,  ...,     0,     0,     0],
        [  101,  1045,  2196,  ...,     0,     0,     0],
        ...,
        [  101,  1045,  2514,  ...,     0,     0,     0],
        [  101, 10047,  3110,  ...,     0,     0,     0],
        [  101,  1045,  2514,  ...,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])}

In [None]:
# Tokenize the validation texts with the same settings as the training texts.
# The encodings must come from val_df so that they line up row by row with the
# val_df labels they are paired with when the validation dataset is built.
tokenized_val_data = tokenizer(list(val_df["text"]), return_tensors='pt', truncation=True, padding=True, max_length=128)
tokenized_val_data

{'input_ids': tensor([[  101, 10047,  3110,  ...,     0,     0,     0],
        [  101, 10047,  2039,  ...,     0,     0,     0],
        [  101,  1045,  2196,  ...,     0,     0,     0],
        ...,
        [  101,  1045,  2514,  ...,     0,     0,     0],
        [  101, 10047,  3110,  ...,     0,     0,     0],
        [  101,  1045,  2514,  ...,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])}

In [None]:
tokenized_train_data["attention_mask"]

tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])

# 3. Create Train and Validation Dataloader

In [None]:
import torch
import torch.nn as nn
# import Dataset and DataLoader class
from torch.utils.data import Dataset, DataLoader

In [None]:
class TextDataset(Dataset):
  """Pairs a tokenizer encoding with its labels so a DataLoader can batch them.

  X is looked up with the keys "input_ids" and "attention_mask"; Y holds one
  label per sample and determines the length of the dataset.
  """

  def __init__(self, X, Y):
    # Only references are stored; neither the encoding nor the labels are copied.
    self.X, self.Y = X, Y

  def __len__(self):
    """Number of samples, taken from the number of labels."""
    return len(self.Y)

  def __getitem__(self, index):
    """Return the tuple (input_ids, attention_mask, label) for sample `index`."""
    token_ids = self.X["input_ids"][index]
    mask = self.X["attention_mask"][index]
    label = self.Y[index]
    return (token_ids, mask, label)

In [None]:
# Wrap the encodings and the label column of each split in a TextDataset
train_dataset = TextDataset(tokenized_train_data, train_df["label"])
val_dataset = TextDataset(tokenized_val_data, val_df["label"])

Create the training DataLoader train_dl and the validation DataLoader val_dl with a batch size of 32:

In [None]:
# Only the training data is shuffled. Validation is read in a fixed order:
# the last validation batch is smaller than the others, so a shuffled order
# would change which samples land in it and make the per-batch averaged
# validation metrics vary from epoch to epoch for no reason.
train_dl = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dl = DataLoader(val_dataset, batch_size=32, shuffle=False)

# 4. Creating the model

In [None]:
# implement BertClassifier
class BertClassifier(nn.Module):
  """Classifier made of an encoder followed by a single linear layer.

  The encoder is called as `bert_model(input_ids, attention_mask)` and must
  return an object exposing `last_hidden_state`; the vector at sequence
  position 0 is fed to the linear layer.
  """

  def __init__(self, bert_model, n_classes):
    super().__init__()
    # encoder whose output feeds the classification layer
    self.bert = bert_model
    # linear layer mapping the 768-dimensional vector to one score per class
    self.l1 = nn.Linear(in_features=768, out_features=n_classes)

  def freeze_bert(self):
    """Turn off gradient computation for every parameter of the encoder."""
    for weight in self.bert.parameters():
      weight.requires_grad_(False)

  def forward(self, input_ids, attention_mask):
    """Return the class scores of shape (batch, n_classes)."""
    encoded = self.bert(input_ids, attention_mask)
    # keep only the first position ([CLS]) of every sequence for the classification
    cls_vector = encoded.last_hidden_state[:, 0, :]
    return self.l1(cls_vector)

In [None]:
# instantiate the model with 6 output classes
model = BertClassifier(bert_model=bert_model, n_classes=6)

In [None]:
# Run on the GPU when CUDA is available, otherwise on the CPU
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
DEVICE

'cuda'

In [None]:
# move the model to DEVICE (the gpu when one is available, otherwise the cpu)
model.to(DEVICE)

BertClassifier(
  (bert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (lin1): Linear(i

# 5. Model Training

In [None]:
import torch

# Training hyperparameters: number of epochs and learning rate
epochs = 20
lr = 0.0025
# The optimizer is given all parameters of the model
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
loss_func = nn.CrossEntropyLoss()  # multi-class cross entropy loss (the model has 6 classes, so this is not binary)

In [None]:
# To get a better idea of how well your model performs
# you should implement an accuracy function that is
# called after each epoch of your training loop
def accuracy(predictions, labels):
    """Return the fraction of samples whose highest-scoring class equals the label.

    Args:
        predictions: 2-D tensor of class scores, one row per sample.
        labels: 1-D tensor with the true class index of every sample.

    Returns:
        A Python float: number of correct predictions divided by labels.size(0).
    """
    # index of the largest score in every row
    best_class = torch.max(predictions, dim=1).indices
    hits = (best_class == labels).sum().item()
    return hits / labels.size(0)


In [None]:
# Freeze the parameters of bert (sets requires_grad to False on all of them),
# which leaves the linear layer as the only part that still receives gradients
model.freeze_bert()

In [None]:
def train(net, train_dl, val_dl, epochs, optimizer, loss_func):
    """Train `net` and evaluate it on the validation data after every epoch.

    Args:
        net: model called as net(input_ids=..., attention_mask=...) that returns class scores.
        train_dl: loader yielding (input_ids, attention_mask, labels) training batches.
        val_dl: loader yielding validation batches with the same layout.
        epochs: number of passes over train_dl.
        optimizer: optimizer that updates the trainable parameters of net.
        loss_func: callable loss_func(scores, labels) returning a scalar tensor.

    Returns:
        (train_losses, valid_losses, train_accs, valid_accs): four lists holding one
        Python float per epoch, each the mean of the per-batch values of that epoch.
    """
    # per-epoch history, returned for plotting
    train_losses, valid_losses = [], []
    train_accs, valid_accs = [], []

    # loop over all epochs
    for epoch in tqdm(range(epochs)):
        # reset evaluation variables
        train_loss, valid_loss = 0.0, 0.0
        train_acc, valid_acc = 0.0, 0.0

        # Train mode has to be re-enabled at the start of every epoch, because the
        # validation pass at the end of the previous epoch switched to eval mode.
        net.train()
        # training loop
        for batch in train_dl:
            # push batch to gpu
            input_ids, attention_mask, labels = [x.to(DEVICE) for x in batch]
            # clear gradients first so nothing left over from an earlier
            # (possibly interrupted) run leaks into this update
            optimizer.zero_grad()
            # predict
            y_hat = net(
                input_ids=input_ids,
                attention_mask=attention_mask
            )
            # calculate loss and gradients, then update the weights
            loss = loss_func(y_hat, labels)
            loss.backward()
            optimizer.step()
            # .item() yields a plain number; summing the tensor itself would keep
            # the autograd graph of every batch alive until the end of the epoch
            train_loss += loss.item()
            train_acc += accuracy(y_hat, labels)

        net.eval()
        # validation loop
        with torch.no_grad():
            for batch in val_dl:
                # push batch to gpu
                input_ids, attention_mask, labels = [x.to(DEVICE) for x in batch]
                # predict
                y_hat = net(
                    input_ids=input_ids,
                    attention_mask=attention_mask
                )
                # calculate evaluation kpis
                valid_loss += loss_func(y_hat, labels).item()
                valid_acc += accuracy(y_hat, labels)

        # average the per-batch values over the number of batches
        train_loss /= len(train_dl)
        train_acc /= len(train_dl)
        valid_loss /= len(val_dl)
        valid_acc /= len(val_dl)

        train_losses.append(train_loss)
        valid_losses.append(valid_loss)
        train_accs.append(train_acc)
        valid_accs.append(valid_acc)

        print(f'Epoch: {epoch+1} \tTraining Loss: {train_loss:.6f} \tTraining Acc: {train_acc:.6f} \tValidation Loss: {valid_loss:.6f} \tValidation Acc: {valid_acc:.6f}')

    # return losses for plotting
    return train_losses, valid_losses, train_accs, valid_accs

In [None]:
# Execute the train function and train the model.
train_losses, valid_losses, train_accs, valid_accs = train(model, train_dl, val_dl, epochs, optimizer, loss_func)

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch: 1 	Training Loss: 1.066215 	Training Acc: 0.606187 	Validation Loss: 2.618959 	Validation Acc: 0.294643
Epoch: 2 	Training Loss: 0.924207 	Training Acc: 0.660125 	Validation Loss: 2.443849 	Validation Acc: 0.270337
Epoch: 3 	Training Loss: 0.917863 	Training Acc: 0.660813 	Validation Loss: 2.480536 	Validation Acc: 0.275794
Epoch: 4 	Training Loss: 0.922149 	Training Acc: 0.658750 	Validation Loss: 2.482159 	Validation Acc: 0.263889
Epoch: 5 	Training Loss: 0.923060 	Training Acc: 0.660375 	Validation Loss: 2.426999 	Validation Acc: 0.279762
Epoch: 6 	Training Loss: 0.918290 	Training Acc: 0.663937 	Validation Loss: 2.444786 	Validation Acc: 0.269345
Epoch: 7 	Training Loss: 0.915632 	Training Acc: 0.662625 	Validation Loss: 2.448596 	Validation Acc: 0.276786
Epoch: 8 	Training Loss: 0.916373 	Training Acc: 0.659687 	Validation Loss: 2.477908 	Validation Acc: 0.280258
Epoch: 9 	Training Loss: 0.916071 	Training Acc: 0.662188 	Validation Loss: 2.459360 	Validation Acc: 0.262897
E

# 6. Examples

Emotion names and the label index that belongs to each of them:

['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']
0,1,2,3,4,5

In [None]:
# Emotion names ordered by label index (position 0 is "sadness", position 5 is "surprise")
labels_arr = [
    "sadness",
    "joy",
    "love",
    "anger",
    "fear",
    "surprise",
]


Test Sentences:

In [None]:
# 0 sadness
#test_sentence = "Sometimes I feel so lost and alone, as if no one could truly understand my sadness. It's hard to carry this burden while putting on a smile and pretending everything is okay. But deep inside, I'm battling the darkness and hoping for a glimmer of light that shows me it can get better"
# 1 joy
#test_sentence = "Feeling absolutely elated today! Just got some fantastic news that brought a big smile to my face. Grateful for the amazing moments that fill our lives with joy and positivity. Spread happiness and make someone's day brighter!"
# 2 love
#test_sentence = "Feeling an overwhelming warmth in my heart, like a gentle embrace from the universe. It's a magical force that brings people together, inspires acts of kindness, and makes the world a brighter place. Embracing this beautiful energy and sharing it with everyone around me. "
# 3 anger
#test_sentence = "There's a fierce energy propelling me forward, demanding change and justice. Injustice will not prevail as long as we stand united, speaking up for what's right. Let's channel this passionate force to create a world where equality and fairness reign. Together, we can overcome any obstacle and build a brighter future"
# 4 fear
test_sentence = "The heart races, palms sweat, and a chill runs down my spine. Uncertainty hangs heavy in the air, but I refuse to let it paralyze me. Embracing the unknown, I find strength in every step forward. It's in the face of fear that true courage emerges. I take a deep breath, summoning resilience from within, ready to conquer whatever challenges lie ahead."
# 5 surprise
#test_sentence = "Did a double take, eyes widened, and jaw dropped in disbelief. Life has a way of throwing unexpected curveballs that leave you momentarily speechless. The element of surprise keeps us on our toes, reminding us that anything is possible. Embrace the unexpected twists and turns, for they often lead to the most extraordinary adventures"


In [None]:
# Tokenize the single test sentence (truncated to at most 128 tokens, returned as PyTorch tensors)
tokenized_sentence = tokenizer(test_sentence, return_tensors='pt', truncation=True, padding=True, max_length=128)
# NOTE(review): "0" looks like a placeholder label, and neither the dataset nor the
# loader below appears to be used by the cells visible here — confirm whether they are needed.
test_sentence_dataset = TextDataset(tokenized_sentence, "0")
test_sentence_dl = DataLoader(test_sentence_dataset, batch_size=32, shuffle=True)

In [None]:
# Tokenize the sentence, using the same 128-token truncation limit as for the training data
inputs = tokenizer(test_sentence, return_tensors='pt', truncation=True, max_length=128)
input_ids = inputs['input_ids'].to(DEVICE)
attention_mask = inputs['attention_mask'].to(DEVICE)

# Switch to eval mode explicitly so that dropout is disabled for the prediction,
# whatever mode the model was left in by earlier cells
model.eval()

# Forward pass to obtain predictions
with torch.no_grad():
    output = model(input_ids=input_ids, attention_mask=attention_mask)
    # output has one row per input sentence; take the highest-scoring class of the only row
    predicted_labels = torch.argmax(output[0])

# Print the predicted label
print("The sentence has the following emotion: ", labels_arr[predicted_labels.item()])