In [53]:
import os
import sys
sys.path.append(os.path.abspath('..'))
from utils.data_preprocessing import DataPreprocessor
from transformers import DistilBertTokenizerFast
import pandas as pd
import numpy as np
import nltk
import torch.nn as nn
from utils.emotion_regressor import EmotionRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from transformers import get_linear_schedule_with_warmup
from torch.utils.data import DataLoader
from tqdm import tqdm
import nlpaug.augmenter.word as naw
import torch
from torch.utils.data import Dataset
from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit


nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\avin5\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\avin5\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [None]:
df = DataPreprocessor('../data/track-b.csv')
df.preprocess()
data = df.data
data

Unnamed: 0,id,text,anger,fear,joy,sadness,surprise
0,eng_train_track_b_00001,colorado middle of nowhere,0,1,0,0,1
1,eng_train_track_b_00002,this involved swimming a pretty large lake tha...,0,2,0,0,0
2,eng_train_track_b_00003,it was one of my most shameful experiences,0,1,0,3,0
3,eng_train_track_b_00004,after all i had vegetables coming out my ears ...,0,0,0,0,0
4,eng_train_track_b_00005,then the screaming started,0,3,0,1,2
...,...,...,...,...,...,...,...
2763,eng_train_track_b_02764,she cants her hip against my waist into my sid...,0,0,2,0,1
2764,eng_train_track_b_02765,i then did the dishes whitened my teeth watche...,0,0,0,0,0
2765,eng_train_track_b_02766,it just kind of gradually vanished over a coup...,0,0,0,0,1
2766,eng_train_track_b_02767,i didnt look out of my hands,0,1,0,0,0


### Data Augmentation

As the number of samples are pretty low i.e 2768 samples, the model performance will be very poor due to insufficient data. Here we are using a data augmentation method using a nlpaug library. There are several other methods for augmenting the data.

Data augmentation may or may not improve the performance of the model. In our approach it helps to imporve the model performance drastically.
NLPAug library offers three type of augmentation like character level, word level and Sentence level. Generating more augmented data will also costs the model performance, here we are trying to keep it simple.

In [None]:


augmenter = naw.SynonymAug(aug_src='wordnet', aug_p=0.2)
augmented_rows = []

emotion_columns = ['anger', 'fear', 'joy', 'sadness', 'surprise']

# Loop through each row in the original dataset
for idx, row in data.iterrows():
    original_text = str(row['text'])
    
    try:
        augmented_text = augmenter.augment(original_text)
    except:
        augmented_text = original_text

    # Only add if augmentation changed the sentence
    if augmented_text != original_text:
        new_row = {
            'id': f"{row['id']}_aug",  # Append _aug to indicate it's synthetic
            'text': augmented_text
        }
        # Copy emotion intensities
        for emo in emotion_columns:
            new_row[emo] = row[emo]
        augmented_rows.append(new_row)

aug_df = pd.DataFrame(augmented_rows)
combined_df = pd.concat([data, aug_df], ignore_index=True)

# Save to new CSV
# combined_df.to_csv("dataset_augmented.csv", index=False, encoding='utf-8')


In [None]:
pre = DataPreprocessor('../data/dataset_augmented.csv')
pre.preprocess()
aug_data = pre.data
aug_data

Unnamed: 0,id,text,anger,fear,joy,sadness,surprise
0,eng_train_track_b_00001,colorado middle of nowhere,0,1,0,0,1
1,eng_train_track_b_00002,this involved swimming a pretty large lake tha...,0,2,0,0,0
2,eng_train_track_b_00003,it was one of my most shameful experiences,0,1,0,3,0
3,eng_train_track_b_00004,after all i had vegetables coming out my ears ...,0,0,0,0,0
4,eng_train_track_b_00005,then the screaming started,0,3,0,1,2
...,...,...,...,...,...,...,...
5531,eng_train_track_b_02764_aug,she cants her hip against my waist into my sid...,0,0,2,0,1
5532,eng_train_track_b_02765_aug,i then did the dishes whitened my dentition wa...,0,0,0,0,0
5533,eng_train_track_b_02766_aug,information technology just kind of gradually ...,0,0,0,0,1
5534,eng_train_track_b_02767_aug,i didnt wait out of my hands,0,1,0,0,0


In [None]:

class EmotionDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts = texts.tolist()
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]

        encoding = self.tokenizer(
            text,
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt"
        )

        return {
            "input_ids": encoding["input_ids"].squeeze(),
            "attention_mask": encoding["attention_mask"].squeeze(),
            "labels": torch.tensor(label, dtype=torch.float)
        }

# Here we are using multilabel stratified split to ensure that the distribution of emotions is similar in both training and validation sets.
# This is important for multi-label classification tasks to ensure that each emotion is represented in both sets.
# If we use train test split, it may lead to imbalanced classes in the training and validation sets, which can affect the model's performance.
# We will use the DistilBertTokenizerFast to tokenize the text data.

X = aug_data["text"]  # This should already be a Series
y = aug_data[["anger", "fear", "joy", "sadness", "surprise"]].values
msss = MultilabelStratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)


tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')


for train_idx, val_idx in msss.split(X, y):
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y[train_idx], y[val_idx]

train_dataset = EmotionDataset(X_train, y_train, tokenizer)
val_dataset = EmotionDataset(X_val, y_val, tokenizer)



### Model Training

- This function trains the model for a specified number of epochs, monitors validation loss, a
- different hyperparameters can be adjusted to improve performance.
- It uses a linear learning rate scheduler and implements early stopping to prevent overfitting.

In [None]:

def train_model(model, train_dataset, val_dataset, num_epochs=10, batch_size=16, patience=3, learning_rate=2e-5):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=0.01)
    loss_fn = nn.MSELoss()

    total_steps = len(train_loader) * num_epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer, 
        num_warmup_steps=int(0.1 * total_steps), 
        num_training_steps=total_steps
    )

    best_val_loss = float("inf")
    patience_counter = 0
    best_model_state = None

    for epoch in range(num_epochs):
        model.train()
        train_losses = []

        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            optimizer.zero_grad()
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = loss_fn(outputs, labels)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            scheduler.step()

            train_losses.append(loss.item())

        avg_train_loss = np.mean(train_losses)

        # Validation
        model.eval()
        val_losses = []

        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch["input_ids"].to(device)
                attention_mask = batch["attention_mask"].to(device)
                labels = batch["labels"].to(device)

                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                loss = loss_fn(outputs, labels)
                val_losses.append(loss.item())

        avg_val_loss = np.mean(val_losses)
        print(f"Epoch {epoch+1}: Train Loss = {avg_train_loss:.4f} | Val Loss = {avg_val_loss:.4f}")

        # Early Stopping Logic
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            patience_counter = 0
            best_model_state = model.state_dict()
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered!")
                break

    # Load best model
    if best_model_state:
        model.load_state_dict(best_model_state)

    return model


In [None]:
# EmotionRegressor is a custom model defined in utils/emotion_regressor.py
# It should inherit from nn.Module and implement the forward method.
model = EmotionRegressor(dropout=0.5)
trained_model = train_model(model, train_dataset, val_dataset, num_epochs=10, batch_size=16, patience=3)


Epoch 1/10: 100%|██████████| 277/277 [01:11<00:00,  3.89it/s]


Epoch 1: Train Loss = 0.6088 | Val Loss = 0.4427


Epoch 2/10: 100%|██████████| 277/277 [01:11<00:00,  3.89it/s]


Epoch 2: Train Loss = 0.3649 | Val Loss = 0.2987


Epoch 3/10: 100%|██████████| 277/277 [01:11<00:00,  3.88it/s]


Epoch 3: Train Loss = 0.2486 | Val Loss = 0.2459


Epoch 4/10: 100%|██████████| 277/277 [01:11<00:00,  3.87it/s]


Epoch 4: Train Loss = 0.1799 | Val Loss = 0.2220


Epoch 5/10: 100%|██████████| 277/277 [01:12<00:00,  3.83it/s]


Epoch 5: Train Loss = 0.1413 | Val Loss = 0.2016


Epoch 6/10: 100%|██████████| 277/277 [01:12<00:00,  3.84it/s]


Epoch 6: Train Loss = 0.1137 | Val Loss = 0.1897


Epoch 7/10: 100%|██████████| 277/277 [01:12<00:00,  3.84it/s]


Epoch 7: Train Loss = 0.1001 | Val Loss = 0.1798


Epoch 8/10: 100%|██████████| 277/277 [01:13<00:00,  3.75it/s]


Epoch 8: Train Loss = 0.0878 | Val Loss = 0.1750


Epoch 9/10: 100%|██████████| 277/277 [01:15<00:00,  3.69it/s]


Epoch 9: Train Loss = 0.0808 | Val Loss = 0.1718


Epoch 10/10: 100%|██████████| 277/277 [01:15<00:00,  3.66it/s]


Epoch 10: Train Loss = 0.0772 | Val Loss = 0.1702


### Model Evaluation

- Evaluation function to compute MAE, RMSE, and R² for the trained model
- It is a good metric to evaluate the performance of regression models, especially in multi-label settings.

In [None]:


def evaluate_model(model, test_loader):
    model.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            all_preds.append(outputs.cpu().numpy())
            all_labels.append(labels.cpu().numpy())

    all_preds = np.concatenate(all_preds, axis=0)
    all_labels = np.concatenate(all_labels, axis=0)

    return all_preds, all_labels



In [None]:
def print_metrics(preds, labels, emotion_labels=["anger", "fear", "joy", "sadness", "surprise"]):
    for i, emotion in enumerate(emotion_labels):
        mae = mean_absolute_error(labels[:, i], preds[:, i])
        rmse = np.sqrt(mean_squared_error(labels[:, i], preds[:, i]))
        r2 = r2_score(labels[:, i], preds[:, i])
        print(f"\n🧠 Emotion: {emotion}")
        print(f"   - MAE:  {mae:.4f}")
        print(f"   - RMSE: {rmse:.4f}")
        print(f"   - R²:   {r2:.4f}")


In [None]:
# Assuming test_loader is your DataLoader for test set
test_loader = DataLoader(val_dataset, batch_size=16)  # Using validation set as test set for demonstration
preds, labels = evaluate_model(trained_model, test_loader )
print_metrics(preds, labels)



🧠 Emotion: anger
   - MAE:  0.1592
   - RMSE: 0.3323
   - R²:   0.6604

🧠 Emotion: fear
   - MAE:  0.3523
   - RMSE: 0.5099
   - R²:   0.7341

🧠 Emotion: joy
   - MAE:  0.1851
   - RMSE: 0.3512
   - R²:   0.7521

🧠 Emotion: sadness
   - MAE:  0.2750
   - RMSE: 0.4612
   - R²:   0.6998

🧠 Emotion: surprise
   - MAE:  0.2397
   - RMSE: 0.3872
   - R²:   0.6803



| Emotion   | MAE   | RMSE  | R²    | Performance Summary
|-----------|-------|-------|-------|----------------------
| Anger     | 0.159 | 0.332 | 0.660 | Very low error; slightly lower R²
| Fear      | 0.352 | 0.510 | 0.734 | Highest MAE; decent R² (still good)
| Joy       | 0.185 | 0.351 | 0.752 | Very balanced and strong
| Sadness   | 0.275 | 0.461 | 0.700 | Medium performance
| Surprise  | 0.240 | 0.387 | 0.680 | Decent generalization


 Key Observations:

- Joy shows the best R² (0.75) and low errors → The model predicts it well.

- Anger has the lowest MAE (0.1592) → Very accurate predictions.

- Fear has the highest MAE/RMSE → Might benefit from:

More balanced training samples for "fear"

Targeted augmentation focused on fearful contexts

- Sadness & Surprise are in the middle — good, but could be fine-tuned further.

In [None]:

# Save the model's state_dict
# torch.save(model.state_dict(), "emotion_classifier_model_2.pt")

