***MULTI-LABEL EMOTION DETECTION***

***Modules loading***

In [1]:

import torch
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, hamming_loss
from transformers import BertTokenizer

from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    get_linear_schedule_with_warmup
)
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import AdamW
from tqdm import tqdm
from torch.cuda.amp import autocast, GradScaler
import warnings

warnings.filterwarnings('ignore')

***SEEDING***

In [2]:
# SEEDING
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

***LOAD DATA***

In [3]:
# LOAD DATA
try:
    df = pd.read_csv('go_emotions_dataset.csv')
except FileNotFoundError:
    raise FileNotFoundError("Could not find go_emotions_dataset.csv")

***SAMPLES***

In [4]:
df = df.sample(5000, random_state=42)

***TEXT-CLEANING***

In [5]:
df['text'] = df['text'].str.replace(r'http\S+|www\S+', '', regex=True).str.strip()
label_columns = df.columns[4:]


***HANDLING ERROR***

In [6]:
if not all(col in df.columns for col in ['text'] + list(label_columns)):
    raise ValueError("Dataset missing required columns")
if df['text'].isnull().any() or df[label_columns].isnull().any().any():
    raise ValueError("Dataset contains missing values")
if not df[label_columns].apply(lambda x: x.isin([0, 1])).all().all():
    raise ValueError("Labels must be binary (0 or 1)")

In [7]:
texts = df["text"].values
labels = df[label_columns].values


***TRAINING AND TESTING***

In [8]:
X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

***TOKENIZER***

In [9]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
max_len = 128

def tokenize(texts):
    return tokenizer(list(texts), truncation=True, padding=True, max_length=max_len, return_tensors='pt')

train_enc = tokenize(X_train)
val_enc = tokenize(X_val)
test_enc = tokenize(X_test)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [10]:
def create_dataset(enc, lbls):
    return TensorDataset(enc['input_ids'], enc['attention_mask'], torch.tensor(lbls, dtype=torch.float))

train_dataset = create_dataset(train_enc, y_train)
val_dataset = create_dataset(val_enc, y_val)
test_dataset = create_dataset(test_enc, y_test)


In [11]:
batch_size = 32  # Increased batch size
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# MODEL
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=len(label_columns),
    problem_type="multi_label_classification"
).to(device)

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:

# LOSS FUNCTION WITH POS WEIGHT
label_freq = df[label_columns].mean().values
pos_weight = torch.tensor((1 - label_freq) / np.clip(label_freq, 0.01, 0.99)).to(device)
loss_fn = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)

In [14]:
# OPTIMIZER & SCHEDULER
optimizer = AdamW(model.parameters(), lr=2e-5)
epochs = 3
warmup_steps = int(0.1 * len(train_loader) * epochs)
scheduler = get_linear_schedule_with_warmup(optimizer, warmup_steps, len(train_loader) * epochs)

In [15]:
# MIXED PRECISION
scaler = GradScaler()

# TRAINING LOOP WITH EARLY STOPPING
best_val_f1 = 0
patience = 2
patience_counter = 0

for epoch in range(epochs):
    model.train()
    total_loss = 0

    loop = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs}")
    for batch in loop:
        input_ids, attention_mask, labels = [x.to(device) for x in batch]
        optimizer.zero_grad()

        with autocast():
            outputs = model(input_ids, attention_mask=attention_mask)
            loss = loss_fn(outputs.logits, labels)

        scaler.scale(loss).backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        total_loss += loss.item()
        loop.set_postfix(loss=loss.item())

    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch + 1} - Train Loss: {avg_loss:.4f}")

Epoch 1/3: 100%|██████████| 113/113 [27:27<00:00, 14.58s/it, loss=1.62]


Epoch 1 - Train Loss: 1.2933


Epoch 2/3: 100%|██████████| 113/113 [27:18<00:00, 14.50s/it, loss=0.914]


Epoch 2 - Train Loss: 1.1859


Epoch 3/3: 100%|██████████| 113/113 [27:14<00:00, 14.47s/it, loss=1.34]

Epoch 3 - Train Loss: 1.0997





***VALIDATION***

In [17]:
# Validation
model.eval() # Fixed: removed unexpected indentation
preds, trues, probs = [], [], []
with torch.no_grad():
    for batch in val_loader:
        input_ids, attention_mask, labels = [x.to(device) for x in batch]
        logits = model(input_ids, attention_mask=attention_mask).logits
        prob = torch.sigmoid(logits)
        pred = prob > 0.5
        preds.extend(pred.cpu().numpy())
        trues.extend(labels.cpu().numpy())
        probs.extend(prob.cpu().numpy())

val_f1 = f1_score(trues, preds, average="micro")
val_ham = hamming_loss(trues, preds)
print(f"Validation Micro F1: {val_f1:.4f} | Hamming Loss: {val_ham:.4f}")


Validation Micro F1: 0.1419 | Hamming Loss: 0.3684


***EARLY STOPPING***

In [23]:
# Early stopping
if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        patience_counter = 0
        torch.save(model.state_dict(), "best_model.pt")
else:
        patience_counter += 1
if patience_counter >= patience:
        print("Early stopping triggered")


Early stopping triggered


***LOAD MODEL***

In [24]:
model.load_state_dict(torch.load("best_model.pt"))

<All keys matched successfully>

In [25]:
def find_best_thresholds(val_trues, val_probs):
    thresholds = np.arange(0.1, 0.9, 0.1)
    best_thresholds = []
    for i in range(val_trues.shape[1]):
        best_f1, best_t = 0, 0.5
        for t in thresholds:
            preds = (val_probs[:, i] > t).astype(int)
            f1 = f1_score(val_trues[:, i], preds)
            if f1 > best_f1:
                best_f1, best_t = f1, t
        best_thresholds.append(best_t)
    return best_thresholds

val_trues = np.array(trues)
val_probs = np.array(probs)
best_thresholds = find_best_thresholds(val_trues, val_probs)
print("Best Thresholds per Emotion:", dict(zip(label_columns, best_thresholds)))

Best Thresholds per Emotion: {'amusement': np.float64(0.6), 'anger': np.float64(0.6), 'annoyance': np.float64(0.5), 'approval': np.float64(0.6), 'caring': np.float64(0.6), 'confusion': np.float64(0.7000000000000001), 'curiosity': np.float64(0.6), 'desire': np.float64(0.7000000000000001), 'disappointment': np.float64(0.5), 'disapproval': np.float64(0.4), 'disgust': np.float64(0.5), 'embarrassment': np.float64(0.6), 'excitement': np.float64(0.7000000000000001), 'fear': np.float64(0.5), 'gratitude': np.float64(0.8), 'grief': 0.5, 'joy': np.float64(0.7000000000000001), 'love': np.float64(0.7000000000000001), 'nervousness': np.float64(0.7000000000000001), 'optimism': np.float64(0.6), 'pride': np.float64(0.4), 'realization': np.float64(0.6), 'relief': np.float64(0.4), 'remorse': np.float64(0.8), 'sadness': np.float64(0.6), 'surprise': np.float64(0.6), 'neutral': np.float64(0.5)}


In [26]:
model.eval()
test_preds, test_trues, test_probs = [], [], []
with torch.no_grad():
    for batch in test_loader:
        input_ids, attention_mask, labels = [x.to(device) for x in batch]
        logits = model(input_ids, attention_mask=attention_mask).logits
        prob = torch.sigmoid(logits)
        test_probs.extend(prob.cpu().numpy())
        test_trues.extend(labels.cpu().numpy())

test_probs = np.array(test_probs)
test_preds = np.zeros_like(test_probs)
for i in range(len(label_columns)):
    test_preds[:, i] = (test_probs[:, i] > best_thresholds[i]).astype(int)

micro_f1 = f1_score(test_trues, test_preds, average="micro")
macro_f1 = f1_score(test_trues, test_preds, average="macro")
ham_loss = hamming_loss(test_trues, test_preds)

print("\n----- Final Test Results -----")
print(f"Micro F1 Score:  {micro_f1:.4f}")
print(f"Macro F1 Score:  {macro_f1:.4f}")
print(f"Hamming Loss:   {ham_loss:.4f}")


----- Final Test Results -----
Micro F1 Score:  0.1736
Macro F1 Score:  0.1607
Hamming Loss:   0.2049


In [27]:
def predict_emotion(text, thresholds=0.5):
    try:
        model.eval()
        encoding = tokenizer(text, truncation=True, padding=True, max_length=max_len, return_tensors="pt")
        input_ids = encoding["input_ids"].to(device)
        attention_mask = encoding["attention_mask"].to(device)

        with torch.no_grad():
            logits = model(input_ids, attention_mask=attention_mask).logits
            probs = torch.sigmoid(logits).cpu().numpy().flatten()
            if isinstance(thresholds, list):
                preds = [1 if probs[i] > thresholds[i] else 0 for i in range(len(probs))]
            else:
                preds = (probs > thresholds).astype(int)

        emotions = [label_columns[i] for i, p in enumerate(preds) if p == 1]
        scores = {label_columns[i]: f"{probs[i]:.2f}" for i in range(len(probs))}
        print("\nInput Text:", text)
        print("Detected Emotions:", ", ".join(emotions) if emotions else "None")
        print("Confidence Scores:", scores)
        return {"emotions": emotions, "scores": scores}
    except Exception as e:
        print(f"Error predicting emotion: {e}")
        return None

In [28]:
predict_emotion("I feel grateful and joyful for everything I have.", thresholds=best_thresholds)


Input Text: I feel grateful and joyful for everything I have.
Detected Emotions: approval, caring, desire, gratitude, joy, love, optimism, pride, relief
Confidence Scores: {'amusement': '0.54', 'anger': '0.41', 'annoyance': '0.39', 'approval': '0.66', 'caring': '0.67', 'confusion': '0.16', 'curiosity': '0.41', 'desire': '0.73', 'disappointment': '0.35', 'disapproval': '0.21', 'disgust': '0.34', 'embarrassment': '0.33', 'excitement': '0.59', 'fear': '0.28', 'gratitude': '0.81', 'grief': '0.38', 'joy': '0.75', 'love': '0.74', 'nervousness': '0.28', 'optimism': '0.66', 'pride': '0.58', 'realization': '0.33', 'relief': '0.54', 'remorse': '0.40', 'sadness': '0.41', 'surprise': '0.43', 'neutral': '0.36'}


{'emotions': ['approval',
  'caring',
  'desire',
  'gratitude',
  'joy',
  'love',
  'optimism',
  'pride',
  'relief'],
 'scores': {'amusement': '0.54',
  'anger': '0.41',
  'annoyance': '0.39',
  'approval': '0.66',
  'caring': '0.67',
  'confusion': '0.16',
  'curiosity': '0.41',
  'desire': '0.73',
  'disappointment': '0.35',
  'disapproval': '0.21',
  'disgust': '0.34',
  'embarrassment': '0.33',
  'excitement': '0.59',
  'fear': '0.28',
  'gratitude': '0.81',
  'grief': '0.38',
  'joy': '0.75',
  'love': '0.74',
  'nervousness': '0.28',
  'optimism': '0.66',
  'pride': '0.58',
  'realization': '0.33',
  'relief': '0.54',
  'remorse': '0.40',
  'sadness': '0.41',
  'surprise': '0.43',
  'neutral': '0.36'}}