In [1]:
import os
# --- MAGIC FIX FOR KERNEL CRASH ---
# Sets KMP_DUPLICATE_LIB_OK before torch and the other heavy libraries are
# imported below, so the variable is already in the process environment when
# they load.
# NOTE(review): presumably this suppresses the "OMP: Error #15" abort that is
# raised when more than one OpenMP runtime ends up in the same process --
# confirm. It is a workaround rather than a fix; verify that results are
# unaffected in this environment.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
import numpy as np
import copy
from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score, f1_score
from sklearn.utils.class_weight import compute_class_weight
from torch.amp import autocast, GradScaler

print("âœ… Libraries imported (Safe Mode).")

  from .autonotebook import tqdm as notebook_tqdm


âœ… Libraries imported (Safe Mode).


In [2]:
# --- 1. Setup Device ---
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
_device_kind = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_kind)
print(f"ðŸš€ Using device: {device}")

# --- 2. Define Model Name ---
# Author's rationale for this checkpoint: it has already seen MELD data.
model_name = "j-hartmann/emotion-english-distilroberta-base"

# --- 3. Define Data Paths ---
# One CSV per split, all living under DATA_DIR and named "<split>_text.csv".
DATA_DIR = '../data/MELD_processed/'
TRAIN_FILE, VAL_FILE, TEST_FILE = (
    os.path.join(DATA_DIR, f"{_split}_text.csv")
    for _split in ("train", "dev", "test")
)

for _caption, _value in (("Model", model_name), ("Train file", TRAIN_FILE)):
    print(f"{_caption}: {_value}")

ðŸš€ Using device: cuda
Model: j-hartmann/emotion-english-distilroberta-base
Train file: ../data/MELD_processed/train_text.csv


In [3]:
# --- 4. Load CSV metadata ---
# Each split is read from its own CSV; the keys below become the split names.
data_files = {"train": TRAIN_FILE, "val": VAL_FILE, "test": TEST_FILE}
dataset = load_dataset("csv", data_files=data_files)

# --- 5. Rename columns ---
# Normalise the column names to the "text" / "label" pair used by the rest of
# the notebook. The text column is only renamed when it is not already "text".
if "text" not in dataset["train"].column_names:
    dataset = dataset.rename_column("sentence", "text")
dataset = dataset.rename_column("emotion", "label")

# --- 6. Encode Labels ---
print("Encoding labels...")
dataset = dataset.class_encode_column("label")

# --- 7. Get Mappings ---
# The training split is the reference for the label vocabulary.
labels_list = dataset["train"].features["label"].names
num_labels = len(labels_list)

# class_encode_column derives the class names independently for every split.
# If a split lacked one of the emotions its integer ids would be shifted
# relative to the training split, silently corrupting every metric computed on
# it. Fail loudly instead of evaluating against misaligned labels.
for _split_name, _split in dataset.items():
    _split_labels = _split.features["label"].names
    if _split_labels != labels_list:
        raise ValueError(
            f"Label set of split '{_split_name}' ({_split_labels}) does not match "
            f"the training split ({labels_list}); label ids would be misaligned."
        )

label2id = {label: i for i, label in enumerate(labels_list)}
id2label = {i: label for i, label in enumerate(labels_list)}

print(f"Loaded {num_labels} labels: {labels_list}")

Encoding labels...
Loaded 7 labels: ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise']


In [4]:
# --- 8. Load Tokenizer ---
# Load the tokenizer published under `model_name`, the same checkpoint name the
# model-loading cell passes to from_pretrained.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# --- 9. Define Tokenization Function ---
def tokenize_function(batch):
    """Tokenize the "text" entries of a batch into fixed-length model inputs.

    Every sequence is truncated and padded to exactly 128 tokens, so all
    examples come out the same length.

    Args:
        batch: Mapping with a "text" key holding the raw utterances.

    Returns:
        Whatever the notebook-level `tokenizer` returns for those texts.
    """
    utterances = batch["text"]
    encoded = tokenizer(
        utterances,
        truncation=True,
        max_length=128,
        padding="max_length",  # fixed width keeps plain DataLoader batching workable
    )
    return encoded

# --- 10. Apply Tokenization ---
print("Tokenizing dataset...")

# Columns the training loop reads from each batch; only these are exposed as
# torch tensors once the format is set.
_tensor_columns = ["input_ids", "attention_mask", "label"]

# Tokenize every split in batches, then switch the result to torch output.
processed_dataset = dataset.map(function=tokenize_function, batched=True)
processed_dataset.set_format(columns=_tensor_columns, type="torch")

print("âœ… Data tokenized.")

Tokenizing dataset...


Map: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1108/1108 [00:00<00:00, 2790.81 examples/s]


âœ… Data tokenized.


In [6]:
from transformers import AutoModelForSequenceClassification

# --- 11. Load Model ---
print(f"Loading {model_name}...")

# Keyword arguments for the checkpoint load, collected in one place.
_load_options = dict(
    num_labels=num_labels,
    label2id=label2id,
    id2label=id2label,
    ignore_mismatched_sizes=True,  # lets the classification head be re-created if its shape differs
    use_safetensors=True,          # author's note: this avoids a security error on load
)
model = AutoModelForSequenceClassification.from_pretrained(model_name, **_load_options)

# --- FREEZE BASE LAYERS ---
# Only the classification head is left trainable; the encoder body is frozen.
print("ðŸ¥¶ Freezing base model layers...")

# Locate the encoder body: try the known attribute names first, then fall back
# to the generic `base_model` accessor.
for _body_attr in ("distilroberta", "roberta"):
    if hasattr(model, _body_attr):
        base_model = getattr(model, _body_attr)
        break
else:
    base_model = model.base_model

for _weight in base_model.parameters():
    _weight.requires_grad = False

# Verify freezing: list every parameter that will still receive gradients.
print("Checking trainable parameters:")
_still_trainable = [
    _param_name
    for _param_name, _param in model.named_parameters()
    if _param.requires_grad
]
for _param_name in _still_trainable:
    print(f"  -> {_param_name} is UN-FROZEN (Trainable)")

model = model.to(device)
print("âœ… Model loaded and frozen.")

Loading j-hartmann/emotion-english-distilroberta-base...
ðŸ¥¶ Freezing base model layers...
Checking trainable parameters:
  -> classifier.dense.weight is UN-FROZEN (Trainable)
  -> classifier.dense.bias is UN-FROZEN (Trainable)
  -> classifier.out_proj.weight is UN-FROZEN (Trainable)
  -> classifier.out_proj.bias is UN-FROZEN (Trainable)
âœ… Model loaded and frozen.


In [7]:
# --- 12. Calculate Class Weights (Fix Imbalance) ---
# "balanced" weights are inversely proportional to class frequency in the
# training split, so rare emotions contribute more to the loss.
train_labels = np.array(processed_dataset["train"]["label"])
class_weights_np = compute_class_weight(
    class_weight='balanced',
    classes=np.arange(num_labels),
    y=train_labels
)
# Keep the NumPy array and the device tensor under separate names so that
# re-running this cell never feeds a tensor back into NumPy code.
class_weights = torch.tensor(class_weights_np, dtype=torch.float).to(device)
print(f"Class Weights: {class_weights}")

# --- 13. Define Optimizer & Loss ---
# The encoder body was frozen in the model cell, so only parameters that still
# require gradients are handed to the optimizer. This keeps the optimizer's
# parameter groups and saved state limited to what is actually being trained,
# and raises immediately if everything was frozen by mistake.
# NOTE(review): lr=2e-5 is a typical full fine-tuning rate; with only the
# classification head trainable a larger value may converge faster -- left
# unchanged here because it is a modelling decision.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.AdamW(trainable_params, lr=2e-5)
criterion = nn.CrossEntropyLoss(weight=class_weights)

# --- 14. DataLoaders ---
# Standard loaders work because every example was padded to the same length
# during tokenization. Only the training split is shuffled.
train_dataloader = DataLoader(processed_dataset["train"], shuffle=True, batch_size=16)
val_dataloader = DataLoader(processed_dataset["val"], batch_size=16)

print("âœ… Optimizer and Loaders ready.")

Class Weights: tensor([1.2866, 5.2652, 5.3241, 0.8186, 0.3030, 2.0891, 1.1841],
       device='cuda:0')
âœ… Optimizer and Loaders ready.


In [None]:
# --- 15. Training Loop ---
def train_model(model, epochs=4):
    scaler = GradScaler('cuda') # Faster training with mixed precision
    best_f1 = 0.0
    
    for epoch in range(epochs):
        print(f"\n--- Epoch {epoch+1}/{epochs} ---")
        
        # Train
        model.train()
        train_loss = 0
        for batch in tqdm(train_dataloader, desc="Training"):
            input_ids = batch['input_ids'].to(device)
            mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)
            
            optimizer.zero_grad()
            
            with autocast('cuda'):
                outputs = model(input_ids=input_ids, attention_mask=mask)
                loss = criterion(outputs.logits, labels)
            
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            train_loss += loss.item()
            
        # Validation
        model.eval()
        val_preds, val_labels = [], []
        with torch.no_grad():
            for batch in val_dataloader:
                input_ids = batch['input_ids'].to(device)
                mask = batch['attention_mask'].to(device)
                labels = batch['label'].to(device)
                
                with autocast('cuda'):
                    outputs = model(input_ids=input_ids, attention_mask=mask)
                
                preds = torch.argmax(outputs.logits, dim=1)
                val_preds.extend(preds.cpu().numpy())
                val_labels.extend(labels.cpu().numpy())
        
        val_f1 = f1_score(val_labels, val_preds, average='weighted')
        val_acc = accuracy_score(val_labels, val_preds)
        
        print(f"Train Loss: {train_loss/len(train_dataloader):.4f}")
        print(f"Val Acc: {val_acc:.4f} | Val F1: {val_f1:.4f}")
        
        if val_f1 > best_f1:
            best_f1 = val_f1
            # Save the best model
            SAVE_PATH = "../models/ter_model_finetuned_expert.pth"
            torch.save(model.state_dict(), SAVE_PATH)
            print(f"âœ¨ Saved new best model to {SAVE_PATH}")

# --- Run Training ---
# Fine-tune the notebook-level `model` for four epochs. Per-epoch loss and
# validation metrics are printed by train_model itself.
print("ðŸš€ Starting Expert Fine-Tuning...")
train_model(model, epochs=4) 
print("âœ… Training Complete.")

ðŸš€ Starting Expert Fine-Tuning...

--- Epoch 1/4 ---


Training: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 625/625 [00:37<00:00, 16.78it/s]


Train Loss: 1.4818
Val Acc: 0.4549 | Val F1: 0.4799
âœ¨ Saved new best model to ../models/ter_model_finetuned_expert.pth

--- Epoch 2/4 ---


Training: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 625/625 [00:36<00:00, 17.33it/s]


Train Loss: 1.4278
Val Acc: 0.4540 | Val F1: 0.4799
âœ¨ Saved new best model to ../models/ter_model_finetuned_expert.pth

--- Epoch 3/4 ---


Training: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 625/625 [00:35<00:00, 17.78it/s]


Train Loss: 1.4117
Val Acc: 0.4657 | Val F1: 0.4894
âœ¨ Saved new best model to ../models/ter_model_finetuned_expert.pth

--- Epoch 4/4 ---


Training: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 625/625 [00:30<00:00, 20.35it/s]


Train Loss: 1.3987
Val Acc: 0.4819 | Val F1: 0.5011
