In [1]:
import os
import torch
from torch.utils.data import DataLoader

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from transformers import DebertaV2TokenizerFast, DebertaV2ForSequenceClassification, Trainer, TrainingArguments
from tabulate import tabulate
from collections import Counter


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#Load Data
df = pd.read_csv('multimodal_dataset_normalized.csv')
display(df.head())

Unnamed: 0,Audio_Song,Lyric_Song,Arousal,Valence,Quadrant,Emotion,lyric_id,word_count,unique_word_count,lexical_diversity,...,rms_mean,rms_std,beat_strength,low_energy_ratio,energy_entropy,brightness,warmth,activity,harmonic_energy_ratio,harmonicity
0,A005,L055,0.7875,0.6875,Q1,Surprise,L055,0.583846,-0.032136,-1.136493,...,-1.315662,-1.165455,0.896397,-0.700513,-0.145042,1.086334,-0.092941,[0.06471955],-1.951364,-0.583475
1,A011,L061,0.68125,0.85625,Q1,Happiness,L061,-0.54914,-0.97493,-1.286263,...,0.107522,-0.408386,0.006682,0.068631,0.750471,0.147696,0.04022,[0.18521025],0.472448,-0.078961
2,A014,L064,0.8625,0.725,Q1,Surprise,L064,0.415581,-0.162177,-1.133997,...,0.361195,-0.265328,0.710477,-0.673247,0.854382,1.970159,-0.798261,[0.17745368],-0.949336,-0.454885
3,A019,L069,0.78125,0.81875,Q1,Excitement,L069,-0.229436,-0.308472,-0.401603,...,-0.219121,-0.979168,-0.061622,-0.34051,1.062094,0.474343,0.110013,[0.13742129],0.24476,-0.150335
4,A022,L072,0.76875,0.8375,Q1,Excitement,L072,-0.599619,-0.828634,-0.755787,...,0.916902,0.959027,-0.008507,-0.482841,0.29892,-0.169472,0.269772,[0.22606403],-0.078616,-0.234134


In [3]:
#Emotion Class
unique_items = df["Quadrant"].unique()
print(unique_items)

all_classes = unique_items
class_to_idx = {cls: i for i, cls in enumerate(all_classes)}
print("Class to index mapping:", class_to_idx)

['Q1' 'Q2' 'Q3' 'Q4']
Class to index mapping: {'Q1': 0, 'Q2': 1, 'Q3': 2, 'Q4': 3}


In [4]:
#Extract path and emotion
data_dir = "MERGE_Bimodal_Complete/lyrics"
labels_dict = {f"{row['Quadrant']}/{row['Lyric_Song']}.txt": row['Quadrant']
               for _, row in df.iterrows()}
print(labels_dict)
print(dict(Counter(labels_dict.values())))

{'Q1/L055.txt': 'Q1', 'Q1/L061.txt': 'Q1', 'Q1/L064.txt': 'Q1', 'Q1/L069.txt': 'Q1', 'Q1/L072.txt': 'Q1', 'Q1/L074.txt': 'Q1', 'Q1/L089.txt': 'Q1', 'Q1/L092.txt': 'Q1', 'Q1/L093.txt': 'Q1', 'Q1/L097.txt': 'Q1', 'Q1/L100.txt': 'Q1', 'Q1/L114-116.txt': 'Q1', 'Q1/L127-122.txt': 'Q1', 'Q1/L136-123.txt': 'Q1', 'Q1/L142-96.txt': 'Q1', 'Q1/L144-110.txt': 'Q1', 'Q1/L145-113.txt': 'Q1', 'Q1/L150-124.txt': 'Q1', 'Q1/L151-111.txt': 'Q1', 'Q1/L152-114.txt': 'Q1', 'Q1/L154-119.txt': 'Q1', 'Q1/L155-117.txt': 'Q1', 'Q1/L157-115.txt': 'Q1', 'Q1/L004-120.txt': 'Q1', 'Q1/L005-84.txt': 'Q1', 'Q1/L018-109.txt': 'Q1', 'Q1/L022-121.txt': 'Q1', 'Q1/L036-118.txt': 'Q1', 'Q1/L038-102.txt': 'Q1', 'Q1/L042-108.txt': 'Q1', 'Q1/L043-130.txt': 'Q1', 'Q1/L050-149.txt': 'Q1', 'Q1/L162.txt': 'Q1', 'Q1/L166.txt': 'Q1', 'Q1/L167.txt': 'Q1', 'Q1/L187.txt': 'Q1', 'Q1/L196.txt': 'Q1', 'Q1/MT0000040773.txt': 'Q1', 'Q1/MT0000045345.txt': 'Q1', 'Q1/MT0000080053.txt': 'Q1', 'Q1/MT0000091401.txt': 'Q1', 'Q1/MT0000093583.txt': '

In [5]:
model_name = 'microsoft/deberta-v3-base'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

X_list = []
y_list = []

for file, label in labels_dict.items():
    filepath = os.path.join(data_dir, file)
    
    with open(filepath, "r", encoding="utf-8") as f:
        X_list.append(f.read())
        y_list.append(class_to_idx[label])

X_trainval, X_test, y_trainval, y_test = train_test_split(
    X_list, y_list, test_size=0.2, random_state=42, stratify=y_list
)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.25, random_state=42, stratify=y_trainval
)

print("Train size:", len(X_train), len(y_train))
print("Val size:", len(X_val), len(y_val))
print("Test size:", len(X_test), len(y_test))

# Initialize the BERT tokenizer
tokenizer = DebertaV2TokenizerFast.from_pretrained(model_name)

X_train = tokenizer(list(X_train), truncation=True, padding=True, max_length=128, return_tensors="pt")
X_val = tokenizer(list(X_val), truncation=True, padding=True, max_length=128, return_tensors="pt")
X_test = tokenizer(list(X_test), truncation=True, padding=True, max_length=128, return_tensors="pt")

Train size: 1329 1329
Val size: 443 443
Test size: 444 444




In [6]:
class TextDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: val[idx].detach().clone() for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

In [7]:
train_dataset = TextDataset(X_train, list(y_train))
val_dataset = TextDataset(X_val, list(y_val))
test_dataset = TextDataset(X_test, list(y_test))

model = DebertaV2ForSequenceClassification.from_pretrained(model_name, num_labels=4)

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)  # Single-label prediction
    return {
        "accuracy": accuracy_score(labels, predictions),
        "precision": precision_score(labels, predictions, average="macro", zero_division=1),
        "recall": recall_score(labels, predictions, average="macro", zero_division=1),
        "f1": f1_score(labels, predictions, average="macro", zero_division=1),
    }

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
torch.cuda.empty_cache()
print("Flushed CUDA cache")

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=5,
    per_device_train_batch_size=12,
    per_device_eval_batch_size=12,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    eval_strategy="epoch"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,   # your TokenizedDataset instance
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

# Train the model
trainer.train()

print("Training done")

# Check results on the test set
metrics = trainer.evaluate(eval_dataset=test_dataset)
print(tabulate(metrics.items(), headers=["Metric", "Value"], tablefmt="pretty"))

Flushed CUDA cache


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2288,1.159821,0.474041,0.734238,0.43319,0.303593
2,0.7427,0.808292,0.690745,0.689529,0.675314,0.672668
3,1.0368,0.957246,0.641084,0.668557,0.633945,0.634083
4,0.6076,0.808538,0.715576,0.709788,0.708177,0.706561
5,0.4878,0.922942,0.706546,0.699288,0.697344,0.696814


Training done


+-------------------------+--------------------+
|         Metric          |       Value        |
+-------------------------+--------------------+
|        eval_loss        | 0.9085888266563416 |
|      eval_accuracy      | 0.6891891891891891 |
|     eval_precision      | 0.6765863393388956 |
|       eval_recall       | 0.6774180911680913 |
|         eval_f1         | 0.6737530934432109 |
|      eval_runtime       |       3.9467       |
| eval_samples_per_second |      112.498       |
|  eval_steps_per_second  |       9.375        |
|          epoch          |        5.0         |
+-------------------------+--------------------+


In [10]:
torch.cuda.empty_cache()
print("Flushed CUDA cache")

# Create a DataLoader for the test set
test_loader = DataLoader(test_dataset, batch_size=8)

model.config.output_hidden_states = True
model.config.output_attentions = True

model.eval()
hidden_state_batches = []
attention_batches = []


with torch.no_grad():
    batches_done = 0
    for batch in test_loader:
        batch = {k: v.to(model.device) for k, v in batch.items()}
        outputs = model(**batch)

        # Choose the layer you want (e.g., last layer)
        last_hidden = outputs.hidden_states[-1]  # shape: [batch_size, seq_len, hidden_dim]
        last_attention = outputs.attentions[-1]  # shape: [batch_size, num_heads, seq_len, seq_len]

        # Optionally reduce attention across heads (e.g., mean)
        attn_weights = last_attention.mean(dim=1)  # shape: [batch_size, seq_len, seq_len]
        attn_diag = torch.diagonal(attn_weights, dim1=1, dim2=2)  # shape: [batch_size, seq_len]

        hidden_state_batches.append(last_hidden)
        attention_batches.append(attn_diag)

        batches_done += 1
        print(f"Batches done: {batches_done}", end="\r")   
    print("\nDone")
    

# Concatenate across batches
x_text = torch.cat(hidden_state_batches, dim=0)  # shape: [total_samples, seq_len, hidden_dim]
a_text = torch.cat(attention_batches, dim=0)     # shape: [total_samples, seq_len]

Flushed CUDA cache
Donehes done: 56


In [11]:
print(x_text.shape)
print(a_text.shape)

torch.Size([444, 128, 768])
torch.Size([444, 128])
