# Emotion Classification with XGBoost

This notebook demonstrates how to extract MFCC features from the nEMO dataset, train an XGBoost classifier, and evaluate its performance.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchaudio
from datasets import load_dataset, Audio
import numpy as np
import xgboost as xgb
from sklearn.metrics import accuracy_score, f1_score


  from .autonotebook import tqdm as notebook_tqdm


## Configuration and Hyperparameters

In [2]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SAMPLE_RATE = 16_000
BATCH_SIZE = 16
NUM_EPOCHS = 25
N_MFCC = 40


## Load and Prepare Dataset

In [3]:
# Load the nEMO dataset
raw_ds = load_dataset('amu-cai/nEMO', split='train')
ds = raw_ds.train_test_split(test_size=0.1, seed=42)
train_raw = ds['train']
eval_raw = ds['test']

# Cast to uniform sampling rate
train_raw = train_raw.cast_column('audio', Audio(sampling_rate=SAMPLE_RATE))
eval_raw = eval_raw.cast_column('audio', Audio(sampling_rate=SAMPLE_RATE))

# Label mapping
emotions = sorted(set(train_raw['emotion']))
label2id = {e:i for i,e in enumerate(emotions)}
id2label = {i:e for e,i in label2id.items()}
NUM_LABELS = len(emotions)
print(f"Found {NUM_LABELS} emotion classes: {emotions}")

Found 6 emotion classes: ['anger', 'fear', 'happiness', 'neutral', 'sadness', 'surprised']


## Feature Extraction (MFCC)

In [4]:
# Define MFCC transform and Dataset
mfcc_transform = torchaudio.transforms.MFCC(sample_rate=SAMPLE_RATE, n_mfcc=N_MFCC)

class NemoDataset(Dataset):
    def __init__(self, hf_dataset):
        self.dataset = hf_dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        sample = self.dataset[idx]
        waveform = torch.tensor(sample['audio']['array']).float()
        if waveform.ndim == 1:
            waveform = waveform.unsqueeze(0)
        mfcc = mfcc_transform(waveform)
        feats = mfcc.mean(dim=2).squeeze()
        label = label2id[sample['emotion']]
        return feats, label

# Create DataLoaders
torch_train = NemoDataset(train_raw)
torch_eval = NemoDataset(eval_raw)
train_loader = DataLoader(torch_train, batch_size=BATCH_SIZE, shuffle=True)
eval_loader = DataLoader(torch_eval, batch_size=BATCH_SIZE)




## Extract Features and Labels for XGBoost

In [5]:
def extract_features(loader):
    X_list, y_list = [], []
    for feats, labels in loader:
        X_list.append(feats.numpy())
        y_list.append(labels.numpy())
    X = np.concatenate(X_list, axis=0)
    y = np.concatenate(y_list, axis=0)
    return X, y

X_train, y_train = extract_features(train_loader)
X_eval, y_eval = extract_features(eval_loader)
print(f"Train: {X_train.shape}, Eval: {X_eval.shape}")

Train: (4032, 40), Eval: (449, 40)


## Train XGBoost Classifier

In [11]:
clf = xgb.XGBClassifier(
    objective='multi:softprob',
    num_class=NUM_LABELS,
    learning_rate=0.1,
    max_depth=10,
    n_estimators=500,
    use_label_encoder=False,
    eval_metric='mlogloss'
)

clf.fit(
    X_train, y_train,
    eval_set=[(X_eval, y_eval)],
    verbose=True
)

[0]	validation_0-mlogloss:1.67462
[1]	validation_0-mlogloss:1.58059
[2]	validation_0-mlogloss:1.50251


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[3]	validation_0-mlogloss:1.43138
[4]	validation_0-mlogloss:1.37248
[5]	validation_0-mlogloss:1.31721
[6]	validation_0-mlogloss:1.26382
[7]	validation_0-mlogloss:1.21560
[8]	validation_0-mlogloss:1.17478
[9]	validation_0-mlogloss:1.13579
[10]	validation_0-mlogloss:1.09763
[11]	validation_0-mlogloss:1.06281
[12]	validation_0-mlogloss:1.02901
[13]	validation_0-mlogloss:0.99820
[14]	validation_0-mlogloss:0.96960
[15]	validation_0-mlogloss:0.94035
[16]	validation_0-mlogloss:0.91696
[17]	validation_0-mlogloss:0.89522
[18]	validation_0-mlogloss:0.87346
[19]	validation_0-mlogloss:0.85080
[20]	validation_0-mlogloss:0.83005
[21]	validation_0-mlogloss:0.81182
[22]	validation_0-mlogloss:0.79418
[23]	validation_0-mlogloss:0.77898
[24]	validation_0-mlogloss:0.76339
[25]	validation_0-mlogloss:0.74847
[26]	validation_0-mlogloss:0.73207
[27]	validation_0-mlogloss:0.71685
[28]	validation_0-mlogloss:0.70468
[29]	validation_0-mlogloss:0.69016
[30]	validation_0-mlogloss:0.67662
[31]	validation_0-mlogloss:

## Evaluation

In [12]:
y_pred = clf.predict(X_eval)
acc = accuracy_score(y_eval, y_pred)
f1  = f1_score(y_eval, y_pred, average='weighted')
print(f"Accuracy: {acc:.4f}")
print(f"Weighted F1: {f1:.4f}")

Accuracy: 0.9042
Weighted F1: 0.9043
