In [None]:
# setup
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Input, Concatenate, Dense, Dropout, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical

# ../src must be on sys.path before the local `model` module can be imported.
sys.path.append(os.path.abspath('../src'))
from model import build_mfcc_branch, build_spec_branch

In [None]:
# load and inspect features
FEATURES_DIR = '../data/features/'
MODEL_SAVE_PATH = '../results/models/emotion_dual_model.h5'

# Read the three arrays in a fixed order: MFCCs, spectrograms, then labels.
mfcc, spec, labels = (
    np.load(os.path.join(FEATURES_DIR, filename))
    for filename in ('mfccs.npy', 'spectrograms.npy', 'feature_labels.npy')
)

# Print one "<label>: <count>" line per distinct label to show class balance.
unique, counts = np.unique(labels, return_counts=True)
for position in range(len(unique)):
    label, count = unique[position], counts[position]
    print(f"{label}: {count}")

anger: 3688
frustration: 3778
happy: 2507
neutral: 4211
sad: 4010


In [None]:
# preprocess features, reshaping and normalizing
def _to_model_input(features, n_bins, name):
    """Reshape a raw 3-D feature array and standardize it.

    Args:
        features: 3-D array of raw features. Presumably laid out as
            (samples, bins, frames) or (samples, frames, bins) -- TODO confirm
            against the feature-extraction step.
        n_bins: size that identifies the bins axis. When axis 1 has this size,
            axes 1 and 2 are swapped so that it becomes axis 2.
        name: human-readable feature name, used only in the error message.

    Returns:
        Tuple ``(standardized, mean, std)`` where ``standardized`` has a
        trailing singleton axis appended and ``mean`` / ``std`` are the scalar
        statistics taken over the entire array.

    Raises:
        ValueError: if the array has zero standard deviation, because dividing
            by it would silently fill the features with NaN/inf.
    """
    if features.shape[1] == n_bins:
        features = np.transpose(features, (0, 2, 1))
    features = features[..., np.newaxis]

    mean = np.mean(features)
    std = np.std(features)
    if std == 0:
        raise ValueError(f"{name} features have zero standard deviation; cannot standardize")
    return (features - mean) / std, mean, std


# Only raw (3-D) arrays are processed. After one run the arrays are 4-D, so
# re-running this cell is a no-op: it neither appends another axis nor
# overwrites the saved statistics with those of already-standardized data.
# NOTE(review): the statistics are computed over all samples, before the
# train/validation split performed later in the notebook.
if mfcc.ndim == 3:
    mfcc, mfcc_mean, mfcc_std = _to_model_input(mfcc, 13, "MFCC")

if spec.ndim == 3:
    spec, spec_mean, spec_std = _to_model_input(spec, 128, "Spectrogram")


In [None]:
# encode labels and find class weights
le = LabelEncoder()
le.fit(labels)
y_int = le.transform(labels)  # integer class id per sample
y = to_categorical(y_int)     # one-hot targets for categorical_crossentropy

print("Class label order:", le.classes_)

# 'balanced' weights, keyed by integer class id for model.fit(class_weight=...).
class_ids = np.unique(y_int)
class_weights = compute_class_weight(class_weight='balanced', classes=class_ids, y=y_int)
class_weights_dict = dict(enumerate(class_weights))

Class label order: ['anger' 'frustration' 'happy' 'neutral' 'sad']


In [None]:
# train/validation split
# train_test_split is imported with the other dependencies in the setup cell.
# 20% of the samples are held out for validation. The split is stratified on
# the integer labels so both subsets keep the same class proportions, and the
# fixed random_state makes the split identical on every run. Passing all three
# arrays in one call keeps the MFCC, spectrogram and target rows aligned.
mfcc_train, mfcc_val, spec_train, spec_val, y_train, y_val = train_test_split(
    mfcc, spec, y, test_size=0.2, stratify=y_int, random_state=42
)

In [None]:
# build dual input model
# Each input takes the per-sample shape of its feature array (batch axis dropped).
mfcc_input = Input(shape=mfcc.shape[1:])
spec_input = Input(shape=spec.shape[1:])

# Branch builders come from the project's `model` module.
mfcc_branch, spec_branch = build_mfcc_branch(mfcc_input), build_spec_branch(spec_input)

# Merge both branches, then pass them through the shared classification head.
combined = Concatenate()([mfcc_branch, spec_branch])
x = combined
for head_layer in (Flatten(), Dense(128, activation='relu'), Dropout(0.5)):
    x = head_layer(x)

# One softmax unit per one-hot target column.
out = Dense(y.shape[1], activation='softmax')(x)

model = Model(inputs=[mfcc_input, spec_input], outputs=out)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# train model
# Keep only the model with the highest validation accuracy seen so far.
checkpoint = ModelCheckpoint(
    MODEL_SAVE_PATH,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)

train_inputs = [mfcc_train, spec_train]
val_inputs = [mfcc_val, spec_val]

history = model.fit(
    train_inputs,
    y_train,
    batch_size=32,
    epochs=20,
    verbose=1,
    callbacks=[checkpoint],
    validation_data=(val_inputs, y_val),
    class_weight=class_weights_dict,
)

print(f"Model trained and saved to {MODEL_SAVE_PATH}")

Epoch 1/20
[1m455/455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 537ms/step - accuracy: 0.2465 - loss: 2.2950



[1m455/455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 567ms/step - accuracy: 0.2466 - loss: 2.2937 - val_accuracy: 0.4553 - val_loss: 1.3003
Epoch 2/20
[1m455/455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 546ms/step - accuracy: 0.4017 - loss: 1.3722



[1m455/455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 575ms/step - accuracy: 0.4017 - loss: 1.3722 - val_accuracy: 0.4727 - val_loss: 1.2450
Epoch 3/20
[1m455/455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4337s[0m 10s/step - accuracy: 0.4314 - loss: 1.3203 - val_accuracy: 0.4617 - val_loss: 1.2719
Epoch 4/20
[1m455/455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 563ms/step - accuracy: 0.4420 - loss: 1.3032



[1m455/455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m267s[0m 586ms/step - accuracy: 0.4420 - loss: 1.3032 - val_accuracy: 0.4894 - val_loss: 1.2198
Epoch 5/20
[1m455/455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 542ms/step - accuracy: 0.4556 - loss: 1.2766



[1m455/455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m257s[0m 565ms/step - accuracy: 0.4556 - loss: 1.2766 - val_accuracy: 0.4985 - val_loss: 1.1980
Epoch 6/20
[1m455/455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 561ms/step - accuracy: 0.4665 - loss: 1.2564 - val_accuracy: 0.4952 - val_loss: 1.1958
Epoch 7/20
[1m455/455[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m264s[0m 579ms/step - accuracy: 0.4698 - loss: 1.2470 - val_accuracy: 0.4820 - val_loss: 1.2051
Epoch 8/20
[1m126/455[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m3:08[0m 573ms/step - accuracy: 0.4810 - loss: 1.2455

In [None]:
# visualization
# Draw the per-epoch training and validation accuracy curves on a single axes.
fig, ax = plt.subplots()
for metric_key, curve_label in (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
):
    ax.plot(history.history[metric_key], label=curve_label)
ax.set_title('Training & Validation Accuracy Over Epochs')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()