In [None]:
import os
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

# Dataset Path
# Root directory of the audio dataset, given relative to the notebook's working
# directory. It is walked recursively for .wav files, and the sample plot looks
# for an `Actor_01` subfolder inside it.
# NOTE(review): folder and file naming look like the RAVDESS speech archive — confirm.
dataset_path = 'Audio_Speech_Actors_01-24'

In [None]:
def extract_features(file_path, duration=3, sr=22050):
    """Load an audio file and return its log-mel spectrogram as a 3-D array.

    The clip is loaded (at most ``duration`` seconds), stripped of leading and
    trailing silence, then zero-padded or truncated to exactly
    ``duration * sr`` samples so every file yields the same output shape.

    Args:
        file_path: Path to an audio file readable by ``librosa.load``.
        duration: Target clip length in seconds (may be fractional).
        sr: Sampling rate in Hz. Used for loading, for the fixed clip length
            and for the mel filter bank, so all three stay consistent.

    Returns:
        ``numpy.ndarray`` of shape ``(128, n_frames, 1)`` holding dB values
        relative to the clip's own maximum. With the default arguments this
        is ``(128, 130, 1)``.
    """
    # Pass `sr` explicitly: otherwise librosa resamples to its own default
    # rate and the sample count / mel scale below would not match the audio.
    audio, _ = librosa.load(file_path, sr=sr, duration=duration, res_type='kaiser_fast')

    # Audio cleaning: drop leading/trailing silence ("dead air").
    audio, _ = librosa.effects.trim(audio)

    # Uniform length: np.pad and slicing need an integer sample count, so
    # round explicitly to support fractional durations.
    expected_length = int(round(duration * sr))
    n_missing = expected_length - len(audio)
    if n_missing > 0:
        audio = np.pad(audio, (0, n_missing), mode='constant')
    else:
        audio = audio[:expected_length]

    # Log-mel spectrogram; the trailing axis is the single "channel" a Conv2D expects.
    mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128)
    log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
    return log_mel_spec[..., np.newaxis]

In [None]:
# Exploratory check for the report: draw the log-mel spectrogram of one
# recording. Nothing is plotted when the sample file is not found.
sample_path = os.path.join(dataset_path, 'Actor_01/03-01-01-01-01-01-01.wav')
if os.path.exists(sample_path):
    spec = extract_features(sample_path)

    fig, ax = plt.subplots(figsize=(10, 4))
    # Drop the trailing channel axis so specshow receives a 2-D matrix.
    mesh = librosa.display.specshow(
        np.squeeze(spec),
        x_axis='time',
        y_axis='mel',
        ax=ax,
    )
    fig.colorbar(mesh, ax=ax, format='%+2.0f dB')
    ax.set_title('Log-Mel Spectrogram (EDA Phase)')
    plt.show()

In [None]:
# Build the feature tensor X and the integer label vector y from every .wav
# file found under dataset_path.
#
# The label is taken from the third dash-separated field of the file name
# (e.g. '03-01-05-...' -> code 5) and shifted to a zero-based class index.
NUM_EMOTIONS = 8  # valid zero-based labels are 0 .. NUM_EMOTIONS - 1

features, labels = [], []
skipped = 0
for subdir, dirs, files in os.walk(dataset_path):
    # os.walk yields entries in filesystem order; sorting in place makes the
    # traversal (and therefore the sample order) reproducible across machines.
    dirs.sort()
    for file in sorted(files):
        if not file.lower().endswith('.wav'):
            continue
        parts = file.split('-')
        # Ignore audio files whose name does not carry a usable emotion code
        # instead of crashing on int() or emitting an out-of-range label.
        if len(parts) <= 2 or not parts[2].isdigit():
            skipped += 1
            continue
        emotion = int(parts[2]) - 1
        if not 0 <= emotion < NUM_EMOTIONS:
            skipped += 1
            continue
        full_path = os.path.join(subdir, file)
        features.append(extract_features(full_path))
        labels.append(emotion)

# Fail here with a clear message rather than later inside the train/test split.
if not features:
    raise FileNotFoundError(
        f"No labelled .wav files found under {dataset_path!r}; check dataset_path."
    )

X = np.array(features)
y = np.array(labels)
print(f"Data Loaded: {X.shape[0]} samples")
if skipped:
    print(f"Skipped {skipped} .wav files without a valid emotion code")

In [None]:
# Fix every source of randomness (Python, NumPy, TensorFlow) so the split and
# the weight initialisation are the same on each Restart & Run All.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Hold out 20% of the recordings, keeping the class proportions of y.
# NOTE(review): the split is random over recordings, so clips from the same
# Actor_XX folder can end up on both sides — consider a per-actor split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=SEED
)

# Small CNN: one conv block, global average pooling, 8-way softmax.
model = models.Sequential([
    # Take the input shape from the data so it follows whatever
    # extract_features produces instead of a hard-coded (128, 130, 1).
    layers.Input(shape=X.shape[1:]),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.3),
    layers.GlobalAveragePooling2D(),
    layers.Dense(8, activation='softmax')
])

# Labels are integer class indices, hence the sparse cross-entropy loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Train for 30 epochs. The held-out split (X_test, y_test) is supplied as the
# validation data, so the "Val" curves below are measured on that same split.
history = model.fit(
    X_train,
    y_train,
    epochs=30,
    validation_data=(X_test, y_test),
)

# Training curves: accuracy in the left panel, loss in the right one.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))
panels = (
    (ax_acc, 'accuracy', 'Train Accuracy', 'val_accuracy', 'Val Accuracy', 'Accuracy Curve'),
    (ax_loss, 'loss', 'Train Loss', 'val_loss', 'Val Loss', 'Loss Curve'),
)
for ax, train_key, train_label, val_key, val_label, title in panels:
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_title(title)
    ax.legend()
plt.show()

In [None]:
# Class names, indexed by the zero-based label used during training.
emotions = ['Neutral', 'Calm', 'Happy', 'Sad', 'Angry', 'Fearful', 'Disgust', 'Surprised']
# Pass the full label set explicitly: without it scikit-learn infers the
# classes from y_test / y_pred, so a class missing from both would make the
# report reject target_names and shrink the confusion matrix below 8x8.
class_ids = np.arange(len(emotions))

# Predictions: most probable class per test sample.
y_pred = np.argmax(model.predict(X_test), axis=1)

# Classification Report (per-class precision/recall/F1 plus macro averages)
print(classification_report(y_test, y_pred, labels=class_ids, target_names=emotions))

# Confusion Matrix: rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred, labels=class_ids)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', xticklabels=emotions, yticklabels=emotions, cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Persist the trained model in the native Keras format.
model.save('ser_model.keras')