In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from keras import layers, models
from sklearn.model_selection import train_test_split

# Root folder that the loading cell walks recursively (os.walk) looking for
# .wav files. Relative path, so it is resolved against the kernel's working
# directory.
# NOTE(review): the folder name looks like the RAVDESS speech release — confirm.
dataset_path = 'Audio_Speech_Actors_01-24'


In [None]:
def build_logmel_feature(audio_file, duration=3, sr=22050):
    """Turn one audio file into a normalized, fixed-size log-mel spectrogram.

    Pipeline: load at most ``duration`` seconds resampled to ``sr``, trim
    leading/trailing silence, zero-pad or truncate to exactly
    ``duration * sr`` samples, compute a 128-band mel power spectrogram,
    convert to dB relative to the clip's peak, then standardize to zero mean
    and unit variance over the whole clip.

    Args:
        audio_file: Path (or file-like object) accepted by ``librosa.load``.
        duration: Clip length in seconds; may be fractional.
        sr: Sample rate in Hz used for loading, length computation and the
            mel filterbank.

    Returns:
        Array of shape ``(128, n_frames, 1)``; the trailing axis is a channel
        dimension added for 2-D convolution layers.
    """
    # Pass sr explicitly: without it librosa resamples to its own default,
    # which silently disagrees with target_len and the mel filterbank whenever
    # the caller asks for a different rate.
    signal, _ = librosa.load(audio_file, sr=sr, duration=duration)

    # Remove leading/trailing silence
    signal, _ = librosa.effects.trim(signal)

    # Ensure equal length. The sample count must be an int: a fractional
    # duration would otherwise yield a float that np.pad and slicing reject.
    target_len = int(round(duration * sr))
    if len(signal) < target_len:
        signal = np.pad(signal, (0, target_len - len(signal)))
    else:
        signal = signal[:target_len]

    mel = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=128)
    logmel = librosa.power_to_db(mel, ref=np.max)

    # Normalize per clip; the epsilon guards against a zero std on
    # constant (e.g. fully silent) input.
    logmel = (logmel - np.mean(logmel)) / (np.std(logmel) + 1e-6)

    return logmel[..., np.newaxis]


In [None]:
# Build the feature tensor and label vector from every .wav file under
# dataset_path. The label is taken from the third dash-separated field of the
# file name (1-based in the name, stored 0-based).
features_list = []
label_list = []

for root_dir, folder_names, file_names in os.walk(dataset_path):

    # os.walk yields directories and files in arbitrary, filesystem-dependent
    # order. Sorting the directory list in place steers the top-down traversal,
    # and sorting the files fixes the sample order, so the seeded splits that
    # follow select the same samples on every machine.
    folder_names.sort()

    for wav_file in sorted(file_names):

        if not wav_file.endswith('.wav'):
            continue

        name_parts = wav_file.split('-')

        # Skip files whose name has no third field to read the label from.
        if len(name_parts) <= 2:
            continue

        emotion_id = int(name_parts[2]) - 1
        absolute_path = os.path.join(root_dir, wav_file)

        features_list.append(build_logmel_feature(absolute_path))
        label_list.append(emotion_id)

# Fail here with a clear message instead of letting an empty array reach
# train_test_split, where the resulting error does not mention the path.
if not features_list:
    raise FileNotFoundError(
        f"No labelled .wav files found under {dataset_path!r}"
    )

features_array = np.array(features_list)
labels_array = np.array(label_list)

print("Dataset Loaded:", features_array.shape)


In [None]:
# Stage 1: keep 80% for training and hold out 20%, preserving class
# proportions via stratification.
train_x, temp_x, train_y, temp_y = train_test_split(
    features_array, labels_array,
    test_size=0.2, stratify=labels_array, random_state=42,
)

# Stage 2: cut the held-out portion in half to obtain validation and test
# sets (10% of the full dataset each), again stratified.
val_x, test_x, val_y, test_y = train_test_split(
    temp_x, temp_y,
    test_size=0.5, stratify=temp_y, random_state=42,
)

print("Train shape:", train_x.shape)


In [None]:
# Seed the Python, NumPy and TensorFlow RNGs before any layer is created so
# that weight initialization and other random ops repeat across
# Restart & Run All. (Some GPU kernels may still be nondeterministic.)
tf.keras.utils.set_random_seed(42)

# Small 2-D CNN over (mel bands, frames, 1) inputs: three Conv -> BatchNorm ->
# MaxPool stages with doubling filter counts, global average pooling, dropout,
# and a dense classifier head.
model = models.Sequential([

    # Input shape is taken from the training data (everything but the batch axis).
    layers.Input(shape=train_x.shape[1:]),

    layers.Conv2D(32,(3,3),activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2,2)),

    layers.Conv2D(64,(3,3),activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2,2)),

    layers.Conv2D(128,(3,3),activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2,2)),

    # Collapse the spatial axes to one value per filter.
    layers.GlobalAveragePooling2D(),

    layers.Dropout(0.4),

    layers.Dense(64,activation='relu'),
    # One softmax output per class; 8 classes are hard-coded here.
    layers.Dense(8,activation='softmax')
])

# Sparse categorical cross-entropy matches the integer (non one-hot) labels.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()


In [None]:
# Train for 35 epochs in mini-batches of 32, reporting loss/accuracy on the
# validation split after every epoch. The returned History object keeps the
# per-epoch metrics.
history = model.fit(
    x=train_x, y=train_y,
    validation_data=(val_x, val_y),
    batch_size=32, epochs=35,
)


In [None]:
# Persist the trained model to a single .keras file in the working directory.
model.save(filepath='ser_model.keras')
print("Model trained and saved as ser_model.keras")


In [None]:
# Final check on the held-out test split; evaluate() returns the loss followed
# by the metrics given to compile(), i.e. accuracy.
loss, accuracy = model.evaluate(x=test_x, y=test_y)
print(f"Test Accuracy: {accuracy * 100:.2f}%")
