In [1]:
import numpy as np
import librosa

def extract_features(file, max_pad_len=200):
    y, sr = librosa.load(file, sr=16000)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
    
    if mfccs.shape[1] < max_pad_len:
        pad_width = max_pad_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
    else:
        mfccs = mfccs[:, :max_pad_len]
    
    return mfccs


In [14]:
import os
import numpy as np

X, y, file_paths = [], [], []   # <-- added file_paths here
emotions = {"happy":0, "sad":1, "angry":2, "neutral":3}

# Base path where your folders are stored
base_path = r"E:/Speech_Data_urdu"

for emotion, label in emotions.items():
    folder = os.path.join(base_path, emotion)   # e.g. E:\Speech_Data\happy
    for file in os.listdir(folder):
        file_path = os.path.join(folder, file)
        
        # Make sure only audio files are processed
        if file_path.endswith(".wav"):
            features = extract_features(file_path)
            features = features.T   
            X.append(features)
            y.append(label)
            file_paths.append(file_path)   # now works ✅

X = np.array(X)
y = np.array(y)

print("Features shape:", X.shape)   # (num_samples, 40, 200)
print("Labels shape:", y.shape)     # (num_samples,)
print("File paths saved:", len(file_paths))   # should match X.shape[0]


Features shape: (400, 200, 40)
Labels shape: (400,)
File paths saved: 400


In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Bidirectional

num_classes = 4  # happy, sad, angry, neutral

model = Sequential([
    Bidirectional(LSTM(128, return_sequences=True), input_shape=(200, 40)),  # time=200, features=40
    Dropout(0.3),
    BatchNormalization(),

    Bidirectional(LSTM(64)),
    Dropout(0.3),
    BatchNormalization(),

    Dense(64, activation='relu'),
    Dropout(0.3),

    Dense(num_classes, activation='softmax')
])

model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.summary()


  super().__init__(**kwargs)


In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test, paths_train, paths_test = train_test_split(
    X, y, file_paths,
    test_size=0.2,
    random_state=42,
    stratify=y
)
history = model.fit(X_train, y_train,
                    validation_data=(X_test, y_test),
                    epochs=15,
                    batch_size=32)


Epoch 1/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 886ms/step - accuracy: 0.2647 - loss: 2.0143 - val_accuracy: 0.3500 - val_loss: 1.3041
Epoch 2/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 748ms/step - accuracy: 0.4344 - loss: 1.4572 - val_accuracy: 0.6500 - val_loss: 1.2034
Epoch 3/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 734ms/step - accuracy: 0.5494 - loss: 1.0484 - val_accuracy: 0.7000 - val_loss: 1.1022
Epoch 4/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 759ms/step - accuracy: 0.6279 - loss: 0.8978 - val_accuracy: 0.7875 - val_loss: 0.9641
Epoch 5/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 860ms/step - accuracy: 0.7835 - loss: 0.6343 - val_accuracy: 0.8375 - val_loss: 0.8076
Epoch 6/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 848ms/step - accuracy: 0.8212 - loss: 0.5184 - val_accuracy: 0.8500 - val_loss: 0.6580
Epoch 7/30
[1m10/10[0m [

In [10]:
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc*100:.2f}%")


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.4539 - loss: 2.3340
Test Accuracy: 46.25%


In [None]:
import numpy as np
import random
import IPython.display as ipd

# Pick a random test sample
i = random.randint(0, len(X_test) - 1)

# Prepare features for model input
sample_features = np.expand_dims(X_test[i], axis=0)

# Predict
prediction = model.predict(sample_features)
predicted_index = np.argmax(prediction)

# Labels
predicted_emotion = list(emotions.keys())[predicted_index]
true_emotion = list(emotions.keys())[y_test[i]]

print("Predicted Emotion:", predicted_emotion)
print("True Emotion:", true_emotion)
print("File:", paths_test[i])

# Play the actual audio
ipd.Audio(paths_test[i])
