In [71]:
import os
import glob
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout, Flatten, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping


In [73]:
def extract_mfcc_sequence(file_path, n_mfcc=40, max_len=200):
    """Load an audio file and return a fixed-length MFCC sequence.

    The clip is decoded with ``librosa.load`` (``res_type='scipy'``) and
    ``n_mfcc`` coefficients are computed per frame.  The frame axis is then
    zero-padded on the right, or cut off, so that it holds exactly
    ``max_len`` frames; this lets clips be stacked into one batch array.

    Args:
        file_path: Path of the audio file to read.
        n_mfcc: Number of MFCC coefficients computed per frame.
        max_len: Number of frames in the returned sequence.

    Returns:
        Array of shape ``(max_len, n_mfcc)`` (frames first).
    """
    signal, sample_rate = librosa.load(file_path, res_type='scipy')
    coeffs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)

    # Positive when the clip is shorter than the target number of frames.
    missing_frames = max_len - coeffs.shape[1]
    if missing_frames > 0:
        coeffs = np.pad(
            coeffs,
            pad_width=((0, 0), (0, missing_frames)),
            mode='constant',
        )
    else:
        coeffs = coeffs[:, :max_len]

    # Transpose so that time is the leading axis.
    return coeffs.T


In [75]:
# Emotion label for each two-digit code; load_data_dl reads the code from the
# third dash-separated field of an audio file's name.
emotions = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised',
}
# Labels that are kept when loading data (currently every known emotion).
observed_emotions = [*emotions.values()]

def load_data_dl(test_size=0.2, max_len=200, file_patterns=None):
    """Load audio clips, extract MFCC sequences and split into train/test sets.

    Each matching file's emotion code is read from the third dash-separated
    field of its base name and mapped through ``emotions``.  Files whose name
    does not have that layout, whose code is unknown, or whose features cannot
    be extracted are skipped (with a printed message) instead of aborting the
    whole load.

    Args:
        test_size: Fraction of the samples placed in the test split.
        max_len: Number of MFCC frames per sample (passed to
            ``extract_mfcc_sequence``).
        file_patterns: Glob pattern, or iterable of glob patterns, selecting
            the ``.wav`` files to load (``**`` is expanded recursively).
            Defaults to the two dataset locations this notebook was written
            against; pass your own patterns to load data stored elsewhere.

    Returns:
        ``(x_train, x_test, y_train, y_test, le)`` where the ``x`` arrays hold
        the stacked MFCC sequences, the ``y`` arrays are one-hot encoded
        labels and ``le`` is the fitted ``LabelEncoder`` that maps class
        indices back to emotion names.

    Raises:
        ValueError: If no usable audio file was found or loaded.
    """
    if file_patterns is None:
        # Update these paths for your dataset
        file_patterns = (
            '/Users/dushyantyadav/Downloads/Audio_Speech_Actors_01-24/Actor*/**/*.wav',
            '/Users/dushyantyadav/Downloads/Audio_Song_Actors_01-24/Actor*/**/*.wav',
        )
    elif isinstance(file_patterns, str):
        file_patterns = (file_patterns,)

    all_files = []
    for pattern in file_patterns:
        # glob returns matches in arbitrary order; sorting keeps the seeded
        # train/test split identical from run to run and machine to machine.
        all_files.extend(sorted(glob.glob(pattern, recursive=True)))

    x, y = [], []
    for file in all_files:
        name_fields = os.path.basename(file).split("-")
        if len(name_fields) < 3:
            # A stray file with an unexpected name must not abort the load.
            print(f"Skipping {file}: file name has no emotion code field")
            continue
        emotion = emotions.get(name_fields[2])
        if emotion not in observed_emotions:
            continue
        try:
            mfcc_seq = extract_mfcc_sequence(file, max_len=max_len)
            x.append(mfcc_seq)
            y.append(emotion)
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"Error processing {file}: {e}")

    if not x:
        raise ValueError(
            "No usable audio files were loaded; check the dataset paths / file_patterns."
        )

    x = np.array(x)
    y = np.array(y)
    le = LabelEncoder()
    y_enc = le.fit_transform(y)
    y_cat = to_categorical(y_enc)
    # Stratify on the string labels so both splits keep the class proportions.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y_cat, test_size=test_size, random_state=42, stratify=y
    )
    return x_train, x_test, y_train, y_test, le


In [77]:
def build_cnn_lstm(input_shape, num_classes):
    """Build and compile the Conv1D + LSTM classifier.

    Two convolutional stages (Conv1D -> BatchNormalization -> MaxPooling1D ->
    Dropout) feed a single LSTM, followed by a dense hidden layer and a
    softmax output with one unit per class.

    Args:
        input_shape: Shape of one input sample, passed to the first Conv1D.
        num_classes: Number of output units of the softmax layer.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        Adam optimizer, accuracy metric).
    """
    layer_stack = [
        # Convolutional stage 1
        Conv1D(64, kernel_size=5, activation='relu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Dropout(0.3),
        # Convolutional stage 2
        Conv1D(128, kernel_size=5, activation='relu'),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Dropout(0.3),
        # Recurrent summary of the sequence (only the last output is kept)
        LSTM(128, return_sequences=False),
        # Classification head
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(num_classes, activation='softmax'),
    ]
    network = Sequential(layer_stack)
    network.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network


In [87]:
from sklearn.metrics import classification_report, confusion_matrix

# Load data
x_train, x_test, y_train, y_test, le = load_data_dl(test_size=0.2, max_len=200)
input_shape = x_train.shape[1:]  # (max_len, n_mfcc)
num_classes = y_train.shape[1]

# Build model
model = build_cnn_lstm(input_shape, num_classes)

# Early stopping for better generalization: stop once the validation loss has
# not improved for 10 epochs and roll back to the best weights seen.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train. Validation uses a slice carved out of the training data, so the test
# split is never consulted while fitting or choosing the stopping epoch and the
# final test metrics stay an unbiased estimate.
history = model.fit(
    x_train, y_train,
    validation_split=0.1,
    epochs=100,
    batch_size=32,
    callbacks=[early_stop],
)

# Evaluate on the untouched test split
loss, acc = model.evaluate(x_test, y_test)
print("Test accuracy:", acc)
y_pred = np.argmax(model.predict(x_test), axis=1)
y_true = np.argmax(y_test, axis=1)
# Pass the full label list explicitly so the report and matrix keep one
# row/column per class even if a class is absent from the test predictions.
class_ids = np.arange(num_classes)
print(classification_report(y_true, y_pred, labels=class_ids, target_names=le.classes_))
print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred, labels=class_ids))


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [91]:
# Sanity check on a clip stored outside the training directories.
test_file = '/Users/dushyantyadav/Downloads/Crema/1001_DFA_ANG_XX.wav'
# Same feature extraction as for training, with a leading batch axis added:
# (max_len, n_mfcc) -> (1, max_len, n_mfcc)
mfcc_seq = extract_mfcc_sequence(test_file, max_len=200)[np.newaxis, ...]


In [93]:
# Score the single-clip batch with the trained Keras model
pred = model.predict(mfcc_seq)
# Index of the highest-scoring class, mapped back to its emotion name
predicted_class = pred.argmax()
predicted_emotion = le.classes_[predicted_class]
print("Predicted emotion:", predicted_emotion)


Predicted emotion: surprised


In [95]:
# Second spot check: another clip from the Crema folder.
test_file = '/Users/dushyantyadav/Downloads/Crema/1001_IEO_DIS_LO.wav'
# Extract the MFCC sequence and prepend the batch dimension the model expects:
# (1, max_len, n_mfcc)
mfcc_seq = extract_mfcc_sequence(test_file, max_len=200)[np.newaxis, ...]


In [97]:
# Class scores for the clip prepared in the previous cell
pred = model.predict(mfcc_seq)
# Pick the top-scoring class and translate it with the fitted label encoder
predicted_class = pred.argmax()
predicted_emotion = le.classes_[predicted_class]
print("Predicted emotion:", predicted_emotion)

Predicted emotion: happy


In [99]:
# Third spot check: another clip from the Crema folder.
test_file = '/Users/dushyantyadav/Downloads/Crema/1001_IOM_HAP_XX.wav'
# Features are computed with the training-time settings, then wrapped in a
# batch of one: (1, max_len, n_mfcc)
mfcc_seq = extract_mfcc_sequence(test_file, max_len=200)[np.newaxis, ...]


In [101]:
# Forward pass of the trained model on the one-clip batch
pred = model.predict(mfcc_seq)
# Highest-scoring class index -> emotion name
predicted_class = pred.argmax()
predicted_emotion = le.classes_[predicted_class]
print("Predicted emotion:", predicted_emotion)

Predicted emotion: surprised


In [130]:

# Fourth spot check: a clip recorded by a different speaker id (1083).
test_file = '/Users/dushyantyadav/Downloads/Crema/1083_IEO_ANG_MD.wav'
# MFCC sequence with a leading batch axis: (1, max_len, n_mfcc)
mfcc_seq = extract_mfcc_sequence(test_file, max_len=200)[np.newaxis, ...]

In [132]:
# Predict class scores for the prepared clip
pred = model.predict(mfcc_seq)
# Resolve the winning class index to its label via the encoder's class list
predicted_class = pred.argmax()
predicted_emotion = le.classes_[predicted_class]
print("Predicted emotion:", predicted_emotion)

Predicted emotion: surprised
