In [None]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os
import glob
import numpy as np
import librosa
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout, BatchNormalization, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import pickle


In [None]:
emotions = {
    '01': 'neutral', '02': 'calm', '03': 'happy', '04': 'sad',
    '05': 'angry', '06': 'fearful', '07': 'disgust', '08': 'surprised'
}
observed_emotions = list(emotions.values())
DATA_PATHS = [
    '/content/drive/MyDrive/Audio_Speech_Actors_01-24/Actor_*/*.wav',
    '/content/drive/MyDrive/Audio_Song_Actors_01-24/Actor_*/*.wav',

]



In [None]:
def add_noise(data, noise_factor=0.005):
    noise = np.random.randn(len(data))
    return data + noise_factor * noise

def shift(data, shift_max=0.2, shift_direction='both'):
    shift_amt = np.random.randint(int(len(data) * shift_max))
    if shift_direction == 'right':
        shift_amt = -shift_amt
    elif shift_direction == 'both':
        if np.random.randint(0, 2) == 1:
            shift_amt = -shift_amt
    return np.roll(data, shift_amt)

def stretch(data, rate=1.1):
    return librosa.effects.time_stretch(data, rate)


In [None]:
def extract_mfcc_sequence(file_path, n_mfcc=40, max_len=200):
    y, sr = librosa.load(file_path, res_type='scipy')
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    if mfcc.shape[1] < max_len:
        pad_width = max_len - mfcc.shape[1]
        mfcc = np.pad(mfcc, pad_width=((0,0),(0,pad_width)), mode='constant')
    else:
        mfcc = mfcc[:, :max_len]
    return mfcc.T


In [None]:
def load_data_dl(test_size=0.2, max_len=200, augment=True):
    x, y = [], []
    files = []
    for path in DATA_PATHS:
        files.extend(glob.glob(path, recursive=True))
    for file in files:
        file_name = os.path.basename(file)

        try:

            emotion_code = file_name.split("-")[2]
            emotion = emotions.get(emotion_code)
        except Exception:
            emotion = None
        if emotion not in observed_emotions:
            continue
        try:

            mfcc_seq = extract_mfcc_sequence(file, max_len=max_len)
            x.append(mfcc_seq)
            y.append(emotion)
            if augment:
                y_audio, sr = librosa.load(file, res_type='scipy')
                # Augmented: noise
                mfcc_noise = librosa.feature.mfcc(y=add_noise(y_audio), sr=sr, n_mfcc=40)
                if mfcc_noise.shape[1] < max_len:
                    pad_width = max_len - mfcc_noise.shape[1]
                    mfcc_noise = np.pad(mfcc_noise, pad_width=((0,0),(0,pad_width)), mode='constant')
                else:
                    mfcc_noise = mfcc_noise[:, :max_len]
                x.append(mfcc_noise.T)
                y.append(emotion)
                # Augmented: shift
                y_shift = shift(y_audio)
                mfcc_shift = librosa.feature.mfcc(y=y_shift, sr=sr, n_mfcc=40)
                if mfcc_shift.shape[1] < max_len:
                    pad_width = max_len - mfcc_shift.shape[1]
                    mfcc_shift = np.pad(mfcc_shift, pad_width=((0,0),(0,pad_width)), mode='constant')
                else:
                    mfcc_shift = mfcc_shift[:, :max_len]
                x.append(mfcc_shift.T)
                y.append(emotion)
                # Augmented: stretch
                y_stretch = librosa.effects.time_stretch(y_audio, rate=1.1)
                mfcc_stretch = librosa.feature.mfcc(y=y_stretch, sr=sr, n_mfcc=40)
                if mfcc_stretch.shape[1] < max_len:
                    pad_width = max_len - mfcc_stretch.shape[1]
                    mfcc_stretch = np.pad(mfcc_stretch, pad_width=((0,0),(0,pad_width)), mode='constant')
                else:
                    mfcc_stretch = mfcc_stretch[:, :max_len]
                x.append(mfcc_stretch.T)
                y.append(emotion)
        except Exception as e:
            print(f"Error processing {file}: {e}")
    x = np.array(x)
    y = np.array(y)
    le = LabelEncoder()
    y_enc = le.fit_transform(y)
    y_cat = to_categorical(y_enc)
    x_train, x_test, y_train, y_test = train_test_split(
        x, y_cat, test_size=test_size, random_state=42, stratify=y)

    with open('label_encoder.pkl', 'wb') as f:
        pickle.dump(le, f)
    return x_train, x_test, y_train, y_test, le, y


In [None]:
def build_cnn_bilstm(input_shape, num_classes):
    model = Sequential()
    model.add(Conv1D(64, kernel_size=5, activation='relu', input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.3))
    model.add(Conv1D(128, kernel_size=5, activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.3))
    model.add(Bidirectional(LSTM(128, return_sequences=False)))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


In [None]:
x_train, x_test, y_train, y_test, le, y_all = load_data_dl(test_size=0.2, max_len=200, augment=True)
y_train_labels = np.argmax(y_train, axis=1)
class_weights = compute_class_weight('balanced', classes=np.unique(y_train_labels), y=y_train_labels)
class_weight_dict = dict(enumerate(class_weights))
print("Class weights:", class_weight_dict)


Class weights: {0: np.float64(0.8152535328345802), 1: np.float64(0.8152535328345802), 2: np.float64(1.5973127035830619), 3: np.float64(0.8152535328345802), 4: np.float64(0.8152535328345802), 5: np.float64(1.6291528239202657), 6: np.float64(0.8152535328345802), 7: np.float64(1.5947154471544716)}


In [None]:
input_shape = x_train.shape[1:]
num_classes = y_train.shape[1]
model = build_cnn_bilstm(input_shape, num_classes)

early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=60,
    batch_size=32,
    callbacks=[early_stop],
    class_weight=class_weight_dict
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/60
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 214ms/step - accuracy: 0.2195 - loss: 1.9356 - val_accuracy: 0.3420 - val_loss: 1.5820
Epoch 2/60
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 191ms/step - accuracy: 0.4050 - loss: 1.4941 - val_accuracy: 0.5148 - val_loss: 1.2420
Epoch 3/60
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 188ms/step - accuracy: 0.5443 - loss: 1.1836 - val_accuracy: 0.6014 - val_loss: 1.0950
Epoch 4/60
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 185ms/step - accuracy: 0.6461 - loss: 0.9559 - val_accuracy: 0.6386 - val_loss: 0.9301
Epoch 5/60
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 182ms/step - accuracy: 0.6749 - loss: 0.8472 - val_accuracy: 0.6361 - val_loss: 0.9539
Epoch 6/60
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 190ms/step - accuracy: 0.7115 - loss: 0.7629 - val_accuracy: 0.6305 - val_loss: 1.0510
Epoch 7/60

In [None]:
loss, acc = model.evaluate(x_test, y_test)
print("Test accuracy:", acc)

y_pred = np.argmax(model.predict(x_test), axis=1)
y_true = np.argmax(y_test, axis=1)
print(classification_report(y_true, y_pred, target_names=le.classes_))
print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))


[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 90ms/step - accuracy: 0.9366 - loss: 0.2150
Test accuracy: 0.9398573040962219
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 47ms/step
              precision    recall  f1-score   support

       angry       0.97      0.99      0.98       301
        calm       0.93      0.93      0.93       301
     disgust       0.93      0.90      0.91       154
     fearful       0.96      0.94      0.95       301
       happy       0.95      0.95      0.95       301
     neutral       0.94      0.89      0.91       150
         sad       0.89      0.94      0.91       301
   surprised       0.93      0.93      0.93       153

    accuracy                           0.94      1962
   macro avg       0.94      0.93      0.94      1962
weighted avg       0.94      0.94      0.94      1962

Confusion Matrix:
 [[297   0   2   2   0   0   0   0]
 [  0 281   2   2   1   2  13   0]
 [  6   1 138   0   0   1   5   3]
 [  2   0

In [None]:
model.save("model3.keras")


In [38]:
model.save('/content/drive/MyDrive/your_folder/model3.keras')