In [1]:
# =========================================================
# 1. IMPORTS
# =========================================================
import os
import glob
import numpy as np
import librosa
import soundfile
import tensorflow as tf

from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Conv1D, MaxPooling1D, BatchNormalization,
    Bidirectional, LSTM, Dense, Dropout
)
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

2025-12-26 08:44:30.012906: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1766738670.426608      17 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1766738670.542220      17 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1766738671.558335      17 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766738671.558379      17 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766738671.558388      17 computation_placer.cc:177] computation placer alr

In [2]:
# =========================================================
# 2. CONFIGURATION (UNCHANGED)
# =========================================================
INPUT_DIR = "/kaggle/input"

EMOTIONS = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised'
}

OBSERVED_EMOTIONS = ['neutral', 'calm', 'happy', 'sad', 'angry', 'fearful']

In [3]:
# =========================================================
# 3. FEATURE EXTRACTION (MFCC TIME-SERIES)
# =========================================================
def extract_feature(file_name, n_mfcc=40):
    """
    Extract MFCC features as a time-series for CNN + BiLSTM
    Output shape: (time_steps, n_mfcc)
    """
    try:
        with soundfile.SoundFile(file_name) as sound_file:
            X = sound_file.read(dtype="float32")
            sample_rate = sound_file.samplerate

            # Convert stereo to mono
            if len(X.shape) > 1:
                X = np.mean(X, axis=1)

            mfccs = librosa.feature.mfcc(
                y=X,
                sr=sample_rate,
                n_mfcc=n_mfcc
            )

            return mfccs.T  # (time, features)

    except Exception:
        return None


In [4]:
# =========================================================
# 4. DATA LOADING (SAME AS YOUR ORIGINAL LOGIC)
# =========================================================
def load_data(test_size=0.2):
    x, y = [], []

    # Recursively find wav files
    wav_files = glob.glob(os.path.join(INPUT_DIR, "**/*.wav"), recursive=True)

    if not wav_files:
        print("❌ Error: No .wav files found in dataset")
        return None, None, None, None

    print(f"Found {len(wav_files)} files. Extracting features...")

    for file in wav_files:
        file_name = os.path.basename(file)
        parts = file_name.split("-")

        # RAVDESS emotion code is 3rd field
        if len(parts) < 3:
            continue

        emotion_code = parts[2]
        emotion = EMOTIONS.get(emotion_code)

        if emotion in OBSERVED_EMOTIONS:
            feature = extract_feature(file)
            if feature is not None:
                x.append(feature)
                y.append(OBSERVED_EMOTIONS.index(emotion))

    print(f"Extraction complete. Dataset size: {len(x)} samples")

    # Pad sequences for CNN + BiLSTM
    x = pad_sequences(x, padding="post", dtype="float32")

    # One-hot encode labels
    y = to_categorical(y, num_classes=len(OBSERVED_EMOTIONS))

    return train_test_split(x, y, test_size=test_size, random_state=9)

In [5]:
# =========================================================
# 5. LOAD DATA
# =========================================================
x_train, x_test, y_train, y_test = load_data()

if x_train is None:
    raise RuntimeError("Dataset loading failed")

print("Training data shape:", x_train.shape)
print("Testing data shape :", x_test.shape)


Found 2880 files. Extracting features...
Extraction complete. Dataset size: 2112 samples
Training data shape: (1689, 479, 40)
Testing data shape : (423, 479, 40)


In [6]:
# =========================================================
# 6. CNN + BiLSTM MODEL
# =========================================================
model = Sequential([

    Conv1D(128, kernel_size=5, activation="relu",
           input_shape=(x_train.shape[1], x_train.shape[2])),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),

    Conv1D(256, kernel_size=5, activation="relu"),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),

    Bidirectional(LSTM(128, return_sequences=True)),
    Bidirectional(LSTM(64)),

    Dense(128, activation="relu"),
    Dropout(0.4),

    Dense(len(OBSERVED_EMOTIONS), activation="softmax")
])

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-12-26 08:46:42.133973: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [7]:
# =========================================================
# 7. TRAINING
# =========================================================
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True
)

history = model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=100,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

Epoch 1/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 526ms/step - accuracy: 0.2692 - loss: 1.6254 - val_accuracy: 0.1797 - val_loss: 2.6183
Epoch 2/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 506ms/step - accuracy: 0.3369 - loss: 1.4991 - val_accuracy: 0.1797 - val_loss: 2.2300
Epoch 3/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 509ms/step - accuracy: 0.3962 - loss: 1.4222 - val_accuracy: 0.1891 - val_loss: 2.3037
Epoch 4/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 524ms/step - accuracy: 0.4032 - loss: 1.3423 - val_accuracy: 0.2837 - val_loss: 1.7745
Epoch 5/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 506ms/step - accuracy: 0.4266 - loss: 1.2813 - val_accuracy: 0.3191 - val_loss: 1.9827
Epoch 6/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 507ms/step - accuracy: 0.4161 - loss: 1.3020 - val_accuracy: 0.4515 - val_loss: 1.2833
Epoch 7/100
[1m

In [8]:
# =========================================================
# 8. EVALUATION
# =========================================================
loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
print(f"\n✅ CNN + BiLSTM Accuracy: {accuracy * 100:.2f}%")


✅ CNN + BiLSTM Accuracy: 94.33%


In [9]:
# =========================================================
# 9. SAVE MODEL
# =========================================================
MODEL_PATH = "/kaggle/working/voice_emotion_cnn_bilstm.h5"
model.save(MODEL_PATH)

print(f"\n✅ Model saved to: {MODEL_PATH}")
print("⚠️ Download this file from the Kaggle Output section")




✅ Model saved to: /kaggle/working/voice_emotion_cnn_bilstm.h5
⚠️ Download this file from the Kaggle Output section
