In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import numpy as np
import librosa
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.regularizers import l2, l1
from tensorflow.keras.callbacks import EarlyStopping
import pandas as pd


In [3]:
data_path = '/content/drive/MyDrive/Mars'


In [4]:
parent_folders = ['Audio_Speech_Actors_01-24', 'Audio_Song_Actors_01-24']


In [5]:
# Map RAVDESS emotion IDs to emotion labels
emotion_map = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised"
}

def get_emotion_from_filename(filename):
    """
    Extracts emotion label from RAVDESS-style filename.
    Example filename: '03-01-05-02-02-02-12.wav'
    """
    try:
        parts = filename.split("-")
        if len(parts) >= 3:
            emotion_id = parts[2]
            return emotion_map.get(emotion_id, None)
        else:
            return None
    except Exception as e:
        print(f"⚠️ Error parsing filename {filename}: {e}")
        return None


In [6]:
import librosa
import numpy as np

def pitch_shift(y, sr, n_steps):
    return librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)

def time_stretch(y, rate):
    return librosa.effects.time_stretch(y, rate)


In [7]:
import librosa
import numpy as np

def extract_features(file_path, duration=3, sr=22050):
    """
    Extracts audio features from a .wav file for emotion classification.

    Parameters:
        file_path (str): Path to the WAV file.
        duration (int): Audio length to standardize in seconds.
        sr (int): Sampling rate.

    Returns:
        np.ndarray: 1D array of 194 extracted features, or None on failure.
    """
    try:
        # Load and trim/pad audio
        y, sr = librosa.load(file_path, sr=sr)
        desired_len = duration * sr
        if len(y) < desired_len:
            y = np.pad(y, (0, desired_len - len(y)))
        else:
            y = y[:desired_len]

        features = []

        # MFCC (40)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
        features.extend(np.mean(mfcc, axis=1))

        # Chroma (12)
        stft = np.abs(librosa.stft(y))
        chroma = librosa.feature.chroma_stft(S=stft, sr=sr)
        features.extend(np.mean(chroma, axis=1))

        # Mel Spectrogram (128)
        mel = librosa.feature.melspectrogram(y=y, sr=sr)
        features.extend(np.mean(mel, axis=1))

        # Spectral Contrast (7)
        contrast = librosa.feature.spectral_contrast(S=stft, sr=sr)
        features.extend(np.mean(contrast, axis=1))

        # Tonnetz (6)
        tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(y), sr=sr)
        features.extend(np.mean(tonnetz, axis=1))

        # Spectral Flatness (1)
        flatness = librosa.feature.spectral_flatness(y=y)
        features.append(np.mean(flatness))

        # Validate final feature length
        if len(features) != 194:
            print(f"⚠️ Feature length mismatch for {file_path}: got {len(features)}")
            return None

        return np.array(features, dtype=np.float32)

    except Exception as e:
        print(f"❌ Failed to extract features from {file_path}: {e}")
        return None


In [10]:
import os
import numpy as np
import librosa
from tqdm import tqdm

# Define dataset path
data_path = '/content/drive/MyDrive/Mars'

# Parent folders
parent_folders = ['Audio_Speech_Actors_01-24', 'Audio_Song_Actors_01-24']

# Feature and label lists
X, y = [], []

# Count total files for accurate progress bar
total_files = 0
for parent in parent_folders:
    parent_path = os.path.join(data_path, parent)
    for actor_folder in os.listdir(parent_path):
        actor_path = os.path.join(parent_path, actor_folder)
        if os.path.isdir(actor_path):
            total_files += len([f for f in os.listdir(actor_path) if f.endswith('.wav')])

# Feature extraction with progress bar
print("🔍 Extracting features from audio files...")
with tqdm(total=total_files, desc="📦 Processing", unit="file") as pbar:
    for parent in parent_folders:
        parent_path = os.path.join(data_path, parent)
        for actor_folder in os.listdir(parent_path):
            actor_path = os.path.join(parent_path, actor_folder)
            if not os.path.isdir(actor_path):
                continue
            for file in os.listdir(actor_path):
                if file.endswith('.wav'):
                    full_path = os.path.join(actor_path, file)
                    emotion = get_emotion_from_filename(file)
                    features = extract_features(full_path)
                    if features is not None and emotion is not None:
                        X.append(features)
                        y.append(emotion)
                    pbar.update(1)

# Convert to arrays
X = np.array(X)
y = np.array(y)

# Final output summary
print("✅ Feature extraction complete!")
print("🎯 Total samples:", len(X))
print("📐 Feature shape:", X.shape)
print("🏷️ Labels shape:", y.shape)


🔍 Extracting features from audio files...


📦 Processing: 100%|██████████| 2452/2452 [31:46<00:00,  1.29file/s]

✅ Feature extraction complete!
🎯 Total samples: 2452
📐 Feature shape: (2452, 194)
🏷️ Labels shape: (2452,)





In [11]:
X = np.array(X)
y = np.array(y)

print("✅ Feature shape:", X.shape)
print("✅ Label shape:", y.shape)

✅ Feature shape: (2452, 194)
✅ Label shape: (2452,)


In [12]:
le = LabelEncoder()
y_encoded = le.fit_transform(y)


In [13]:
from sklearn.model_selection import train_test_split

X = np.array(X)
y = np.array(y_encoded)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)


In [14]:
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [21]:
print(X_train.shape)

(1961, 194)


In [15]:

pca = PCA(n_components=97, svd_solver='full')
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

print("✅ Final feature shape after PCA:", X_train_pca.shape)

✅ Final feature shape after PCA: (1961, 97)


In [16]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.regularizers import l2
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# ⚖️ Compute class weights if needed (can be omitted after SMOTE)
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weights = dict(enumerate(class_weights))

# ✅ Callbacks
early_stop = EarlyStopping(monitor='val_accuracy', patience=15, restore_best_weights=True, mode='max', verbose=1)
lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', patience=5, factor=0.5, min_lr=1e-6, mode='max', verbose=1)
model_checkpoint = ModelCheckpoint('best_model_v3.h5', monitor='val_accuracy', save_best_only=True, mode='max', verbose=1)

# ✅ Improved Architecture
model = Sequential([
    Dense(512, activation='relu', input_shape=(X_train_pca.shape[1],), kernel_initializer='he_uniform'),
    BatchNormalization(),
    Dropout(0.6),

    Dense(256, activation='relu', kernel_initializer='he_uniform'),
    BatchNormalization(),
    Dropout(0.4),

    Dense(128, activation='relu', kernel_initializer='he_uniform'),
    BatchNormalization(),
    Dropout(0.4),

    Dense(64, activation='relu', kernel_initializer='he_uniform'),
    Dropout(0.2),

    Dense(8, activation='softmax')
])


# ✅ Compile
model.compile(
    optimizer=AdamW(learning_rate=0.0005, weight_decay=5e-5),
    loss=SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)

# ✅ Train
history = model.fit(
    X_train_pca, y_train,
    validation_data=(X_test_pca, y_test),
    epochs=300,
    batch_size=32,
    verbose=1
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/300
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - accuracy: 0.1336 - loss: 3.0030 - val_accuracy: 0.1752 - val_loss: 2.0333
Epoch 2/300
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.2137 - loss: 2.4651 - val_accuracy: 0.2301 - val_loss: 1.9577
Epoch 3/300
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.2653 - loss: 2.1510 - val_accuracy: 0.3299 - val_loss: 1.8761
Epoch 4/300
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.3047 - loss: 2.0647 - val_accuracy: 0.3829 - val_loss: 1.7918
Epoch 5/300
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.3214 - loss: 1.8811 - val_accuracy: 0.4277 - val_loss: 1.6741
Epoch 6/300
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.3463 - loss: 1.7572 - val_accuracy: 0.4521 - val_loss: 1.5517
Epoch 7/300
[1m62/62[0m [

In [17]:
from sklearn.metrics import classification_report, accuracy_score

# Get predictions
y_pred = model.predict(X_test_pca)
y_pred_labels = np.argmax(y_pred, axis=1)

# Convert integer labels to strings
target_names = [str(cls) for cls in le.classes_]

# Print the report
print(classification_report(y_test, y_pred_labels, target_names=target_names))
print(f"✅ Overall Test Accuracy: {accuracy_score(y_test, y_pred_labels):.4f}")


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step
              precision    recall  f1-score   support

       angry       0.92      0.87      0.89        75
        calm       0.81      0.93      0.87        75
     disgust       0.74      0.67      0.70        39
     fearful       0.79      0.72      0.76        75
       happy       0.82      0.77      0.79        75
     neutral       0.85      0.92      0.89        38
         sad       0.83      0.76      0.79        75
   surprised       0.72      0.92      0.81        39

    accuracy                           0.82       491
   macro avg       0.81      0.82      0.81       491
weighted avg       0.82      0.82      0.82       491

✅ Overall Test Accuracy: 0.8167


In [18]:
print("✅ Model input shape:", model.input_shape)
print("✅ PCA components used:", X_train_pca.shape[1])


✅ Model input shape: (None, 97)
✅ PCA components used: 97


In [19]:
import pickle

# Assuming `pca` is your already fitted PCA object
with open('pca_transform_22113034.pkl', 'wb') as f:
    pickle.dump(pca, f)

print("✅ PCA saved to 'pca_transform.pkl'")

from tensorflow.keras.models import save_model

# Assuming scaler is already fitted, and model is already trained
# Save MinMaxScaler using pickle
with open('minmax_scaler_22113034.pkl', 'wb') as f:
    pickle.dump(scaler, f)

# Save Keras model in HDF5 format
model.save('emotion_keras_model_22113034.h5')

print("✅ Saved MinMaxScaler and Keras model.")




✅ PCA saved to 'pca_transform.pkl'
✅ Saved MinMaxScaler and Keras model.


In [20]:
import pickle

# Load the scaler
with open("minmax_scaler_22113034.pkl", "rb") as f:
    scaler = pickle.load(f)

# Print the number of features it expects
print("✅ Scaler was trained on:", scaler.n_features_in_, "features")


✅ Scaler was trained on: 194 features
