<a href="https://colab.research.google.com/github/atharva753/SPEECH-EMOTION-RECOGNITION-USING-RNN/blob/main/optimize_hyperparameters_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so later cells can read and write
# files under that path.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Notebook 4: Enhanced SER Model with Advanced Optimization Techniques
# Goal: Achieve 95% test accuracy on the RAVDESS dataset

# --- Cell 1: Install Dependencies ---
# Pin keras-tuner to a known version; the other packages are left unpinned and use whatever the runtime resolves
!pip install -q keras-tuner==1.3.5 librosa scikit-learn pandas numpy matplotlib seaborn

# Define project folder and verify it exists.
# `os` is imported here because this cell runs before the main import cell
# and nothing earlier in the notebook brings it into scope.
import os

project_folder = '/content/drive/MyDrive/ser/extracted_features'
# Raise explicitly instead of using `assert`, which is stripped under `-O`.
if not os.path.isdir(project_folder):
    raise FileNotFoundError(f"Project folder not found: {project_folder}")

# --- Cell 3: Enhanced Imports and Configurations ---
import numpy as np
import tensorflow as tf
import keras_tuner as kt
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.utils import class_weight
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    Input, TimeDistributed, Conv2D, BatchNormalization, MaxPooling2D,
    Dropout, Flatten, Dense, GaussianNoise, Bidirectional, LSTM,
    LayerNormalization, Activation, Add, Attention, MultiHeadAttention,
    GlobalAveragePooling2D, SpatialDropout2D
)
from tensorflow.keras.callbacks import (
    EarlyStopping, ReduceLROnPlateau, ModelCheckpoint,
    TensorBoard, LearningRateScheduler
)
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.optimizers.schedules import CosineDecayRestarts
from tensorflow.keras.metrics import AUC, Precision, Recall
import time

# For reproducibility: seed every random number generator used below.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
# Per the Keras docs, set_random_seed seeds Python's `random`, NumPy and
# TensorFlow in one call, so the two calls above are redundant but harmless.
tf.keras.utils.set_random_seed(SEED)
# Request deterministic op implementations from TensorFlow.
tf.config.experimental.enable_op_determinism()

# Pin the Keras float type explicitly.
# NOTE(review): float32 is already the Keras default, so this does not
# increase precision; it only makes the choice explicit.
tf.keras.backend.set_floatx('float32')

# --- Cell 4: Advanced Data Loading & Augmentation ---
# Load the pre-extracted feature tensor and the one-hot label matrix.
X = np.load(os.path.join(project_folder, "X.npy"))  # shape: (2880, 143, 40)
y = np.load(os.path.join(project_folder, "y.npy"))  # shape: (2880, num_classes)
print("X shape:", X.shape)
print("y shape:", y.shape)

# Append a trailing channel axis so the features can be fed to Conv2D layers
X = X[..., np.newaxis]
print("X shape after adding channel:", X.shape)

# Calculate class weights for imbalanced classes
y_integers = np.argmax(y, axis=1)
present_classes = np.unique(y_integers)
class_weights = class_weight.compute_class_weight(
    'balanced', classes=present_classes, y=y_integers)
# Key each weight by its actual class id. compute_class_weight returns the
# weights in the order of `classes`, so keying by enumerate() position would
# shift every weight if some class id never occurs in the labels.
class_weights_dict = {
    int(label): float(weight)
    for label, weight in zip(present_classes, class_weights)
}
print("Class weights:", class_weights_dict)

# Split with stratification
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED, stratify=y)

print("Training samples:", X_train.shape[0])
print("Testing samples:", X_test.shape[0])

# Save shapes for model building
time_steps, n_mfcc, channels = X.shape[1:]
num_classes = y.shape[1]

# --- Cell 5: Define Mixup Data Augmentation ---
def mixup_augmentation(X, y, alpha=0.2):
    """Return a mixup-augmented copy of a batch.

    Every sample is blended with another sample chosen by a random
    permutation of the batch: ``lam * a + (1 - lam) * b``, with one ``lam``
    per sample drawn from ``Beta(alpha, alpha)``. Labels are blended with the
    same coefficients.

    Args:
        X: Array-like of features with the batch on axis 0; any rank >= 1.
        y: Array-like of labels with the batch on axis 0; any rank >= 1.
        alpha: Concentration parameter of the Beta distribution.

    Returns:
        Tuple ``(mixed_X, mixed_y)`` with the same shapes as ``X`` and ``y``.

    Raises:
        ValueError: If ``X`` and ``y`` disagree on the batch size.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    batch_size = X.shape[0]
    if y.shape[0] != batch_size:
        raise ValueError(
            f"X and y must have the same number of samples, "
            f"got {batch_size} and {y.shape[0]}"
        )

    # Sample lambda from beta distribution (global NumPy RNG, so np.random.seed
    # controls reproducibility).
    lam = np.random.beta(alpha, alpha, batch_size)
    # Keep lambda >= 0.5 so each mixed sample stays dominated by its original.
    lam = np.maximum(lam, 1 - lam)

    # Shuffle indices to pick the partner sample for every row
    index = np.random.permutation(batch_size)

    # Reshape lambda to (batch, 1, ..., 1) so it broadcasts over whatever
    # trailing axes X and y happen to have, not only 4-D X and 2-D y.
    lam_x = lam.reshape((batch_size,) + (1,) * (X.ndim - 1))
    lam_y = lam.reshape((batch_size,) + (1,) * (y.ndim - 1))

    mixed_X = lam_x * X + (1 - lam_x) * X[index]
    mixed_y = lam_y * y + (1 - lam_y) * y[index]

    return mixed_X, mixed_y

# Generate one mixup-blended copy of the training set and stack it under the
# original rows, which doubles the number of training samples.
# NOTE(review): X_train_combined / y_train_combined are not referenced again in
# the visible part of this notebook; later cells re-augment from X_train.
X_aug, y_aug = mixup_augmentation(X_train, y_train, alpha=0.2)

# Row-wise concatenation: originals first, blended samples after.
X_train_combined = np.concatenate((X_train, X_aug), axis=0)
y_train_combined = np.concatenate((y_train, y_aug), axis=0)

print("Training data after augmentation:", X_train_combined.shape)

# --- Cell 6: Create an Enhanced Time-Frequency Attention Model ---
def _conv_block(x, filters, kernel_size, dropout_rate, regularizer):
    """Apply Conv2D -> BatchNorm -> ReLU -> 2x2 MaxPool -> SpatialDropout2D."""
    x = Conv2D(filters, (kernel_size, kernel_size), padding='same',
               kernel_regularizer=regularizer)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    return SpatialDropout2D(dropout_rate)(x)


def build_ser_attention_model(hp):
    """Build and compile the CNN + BiLSTM + self-attention classifier.

    The network is: Gaussian noise -> three convolutional blocks ->
    per-time-step flatten -> BiLSTM -> multi-head self-attention -> BiLSTM ->
    two dense blocks (optionally with a projected residual) -> softmax.
    Input shape and class count come from the module-level ``time_steps``,
    ``n_mfcc``, ``channels`` and ``num_classes``.

    Args:
        hp: A Keras Tuner ``HyperParameters`` object; every tunable value is
            registered on it by name.

    Returns:
        A compiled ``tf.keras.Model`` using categorical cross-entropy and
        reporting accuracy, precision, recall and AUC.
    """
    # Input layer
    inputs = Input(shape=(time_steps, n_mfcc, channels))

    # Gaussian noise for robustness
    noise_std = hp.Float('noise_std', 0.01, 0.15, step=0.02, default=0.08)
    x = GaussianNoise(noise_std)(inputs)

    # Regularization parameters (one L1/L2 regularizer shared by all kernels)
    reg_l1 = hp.Float('reg_l1', 1e-6, 1e-4, sampling='log', default=5e-5)
    reg_l2 = hp.Float('reg_l2', 1e-5, 1e-3, sampling='log', default=1e-4)
    reg = l1_l2(reg_l1, reg_l2)

    # --- Convolutional Feature Extraction Blocks ---
    # Hyperparameters stay declared inline, in the original order, so their
    # names and search ranges are unchanged; only the layer stack is shared.
    # Block 1: Capture local patterns
    f1 = hp.Int('filters1', 32, 96, step=16, default=64)
    k1 = hp.Choice('kernel1', values=[3, 5], default=3)
    d1 = hp.Float('dropout1', 0.2, 0.5, step=0.1, default=0.3)
    x = _conv_block(x, f1, k1, d1, reg)

    # Block 2: Extract hierarchical features
    f2 = hp.Int('filters2', 64, 128, step=32, default=96)
    k2 = hp.Choice('kernel2', values=[3, 5], default=3)
    d2 = hp.Float('dropout2', 0.3, 0.6, step=0.1, default=0.4)
    x = _conv_block(x, f2, k2, d2, reg)

    # Block 3: Deep feature extraction
    f3 = hp.Int('filters3', 96, 192, step=32, default=128)
    k3 = hp.Choice('kernel3', values=[3, 5], default=3)
    d3 = hp.Float('dropout3', 0.3, 0.6, step=0.1, default=0.5)
    x = _conv_block(x, f3, k3, d3, reg)

    # Reshape for sequence modeling: flatten everything except the first
    # (pooled time) axis so the LSTM receives one vector per step.
    x = TimeDistributed(Flatten())(x)

    # --- Sequential Modeling with Attention ---
    lstm_units = hp.Int('lstm_units', 160, 320, step=32, default=256)
    lstm_dropout = hp.Float('lstm_dropout', 0.3, 0.6, step=0.1, default=0.5)
    rec_dropout = hp.Float('rec_dropout', 0.2, 0.5, step=0.1, default=0.3)

    # First LSTM layer returns the full sequence so attention can use it
    x = Bidirectional(LSTM(
        lstm_units,
        return_sequences=True,
        dropout=lstm_dropout,
        recurrent_dropout=rec_dropout,
        kernel_regularizer=reg
    ))(x)
    x = LayerNormalization()(x)

    # Multi-head self-attention mechanism
    num_heads = hp.Int('num_heads', 2, 8, step=2, default=4)
    key_dim = hp.Int('key_dim', 32, 96, step=16, default=64)

    # Query and value are the same tensor, i.e. self-attention over time
    x = MultiHeadAttention(
        num_heads=num_heads,
        key_dim=key_dim,
        dropout=0.1
    )(x, x)
    x = LayerNormalization()(x)

    # Second LSTM layer collapses the attended sequence to a single vector
    x = Bidirectional(LSTM(
        lstm_units // 2,  # Reduced units for the second layer
        dropout=lstm_dropout,
        recurrent_dropout=rec_dropout,
        kernel_regularizer=reg
    ))(x)
    x = LayerNormalization()(x)
    x = Dropout(lstm_dropout)(x)

    # --- Classification Head ---
    dense_units = hp.Int('dense_units', 128, 384, step=64, default=256)
    dense_dropout = hp.Float('dense_dropout', 0.3, 0.6, step=0.1, default=0.5)

    # Keep the pre-dense activations for the optional residual connection
    shortcut = x
    for _ in range(2):
        x = Dense(dense_units, kernel_regularizer=reg)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Dropout(dense_dropout)(x)

    # Residual connection: the shortcut is projected to `dense_units` so the
    # two tensors can be added.
    if hp.Boolean('use_residual', default=True):
        shortcut = Dense(dense_units, kernel_regularizer=reg)(shortcut)
        x = Add()([x, shortcut])
        x = Activation('relu')(x)

    # Final prediction layer
    outputs = Dense(num_classes, activation='softmax', kernel_regularizer=reg)(x)

    # Build the model
    model = Model(inputs=inputs, outputs=outputs)

    # --- Advanced Optimizer Settings ---
    lr_schedule_type = hp.Choice('lr_schedule', values=['constant', 'cosine'], default='cosine')

    # Declare each learning-rate hyperparameter inside a conditional scope so
    # the tuner treats it as active only for the schedule type that uses it.
    with hp.conditional_scope('lr_schedule', ['cosine']):
        if lr_schedule_type == 'cosine':
            initial_lr = hp.Float('initial_lr', 5e-4, 3e-3, sampling='log', default=1e-3)
            lr_schedule = CosineDecayRestarts(
                initial_learning_rate=initial_lr,
                first_decay_steps=1000,
                t_mul=2.0,
                m_mul=0.85
            )
            opt = Adam(learning_rate=lr_schedule)
    with hp.conditional_scope('lr_schedule', ['constant']):
        if lr_schedule_type == 'constant':
            lr = hp.Float('lr', 5e-5, 1e-3, sampling='log', default=3e-4)
            opt = Adam(learning_rate=lr)

    # Use label smoothing to prevent overfitting to training data
    label_smoothing = hp.Float('label_smoothing', 0.0, 0.2, step=0.05, default=0.1)

    # 'accuracy' stays first in the metric list: callers read it by position
    # from model.evaluate().
    model.compile(
        optimizer=opt,
        loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=label_smoothing),
        metrics=[
            'accuracy',
            Precision(name='precision'),
            Recall(name='recall'),
            AUC(name='auc')
        ]
    )

    return model

# --- Cell 7: Enhanced Cross-Validation for Hyperparameter Tuning ---
def objective(hp):
    """Score one hyperparameter set by stratified 3-fold cross-validation.

    A fresh model is built for every fold, trained on that fold's training
    part plus one mixup-augmented copy of it, and evaluated on the fold's
    untouched validation part.

    Args:
        hp: Keras Tuner ``HyperParameters`` passed on to
            ``build_ser_attention_model``.

    Returns:
        Mean validation accuracy across the folds, as a float.
    """
    n_splits = 3
    kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)

    val_accuracies = []

    # StratifiedKFold needs integer class ids, not one-hot rows
    y_integers = np.argmax(y_train, axis=1)

    for train_idx, val_idx in kf.split(X_train, y_integers):
        # Build a new model per fold. Reusing one instance would carry weights
        # trained on earlier folds -- which include this fold's validation
        # samples -- into this fold and inflate the score.
        model = build_ser_attention_model(hp)

        X_fold_train, X_fold_val = X_train[train_idx], X_train[val_idx]
        y_fold_train, y_fold_val = y_train[train_idx], y_train[val_idx]

        # Apply mixup augmentation to this fold's training data only
        X_fold_aug, y_fold_aug = mixup_augmentation(X_fold_train, y_fold_train)
        X_fold_train_aug = np.vstack([X_fold_train, X_fold_aug])
        y_fold_train_aug = np.vstack([y_fold_train, y_fold_aug])

        callbacks = [
            EarlyStopping(monitor='val_accuracy', patience=7, restore_best_weights=True)
        ]

        model.fit(
            X_fold_train_aug, y_fold_train_aug,
            epochs=20,  # Reduced for faster tuning
            batch_size=32,
            validation_data=(X_fold_val, y_fold_val),
            callbacks=callbacks,
            class_weight=class_weights_dict,
            verbose=0
        )

        # evaluate() returns [loss, accuracy, precision, recall, auc];
        # only accuracy is kept.
        _, val_acc, *_ = model.evaluate(X_fold_val, y_fold_val, verbose=0)
        val_accuracies.append(val_acc)

    # Return mean validation accuracy across folds
    return float(np.mean(val_accuracies))

# --- Cell 8: Set Up and Execute Tuner ---
class CrossValidatedSERHyperModel(kt.HyperModel):
    """Adapter that lets Keras Tuner score trials with `objective()`.

    The tuner's first argument must be a model-building function or a
    HyperModel whose build() returns a Keras model. Passing `objective`
    directly fails because it returns a float. This class supplies the
    required build() and overrides fit() to return the cross-validated score.
    """

    def build(self, hp):
        """Return a compiled model; also registers the search space on `hp`."""
        return build_ser_attention_model(hp)

    def fit(self, hp, model, *args, **kwargs):
        """Return the trial's metrics as a dict keyed by the objective name.

        `model` and the tuner-injected kwargs are intentionally unused:
        objective() builds and trains the model(s) it evaluates itself.
        """
        return {'val_accuracy': float(objective(hp))}


# Use Bayesian Optimization for more efficient hyperparameter search
tuner = kt.BayesianOptimization(
    CrossValidatedSERHyperModel(),
    # Explicit direction: the reported value is an accuracy to maximise.
    objective=kt.Objective('val_accuracy', direction='max'),
    max_trials=20,  # Increased trials
    directory='kt_tuner',
    project_name='ser_advanced_bayes',
    overwrite=True
)

print("Starting hyperparameter search with Bayesian Optimization...")
# No data is passed to search(): objective() reads the training arrays from
# module scope.
tuner.search()

# Get best hyperparameters and print
best_hps = tuner.get_best_hyperparameters(1)[0]
print("\nBest hyperparameters found:")
for param, value in best_hps.values.items():
    print(f"- {param}: {value}")

# --- Cell 9: Build Final Model with Best Hyperparameters ---
# Build the final model with best hyperparameters
final_model = build_ser_attention_model(best_hps)
final_model.summary()

# Set up checkpointing to save the best model.
# Only the epoch with the highest validation accuracy is kept on disk
# (save_best_only=True, monitor='val_accuracy', mode='max').
checkpoint_filepath = os.path.join(project_folder, 'best_model_checkpoint.h5')
model_checkpoint = ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_best_only=True,
    monitor='val_accuracy',
    mode='max',
    verbose=1
)

# Create LR scheduler with warmup
def lr_schedule_with_warmup(epoch, *, max_lr=None, min_lr=1e-6,
                            warmup_epochs=3, total_epochs=50):
    """Return the learning rate for `epoch`: linear warmup, then cosine decay.

    The rate rises linearly to `max_lr` over the first `warmup_epochs`
    epochs, follows half a cosine down to `min_lr` until `total_epochs`, and
    stays at `min_lr` afterwards.

    Args:
        epoch: Zero-based epoch index.
        max_lr: Peak learning rate. When None it is read from the
            module-level `best_hps` ('initial_lr' for the cosine schedule,
            otherwise 'lr').
        min_lr: Floor reached at the end of the decay.
        warmup_epochs: Number of linear warmup epochs.
        total_epochs: Epoch at which the decay reaches `min_lr`.

    Returns:
        The learning rate as a Python float.
    """
    if max_lr is None:
        # Read from the plain `values` dict: HyperParameters.get() accepts no
        # default argument, so `best_hps.get(name, default)` raises TypeError.
        hp_values = best_hps.values
        if hp_values.get('lr_schedule', 'cosine') == 'cosine':
            max_lr = hp_values.get('initial_lr', 1e-3)
        else:
            max_lr = hp_values.get('lr', 3e-4)

    if epoch < warmup_epochs:
        # Linear warmup
        return float(max_lr * (epoch + 1) / warmup_epochs)

    # Cosine decay. Progress is clamped to 1 so that training past
    # `total_epochs` stays at min_lr instead of riding the cosine back up.
    decay_epochs = max(total_epochs - warmup_epochs, 1)
    decay_progress = min((epoch - warmup_epochs) / decay_epochs, 1.0)
    return float(min_lr + 0.5 * (max_lr - min_lr) * (1 + np.cos(np.pi * decay_progress)))

# Set up callbacks for training
# When the tuned optimizer was built with the 'cosine' option it already owns
# a LearningRateSchedule object, and callbacks that try to overwrite the
# optimizer's learning rate fail on such an optimizer. LR callbacks are
# therefore attached only for the constant-LR configuration.
uses_lr_schedule_object = best_hps.values.get('lr_schedule', 'cosine') == 'cosine'

callbacks = [
    EarlyStopping(monitor='val_accuracy', patience=15, restore_best_weights=True),
    model_checkpoint
]
if not uses_lr_schedule_object:
    # ReduceLROnPlateau is deliberately not combined with the scheduler:
    # LearningRateScheduler resets the rate at the start of every epoch, so
    # any reduction made at the end of the previous epoch would be discarded.
    callbacks.insert(1, LearningRateScheduler(lr_schedule_with_warmup))

# --- Cell 10: Train Final Model with Full Augmentation Strategy ---
print("\nTraining final model with best hyperparameters...")

# Hold out a stratified validation set BEFORE augmenting. With
# `validation_split`, Keras takes the last fraction of the arrays as given,
# which here would be only mixup samples blended from training rows -- a
# leaked, soft-labelled validation set that makes val_accuracy meaningless
# for early stopping and checkpointing.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train,
    test_size=0.15,
    random_state=SEED,
    stratify=np.argmax(y_train, axis=1)
)

# Create training dataset with more aggressive augmentation for final training
X_aug1, y_aug1 = mixup_augmentation(X_fit, y_fit, alpha=0.2)
X_aug2, y_aug2 = mixup_augmentation(X_fit, y_fit, alpha=0.3)

X_train_full = np.vstack([X_fit, X_aug1, X_aug2])
y_train_full = np.vstack([y_fit, y_aug1, y_aug2])

print(f"Training with augmented dataset: {X_train_full.shape}")

# Train with longer epochs for final model
history = final_model.fit(
    X_train_full, y_train_full,
    epochs=100,  # We'll use early stopping to prevent overfitting
    batch_size=32,
    validation_data=(X_val, y_val),  # clean, un-augmented hold-out
    callbacks=callbacks,
    class_weight=class_weights_dict,
    verbose=1
)

# --- Cell 11: Evaluate and Visualize Final Model Performance ---
# Restore the weights stored at the checkpoint path before scoring.
final_model.load_weights(checkpoint_filepath)

# Score the held-out test split; one value is returned per compiled metric.
test_results = final_model.evaluate(X_test, y_test, verbose=1)
print("\nFinal Test Results:")
for name_and_score in zip(final_model.metrics_names, test_results):
    label, score = name_and_score
    print(f"- {label}: {score:.4f}")

# Plot training history: accuracy on the left panel, loss on the right,
# each showing the training curve and the validation curve.
plt.figure(figsize=(12, 5))

curve_panels = (('accuracy', 'Accuracy'), ('loss', 'Loss'))
for slot, (history_key, pretty_name) in enumerate(curve_panels, start=1):
    plt.subplot(1, 2, slot)
    plt.plot(history.history[history_key], label=f'Training {pretty_name}')
    plt.plot(history.history[f'val_{history_key}'], label=f'Validation {pretty_name}')
    plt.title(f'Model {pretty_name}')
    plt.xlabel('Epoch')
    plt.ylabel(pretty_name)
    plt.legend()

plt.tight_layout()
plt.show()

# --- Cell 12: Confusion Matrix and Classification Report ---
from sklearn.metrics import confusion_matrix, classification_report
import pandas as pd

# Turn predicted probabilities and one-hot targets into integer class ids
y_pred = final_model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)
y_true_classes = y_test.argmax(axis=1)

# Raw counts, then each row divided by its total so that every true class
# sums to 1
cm = confusion_matrix(y_true_classes, y_pred_classes)
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = cm.astype('float') / row_totals

# Heatmap of the row-normalized matrix
plt.figure(figsize=(10, 8))
sns.heatmap(cm_normalized, annot=True, fmt='.2f', cmap='Blues')
plt.title('Normalized Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# Per-class precision / recall / F1 with generic class names
print("\nClassification Report:")
class_names = ["Class " + str(class_id) for class_id in range(num_classes)]
report_text = classification_report(
    y_true_classes, y_pred_classes, target_names=class_names)
print(report_text)

# --- Cell 13: Final Model Save ---
# Write the trained model into the project folder.
# NOTE(review): the file name contains "95pct" regardless of the accuracy
# actually measured above.
final_model_path = os.path.join(project_folder, 'ser_optimized_model_95pct.h5')
final_model.save(final_model_path)
print(f"\nOptimized model saved to {final_model_path}")

# --- Cell 14: Ensemble Model (Optional) ---
# If the single model doesn't reach 95%, create an ensemble.
# test_results[1] is accuracy: it is the first metric listed at compile time,
# right after the loss.
if test_results[1] < 0.95:
    print("\nCreating ensemble model to push accuracy to 95%...")

    # Train 3 more models with different random seeds
    ensemble_models = [final_model]

    # An optimizer built with the 'cosine' option owns a LearningRateSchedule
    # object, and ReduceLROnPlateau cannot overwrite its learning rate.
    lr_is_schedule_object = best_hps.values.get('lr_schedule', 'cosine') == 'cosine'

    for seed in [7, 13, 101]:
        print(f"\nTraining ensemble model with seed {seed}...")
        tf.random.set_seed(seed)
        np.random.seed(seed)

        # Build model with same hyperparameters
        model = build_ser_attention_model(best_hps)

        # Hold out clean validation data before augmenting. `validation_split`
        # would take the tail of the stacked array, i.e. only mixup samples
        # derived from the training rows.
        X_ens_fit, X_ens_val, y_ens_fit, y_ens_val = train_test_split(
            X_train, y_train,
            test_size=0.15,
            random_state=seed,
            stratify=np.argmax(y_train, axis=1)
        )

        # Train with different augmentation
        X_aug, y_aug = mixup_augmentation(X_ens_fit, y_ens_fit, alpha=0.25)
        X_train_ens = np.vstack([X_ens_fit, X_aug])
        y_train_ens = np.vstack([y_ens_fit, y_aug])

        # Different callbacks for diversity
        callbacks = [
            EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)
        ]
        if not lr_is_schedule_object:
            callbacks.append(
                ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=5, min_lr=1e-6)
            )

        # Train
        model.fit(
            X_train_ens, y_train_ens,
            epochs=50,
            batch_size=32,
            validation_data=(X_ens_val, y_ens_val),
            callbacks=callbacks,
            class_weight=class_weights_dict,
            verbose=0
        )

        # evaluate() returns [loss, accuracy, precision, recall, auc];
        # unpacking into exactly two names would raise ValueError.
        _, acc, *_ = model.evaluate(X_test, y_test, verbose=0)
        print(f"Model accuracy: {acc*100:.2f}%")

        # Add to ensemble
        ensemble_models.append(model)

    def ensemble_predict(models, X):
        """Return the element-wise mean of every model's predictions on X."""
        predictions = [model.predict(X, verbose=0) for model in models]
        return np.mean(predictions, axis=0)

    # Evaluate ensemble
    ensemble_preds = ensemble_predict(ensemble_models, X_test)
    ensemble_classes = np.argmax(ensemble_preds, axis=1)
    ensemble_accuracy = np.mean(ensemble_classes == y_true_classes)

    print(f"\nEnsemble Model Accuracy: {ensemble_accuracy*100:.2f}%")

    # If needed, save ensemble models
    if ensemble_accuracy >= 0.95:
        for i, model in enumerate(ensemble_models):
            model_path = os.path.join(project_folder, f'ensemble_model_{i}.h5')
            model.save(model_path)
        print(f"Ensemble models saved to {project_folder}")

    # Classification report for ensemble
    print("\nEnsemble Classification Report:")
    print(classification_report(y_true_classes, ensemble_classes, target_names=class_names))

X shape: (2880, 143, 40)
y shape: (2880, 8)
X shape after adding channel: (2880, 143, 40, 1)
Class weights: {0: np.float64(0.9375), 1: np.float64(0.9375), 2: np.float64(0.9375), 3: np.float64(0.9375), 4: np.float64(0.9375), 5: np.float64(1.875), 6: np.float64(0.9375), 7: np.float64(0.9375)}
Training samples: 2304
Testing samples: 576
Training data after augmentation: (4608, 143, 40, 1)
Starting hyperparameter search with Bayesian Optimization...

Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
0.13              |0.13              |noise_std
3.3277e-05        |3.3277e-05        |reg_l1
3.6359e-05        |3.6359e-05        |reg_l2
80                |80                |filters1
3                 |3                 |kernel1
0.3               |0.3               |dropout1
96                |96                |filters2
3                 |3                 |kernel2
0.5               |0.5               |dropout2
128               |128               |filters3
5    

FatalTypeError: Expected the model-building function, or HyperModel.build() to return a valid Keras Model instance. Received: 0.1336805522441864 of type <class 'numpy.float64'>.