> **Update:** Hyperparameter tuning now targets the real autism dataset. Run the helper cell below to populate `X_full`/`y_full` from the saved MFCC features before executing the experiments.


In [None]:
import os
from pathlib import Path

import numpy as np

# Directory layout is derived from the current working directory:
# the feature files are read from "<cwd>/../../features".
NOTEBOOK_DIR = Path.cwd().resolve()
ASD_ROOT = NOTEBOOK_DIR.parent
PROJECT_ROOT = ASD_ROOT.parent
FEATURES_DIR = PROJECT_ROOT / "features"

# Feature files are split into two groups by filename prefix; each list is
# sorted so that the row order of the stacked matrices is deterministic.
AUT_FILES = [entry for entry in os.listdir(FEATURES_DIR) if entry.startswith("aut_")]
AUT_FILES.sort()
NON_FILES = [entry for entry in os.listdir(FEATURES_DIR) if entry.startswith("split-")]
NON_FILES.sort()


def load_features(file_list):
    """Load saved feature arrays and reduce each one to a single row.

    Each name in ``file_list`` is loaded from ``FEATURES_DIR`` with
    ``np.load``, averaged along axis 1, and the resulting vectors are
    stacked row-wise into one matrix.

    Args:
        file_list: Iterable of file names located inside ``FEATURES_DIR``.

    Returns:
        A 2-D array with one row per file.

    Raises:
        ValueError: If ``file_list`` is empty (nothing to stack), or if the
            per-file vectors have mismatching lengths (raised by ``np.vstack``).
    """
    # Materialise once so generators are accepted and emptiness can be checked.
    names = list(file_list)
    if not names:
        # np.vstack([]) would fail with a far less helpful message.
        raise ValueError(
            "load_features received an empty file list; "
            "check the features directory and the filename prefixes"
        )

    # assumes each file holds a 2-D array shaped (coefficients, frames), so the
    # axis-1 mean yields one value per coefficient — TODO confirm
    return np.vstack([
        np.mean(np.load(FEATURES_DIR / name), axis=1)
        for name in names
    ])

# One feature matrix per group; rows follow the sorted file lists.
X_aut, X_non = (load_features(names) for names in (AUT_FILES, NON_FILES))

# Combined dataset: "aut_" rows first (label 1.0), then "split-" rows (label 0.0).
X_full = np.concatenate((X_aut, X_non), axis=0)
y_full = np.concatenate((np.ones(X_aut.shape[0]), np.zeros(X_non.shape[0])))

print(f"Dataset ready for tuning: {X_full.shape[0]} samples")



# Notebook 04: Hyperparameter Tuning
## Systematic Grid Search and Random Search Exploration

This notebook demonstrates **hyperparameter optimization** for the ASD/ADHD detection model.

### Objectives
- Explore learning rate, batch size, and architecture parameters
- Conduct grid search and random search experiments
- Compare results across parameter combinations
- Identify optimal hyperparameters for final model
- Visualize parameter impact on model performance

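### Hyperparameters Explored
The full candidate ranges are listed below; the grid defined in Section 2 uses a reduced subset of these values for demonstration.
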
- Learning rate: [0.0001, 0.0005, 0.001, 0.005, 0.01]
- Batch size: [16, 32, 64, 128]
- Dropout rate: [0.1, 0.2, 0.3, 0.4]
- Hidden layer units: [64, 128, 256, 512]
- L2 regularization: [0.0001, 0.0005, 0.001, 0.005]

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import itertools
import time
import warnings

# Suppress every warning for the rest of the session.
warnings.filterwarnings(action='ignore')

# Seed NumPy's global random generator.
# NOTE(review): only NumPy is seeded in this cell — confirm whether other
# libraries used later need their own seed.
np.random.seed(seed=42)

# Make the project's `src` directory importable by putting it first on sys.path.
project_root = f"{Path('/').drive}/AIML/ASD_ADHD_Detection"
sys.path.insert(0, str(Path(project_root, 'src')))

print("✓ Environment setup complete")

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers, callbacks
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score
import json
import pickle

print("✓ All imports successful")

## Section 1: Load and Prepare Data

In [None]:
# Load the pre-split arrays from disk.
# NOTE(review): the helper cell at the top of this notebook builds
# X_full / y_full, but this cell reads pre-split .npy files from data_dir —
# confirm both refer to the same dataset.
data_dir = Path('/').drive + '/AIML/data'

X_train, X_val, X_test = (
    np.load(f"{data_dir}/X_{split}.npy") for split in ('train', 'val', 'test')
)
y_train, y_val, y_test = (
    np.load(f"{data_dir}/y_{split}.npy") for split in ('train', 'val', 'test')
)

# Standardise features: statistics are fitted on the training split only and
# then re-used for the validation and test splits.
scaler = StandardScaler()
X_train_norm = scaler.fit_transform(X_train)
X_val_norm, X_test_norm = scaler.transform(X_val), scaler.transform(X_test)

# Model dimensions are derived from the training data.
input_dim = X_train.shape[1]
n_classes = np.unique(y_train).size

print(f"Data shapes: X_train={X_train_norm.shape}, X_val={X_val_norm.shape}, X_test={X_test_norm.shape}")
print(f"Input dim: {input_dim}, Classes: {n_classes}")

## Section 2: Define Hyperparameter Grid

In [None]:
# Reduced hyperparameter grid (kept small for demonstration)
param_grid = dict(
    learning_rate=[0.0005, 0.001, 0.005],
    batch_size=[16, 32, 64],
    dropout_rate=[0.2, 0.3],
    hidden_units=[128, 256],
    l2_reg=[0.0005, 0.001],
)

# Cartesian product of all value lists. Dicts preserve insertion order, so each
# tuple is (learning_rate, batch_size, dropout_rate, hidden_units, l2_reg).
all_params = list(itertools.product(*param_grid.values()))

print(f"Total parameter combinations: {len(all_params)}")
print(f"Sample combinations:")
for i, params in enumerate(all_params[:3]):
    print("  {}. lr={}, bs={}, dr={}, hu={}, l2={}".format(i + 1, *params))

## Section 3: Build Function for Dynamic Model Creation

In [None]:
def build_model_with_params(lr, dropout, hidden_units, l2_reg, input_dim, n_classes):
    """Build and compile a two-hidden-layer classifier for one configuration.

    Architecture: Dense(hidden_units) -> BatchNorm -> ReLU -> Dropout ->
    Dense(hidden_units // 2) -> BatchNorm -> ReLU -> Dropout ->
    Dense(n_classes, softmax).

    Args:
        lr: Learning rate passed to the Adam optimizer.
        dropout: Dropout rate applied after each hidden block.
        hidden_units: Width of the first hidden layer; the second uses half.
        l2_reg: L2 factor for the kernel regularizer of both hidden layers.
        input_dim: Number of input features per sample.
        n_classes: Number of output classes (size of the softmax layer).

    Returns:
        A compiled Sequential model using sparse categorical cross-entropy
        loss and reporting accuracy.
    """
    model = models.Sequential([
        # Declare the input shape with an explicit Input object instead of the
        # legacy `input_dim=` kwarg on the first Dense layer.
        keras.Input(shape=(input_dim,)),

        layers.Dense(hidden_units,
                     kernel_regularizer=keras.regularizers.l2(l2_reg)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(dropout),

        layers.Dense(hidden_units // 2, kernel_regularizer=keras.regularizers.l2(l2_reg)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(dropout),

        layers.Dense(n_classes, activation='softmax')
    ])

    model.compile(
        optimizer=optimizers.Adam(learning_rate=lr),
        # Sparse loss: labels are expected as integer class indices.
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

print("✓ Model builder function defined")

## Section 4: Grid Search - Train Multiple Configurations
**Note:** This cell may take 5-10 minutes depending on your hardware. To speed it up, reduce the number of values per hyperparameter in `param_grid` (Section 2).

In [None]:
%%time

# Store results (one dict per trained configuration)
grid_results = []

# Seed TensorFlow too; the setup cell only seeds NumPy, which does not
# control TensorFlow's weight initialisation, dropout or batch shuffling.
tf.random.set_seed(42)

print(f"Grid Search: Training {len(all_params)} configurations...")
print("="*70)

for idx, params in enumerate(all_params, 1):
    lr, batch_size, dropout, hidden_units, l2_reg = params

    # Release Keras global state left by the previous configuration; without
    # this, building many models in a loop keeps increasing memory usage.
    keras.backend.clear_session()

    # Build model
    model = build_model_with_params(lr, dropout, hidden_units, l2_reg, input_dim, n_classes)

    # Train: stop after 8 epochs without val_loss improvement and roll back to
    # the best weights seen so far.
    early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True, verbose=0)

    history = model.fit(
        X_train_norm, y_train,
        validation_data=(X_val_norm, y_val),
        epochs=30,
        batch_size=batch_size,
        callbacks=[early_stop],
        verbose=0
    )

    # Evaluate. Test accuracy is only recorded for reporting; the ranking
    # below sorts by validation accuracy.
    _, val_acc = model.evaluate(X_val_norm, y_val, verbose=0)
    _, test_acc = model.evaluate(X_test_norm, y_test, verbose=0)

    # Class predictions = index of the highest softmax output
    y_pred = model.predict(X_val_norm, verbose=0).argmax(axis=1)
    val_f1 = f1_score(y_val, y_pred, average='weighted')

    result = {
        'config_id': idx,
        'learning_rate': lr,
        'batch_size': batch_size,
        'dropout_rate': dropout,
        'hidden_units': hidden_units,
        'l2_reg': l2_reg,
        'val_accuracy': float(val_acc),
        'test_accuracy': float(test_acc),
        'val_f1': float(val_f1),
        # May be fewer than 30 when early stopping triggers
        'epochs_trained': len(history.history['loss'])
    }

    grid_results.append(result)

    # Progress line for the first configuration and every fifth one after that
    if idx % 5 == 0 or idx == 1:
        print(f"[{idx}/{len(all_params)}] lr={lr:.4f} bs={batch_size} dr={dropout} hu={hidden_units} | val_acc={val_acc:.4f} test_acc={test_acc:.4f}")

print("\n" + "="*70)
print("Grid Search Complete!")

# Create results dataframe (built once from the list of records)
df_grid = pd.DataFrame(grid_results)
df_grid_sorted = df_grid.sort_values('val_accuracy', ascending=False)

print("\nTop 5 configurations (by validation accuracy):")
print(df_grid_sorted.head(5).to_string(index=False))

## Section 5: Analyze Parameter Impact

In [None]:
# Visualise how each hyperparameter relates to validation accuracy
sns.set_style("whitegrid")

fig, axes = plt.subplots(2, 3, figsize=(16, 10))


def _plot_param_impact(ax, column, label, **line_style):
    """Draw mean ± std validation accuracy per value of `column` on `ax`.

    Reads `df_grid`, groups it by `column`, and plots the group means with the
    group standard deviations as error bars. `line_style` is forwarded to
    `ax.errorbar` (e.g. marker, color).

    Returns the aggregated table (indexed by parameter value, with 'mean' and
    'std' columns).
    """
    impact = df_grid.groupby(column)['val_accuracy'].agg(['mean', 'std'])
    ax.errorbar(impact.index, impact['mean'], yerr=impact['std'],
                capsize=5, linewidth=2, markersize=8, **line_style)
    ax.set_xlabel(label, fontsize=11)
    ax.set_ylabel('Mean Validation Accuracy', fontsize=11)
    ax.set_title(f'{label} Impact', fontsize=12, fontweight='bold')
    ax.grid(True, alpha=0.3)
    return impact


# One panel per hyperparameter; the first panel uses the default line colour.
lr_impact = _plot_param_impact(axes[0, 0], 'learning_rate', 'Learning Rate', marker='o')
bs_impact = _plot_param_impact(axes[0, 1], 'batch_size', 'Batch Size', marker='s', color='orange')
dr_impact = _plot_param_impact(axes[0, 2], 'dropout_rate', 'Dropout Rate', marker='^', color='green')
hu_impact = _plot_param_impact(axes[1, 0], 'hidden_units', 'Hidden Units', marker='d', color='red')
l2_impact = _plot_param_impact(axes[1, 1], 'l2_reg', 'L2 Regularization', marker='p', color='purple')

# Last panel: mean validation accuracy for every (learning rate, batch size) pair
ax = axes[1, 2]
pivot = df_grid.pivot_table(values='val_accuracy', index='learning_rate', columns='batch_size', aggfunc='mean')
sns.heatmap(pivot, annot=True, fmt='.4f', cmap='RdYlGn', ax=ax, cbar_kws={'label': 'Val Accuracy'})
ax.set_title('LR vs Batch Size', fontsize=12, fontweight='bold')

plt.tight_layout()
plt.show()

print("✓ Parameter impact analysis visualized")

In [None]:
# Summary statistics for the grid search
print("\n" + "="*70)
print("HYPERPARAMETER TUNING SUMMARY")
print("="*70)

print("\nBest Configuration:")
# df_grid_sorted is ordered by validation accuracy (descending), so row 0 is the best.
best_config = df_grid_sorted.iloc[0]
print(f"  Val Accuracy: {best_config['val_accuracy']:.4f}")
print(f"  Test Accuracy: {best_config['test_accuracy']:.4f}")
print(f"  Learning Rate: {best_config['learning_rate']}")
# Selecting a single row from a mixed int/float frame upcasts everything to
# float, so integer-valued settings are cast back before printing.
print(f"  Batch Size: {int(best_config['batch_size'])}")
print(f"  Dropout Rate: {best_config['dropout_rate']}")
print(f"  Hidden Units: {int(best_config['hidden_units'])}")
print(f"  L2 Reg: {best_config['l2_reg']}")

# Importance = spread of the mean validation accuracy across the values of a
# parameter. A larger spread means that changing the parameter moves accuracy
# more. (The within-group std averaged over groups measures the opposite: the
# variation that remains once the parameter is fixed.)
print("\nParameter Importance (std dev of group-mean val accuracy):")
for label, column in [
    ('Learning Rate', 'learning_rate'),
    ('Batch Size', 'batch_size'),
    ('Dropout Rate', 'dropout_rate'),
    ('Hidden Units', 'hidden_units'),
    ('L2 Regularization', 'l2_reg'),
]:
    importance = df_grid.groupby(column)['val_accuracy'].mean().std()
    print(f"  {label + ':':<18} {importance:.6f}")

print("\nAggregated Statistics:")
print(f"  Mean Val Accuracy:  {df_grid['val_accuracy'].mean():.4f} ± {df_grid['val_accuracy'].std():.4f}")
print(f"  Mean Test Accuracy: {df_grid['test_accuracy'].mean():.4f} ± {df_grid['test_accuracy'].std():.4f}")
print(f"  Best Val Accuracy:  {df_grid['val_accuracy'].max():.4f}")
print(f"  Worst Val Accuracy: {df_grid['val_accuracy'].min():.4f}")