In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import shap
import json
import time
from tqdm import tqdm
from scipy.stats import ttest_ind

from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Conv2D, MaxPooling2D, Dropout, Flatten,
                                     Dense, LSTM, MultiHeadAttention, Concatenate, Reshape)
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.saving import register_keras_serializable

from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis
from sklearn.metrics import (classification_report, accuracy_score, confusion_matrix,
                             roc_curve, auc, precision_score, recall_score, f1_score)
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline

# =============================================================================
# --- 🚀 CONFIGURATION ---
# =============================================================================
# Define the datasets and their specific modes for TRAINING.
# You can list one or more datasets here.
TRAIN_CONFIG = [
    dict(dataset="Italian", mode="A", feature_mode="ALL"),
]

# Dataset used for TESTING.
# - Point it at a dataset description to evaluate on unseen data.
# - Set it to None to evaluate on the validation split of the training data.
# Alternative settings kept for reference:
# TEST_CONFIG = {"dataset": "mPower", "mode": "ALL_VALIDS", "feature_mode": "ALL"}
# TEST_CONFIG = None
# NOTE: the active value is a 1-tuple wrapping the description dict; the
# testing phase reads its first element.
TEST_CONFIG = (
    dict(dataset="Neurovoz", mode="A", feature_mode="ALL"),
)

# --- Path and Run ID Configuration ---
# The run name is derived from the (alphabetically sorted) training datasets.
train_datasets_str = "_".join(sorted(entry["dataset"] for entry in TRAIN_CONFIG))
RUN_ID = "trained_on_" + train_datasets_str
RESULTS_PATH = os.path.join(os.getcwd(), "runs", RUN_ID)
PLOTS_PATH = os.path.join(RESULTS_PATH, "plots")
os.makedirs(RESULTS_PATH, exist_ok=True)
os.makedirs(PLOTS_PATH, exist_ok=True)

# Hyperparameters shared by both CNN variants
EPOCHS = 30
BATCH_SIZE = 32
LEARNING_RATE = 1e-3
DROPOUT_RATE = 0.5
L2_STRENGTH = 1e-2

# =============================================================================
# --- ⏱️ RUNTIME TRACKING UTILITIES ---
# =============================================================================
class RuntimeTracker:
    """Collect wall-clock timings per (model, phase) and export them as CSV.

    Attributes:
        results_path: Directory the CSV files are written to.
        runtime_data: List of dicts, one per ``track_runtime`` call.
    """

    # Column layouts are spelled out so that a tracker with no recorded
    # timings still produces well-formed, header-only CSV files.
    _RUNTIME_COLUMNS = ['Model', 'Phase', 'Runtime_Seconds', 'Runtime_Minutes', 'Timestamp']
    _SUMMARY_COLUMNS = ['Model', 'Total_Minutes', 'Avg_Minutes', 'Total_Seconds', 'Avg_Seconds']

    def __init__(self, results_path):
        self.results_path = results_path
        self.runtime_data = []

    def track_runtime(self, model_name, phase, start_time, end_time):
        """Record the duration of one phase of one model and print it.

        Args:
            model_name: Label stored in the ``Model`` column.
            phase: Label stored in the ``Phase`` column.
            start_time: Start of the interval, in seconds (``time.time()`` style).
            end_time: End of the interval, in seconds; also used for the
                local-time ``Timestamp`` column.
        """
        runtime_seconds = end_time - start_time
        runtime_minutes = runtime_seconds / 60

        runtime_info = {
            'Model': model_name,
            'Phase': phase,
            'Runtime_Seconds': runtime_seconds,
            'Runtime_Minutes': runtime_minutes,
            'Timestamp': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
        }

        self.runtime_data.append(runtime_info)
        print(f"⏱️ {model_name} - {phase}: {runtime_minutes:.2f} minutes ({runtime_seconds:.2f} seconds)")

    def save_runtime_summary(self):
        """Write ``model_runtimes.csv`` and ``runtime_summary.csv``.

        The first file holds every recorded entry; the second holds the
        per-model total and mean runtime (rounded to 3 decimals). When nothing
        has been tracked, both files are written with headers only instead of
        failing on the missing ``Model`` column.
        """
        os.makedirs(self.results_path, exist_ok=True)

        runtime_df = pd.DataFrame(self.runtime_data, columns=self._RUNTIME_COLUMNS)
        runtime_path = os.path.join(self.results_path, "model_runtimes.csv")
        runtime_df.to_csv(runtime_path, index=False)

        if runtime_df.empty:
            # groupby() has nothing to aggregate; emit an empty summary table.
            summary_stats = pd.DataFrame(columns=self._SUMMARY_COLUMNS)
        else:
            summary_stats = runtime_df.groupby('Model').agg({
                'Runtime_Minutes': ['sum', 'mean'],
                'Runtime_Seconds': ['sum', 'mean']
            }).round(3)
            # Flatten the (column, aggregation) MultiIndex into plain names.
            summary_stats.columns = self._SUMMARY_COLUMNS[1:]
            summary_stats.reset_index(inplace=True)

        summary_path = os.path.join(self.results_path, "runtime_summary.csv")
        summary_stats.to_csv(summary_path, index=False)

        print(f"\n📊 Runtime Summary:")
        print(summary_stats.to_string(index=False))
        print(f"✅ Runtime data saved to: {runtime_path}")
        print(f"✅ Runtime summary saved to: {summary_path}")

# Module-wide tracker; its CSV output goes to this run's results directory.
runtime_tracker = RuntimeTracker(results_path=RESULTS_PATH)

# =============================================================================
# --- Your Custom CNN Models ---
# =============================================================================
@register_keras_serializable()
class ParkinsonDetectorModel(Model):
    """End-to-end CNN + attention + LSTM classifier for the original features.

    The input is reshaped to a single-channel image and passed through two
    Conv-Conv-Pool-Dropout stages (5x5 kernels, pool size 5). The pooled
    feature map then feeds three parallel branches -- a flattened CNN branch,
    a self-attention branch and a stacked-LSTM branch -- whose concatenation
    goes through a 128-unit bottleneck and a single sigmoid output unit.

    Args:
        input_shape: ``(d1, d2)`` shape of one sample, without the batch axis.
        **kwargs: Forwarded to ``Model.__init__``.
    """

    def __init__(self, input_shape, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config() can hand the constructor argument back.
        self.input_shape_config = input_shape
        self.reshape_in = Reshape((input_shape[0], input_shape[1], 1))
        self.conv1a = Conv2D(64, 5, activation='relu', kernel_regularizer=l2(L2_STRENGTH), padding='same')
        self.conv1b = Conv2D(64, 5, activation='relu', kernel_regularizer=l2(L2_STRENGTH), padding='same')
        self.pool1 = MaxPooling2D(5)
        self.drop1 = Dropout(DROPOUT_RATE)
        self.conv2a = Conv2D(64, 5, activation='relu', kernel_regularizer=l2(L2_STRENGTH), padding='same')
        self.conv2b = Conv2D(64, 5, activation='relu', kernel_regularizer=l2(L2_STRENGTH), padding='same', name="last_conv_layer")
        self.pool2 = MaxPooling2D(5)
        self.drop2 = Dropout(DROPOUT_RATE)
        self.flatten_cnn = Flatten()
        self.attention = MultiHeadAttention(num_heads=2, key_dim=64)
        self.flatten_att = Flatten()
        self.lstm1 = LSTM(128, return_sequences=True)
        self.lstm2 = LSTM(128, return_sequences=False)
        self.drop_lstm = Dropout(DROPOUT_RATE)
        self.concat = Concatenate()
        self.dense_bottleneck = Dense(128, activation='relu', name='bottleneck_features')
        self.dense_output = Dense(1, activation='sigmoid')

    def call(self, inputs, extract_features=False, grad_cam=False, training=None):
        """Run the forward pass.

        Args:
            inputs: Batch of samples shaped ``(batch, d1, d2)``.
            extract_features: If true, return the bottleneck activations
                instead of the prediction.
            grad_cam: If true, return ``(prediction, last_conv_output)``;
                takes precedence over ``extract_features``.
            training: Standard Keras training flag. It is forwarded to the
                dropout layers so they are active during ``fit`` and inactive
                during inference. (Previously dropout was called with a
                hard-coded ``training=False`` and therefore never applied.)

        Returns:
            The sigmoid prediction, the bottleneck features, or a
            ``(prediction, last_conv_output)`` pair, depending on the flags.
        """
        x = self.reshape_in(inputs)

        # Convolutional stage 1
        x = self.conv1a(x)
        x = self.conv1b(x)
        x = self.pool1(x)
        x = self.drop1(x, training=training)

        # Convolutional stage 2; the un-pooled output is what Grad-CAM inspects.
        x = self.conv2a(x)
        last_conv_output = self.conv2b(x)
        x = self.pool2(last_conv_output)
        x = self.drop2(x, training=training)

        # Branch 1: flattened CNN features
        cnn_flat = self.flatten_cnn(x)

        # Collapse the two spatial axes into one sequence axis for branches 2 and 3.
        shape = tf.shape(x)
        sequence = tf.reshape(x, [-1, shape[1] * shape[2], shape[3]])

        # Branch 2: self-attention over the sequence
        att_out = self.attention(query=sequence, key=sequence, value=sequence)
        att_flat = self.flatten_att(att_out)

        # Branch 3: stacked LSTMs over the sequence
        lstm_seq = self.lstm1(sequence)
        lstm_out = self.lstm2(lstm_seq)
        lstm_out = self.drop_lstm(lstm_out, training=training)

        concatenated = self.concat([cnn_flat, att_flat, lstm_out])
        bottleneck = self.dense_bottleneck(concatenated)
        final_output = self.dense_output(bottleneck)

        if grad_cam:
            return final_output, last_conv_output
        if extract_features:
            return bottleneck
        return final_output

    def get_config(self):
        """Return the base config extended with the ``input_shape`` argument."""
        config = super().get_config()
        config.update({"input_shape": self.input_shape_config})
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the model from a dict produced by ``get_config``."""
        return cls(**config)

@register_keras_serializable()
class ParkinsonDetectorModelNCA(Model):
    """CNN + attention + LSTM classifier sized for the small NCA input.

    Same layout as the full-size model -- two Conv-Conv-Pool-Dropout stages
    followed by parallel flattened-CNN, self-attention and stacked-LSTM
    branches, a 128-unit bottleneck and a sigmoid output -- but with 3x3
    kernels and pool size 2.

    Args:
        input_shape: ``(d1, d2)`` shape of one sample, without the batch axis.
        **kwargs: Forwarded to ``Model.__init__``.
    """

    def __init__(self, input_shape, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config() can hand the constructor argument back.
        self.input_shape_config = input_shape
        self.reshape_in = Reshape((input_shape[0], input_shape[1], 1))
        self.conv1a = Conv2D(64, 3, activation='relu', kernel_regularizer=l2(L2_STRENGTH), padding='same')
        self.conv1b = Conv2D(64, 3, activation='relu', kernel_regularizer=l2(L2_STRENGTH), padding='same')
        self.pool1 = MaxPooling2D(2)
        self.drop1 = Dropout(DROPOUT_RATE)
        self.conv2a = Conv2D(64, 3, activation='relu', kernel_regularizer=l2(L2_STRENGTH), padding='same')
        self.conv2b = Conv2D(64, 3, activation='relu', kernel_regularizer=l2(L2_STRENGTH), padding='same', name="last_conv_layer")
        self.pool2 = MaxPooling2D(2)
        self.drop2 = Dropout(DROPOUT_RATE)
        self.flatten_cnn = Flatten()
        self.attention = MultiHeadAttention(num_heads=2, key_dim=64)
        self.flatten_att = Flatten()
        self.lstm1 = LSTM(128, return_sequences=True)
        self.lstm2 = LSTM(128, return_sequences=False)
        self.drop_lstm = Dropout(DROPOUT_RATE)
        self.concat = Concatenate()
        self.dense_bottleneck = Dense(128, activation='relu', name='bottleneck_features')
        self.dense_output = Dense(1, activation='sigmoid')

    def call(self, inputs, extract_features=False, grad_cam=False, training=None):
        """Run the forward pass.

        Args:
            inputs: Batch of samples shaped ``(batch, d1, d2)``.
            extract_features: If true, return the bottleneck activations
                instead of the prediction.
            grad_cam: If true, return ``(prediction, last_conv_output)``;
                takes precedence over ``extract_features``.
            training: Standard Keras training flag. It is forwarded to the
                dropout layers so they are active during ``fit`` and inactive
                during inference. (Previously dropout was called with a
                hard-coded ``training=False`` and therefore never applied.)

        Returns:
            The sigmoid prediction, the bottleneck features, or a
            ``(prediction, last_conv_output)`` pair, depending on the flags.
        """
        x = self.reshape_in(inputs)

        # Convolutional stage 1
        x = self.conv1a(x)
        x = self.conv1b(x)
        x = self.pool1(x)
        x = self.drop1(x, training=training)

        # Convolutional stage 2; the un-pooled output is what Grad-CAM inspects.
        x = self.conv2a(x)
        last_conv_output = self.conv2b(x)
        x = self.pool2(last_conv_output)
        x = self.drop2(x, training=training)

        # Branch 1: flattened CNN features
        cnn_flat = self.flatten_cnn(x)

        # Collapse the two spatial axes into one sequence axis for branches 2 and 3.
        shape = tf.shape(x)
        sequence = tf.reshape(x, [-1, shape[1] * shape[2], shape[3]])

        # Branch 2: self-attention over the sequence
        att_out = self.attention(query=sequence, key=sequence, value=sequence)
        att_flat = self.flatten_att(att_out)

        # Branch 3: stacked LSTMs over the sequence
        lstm_seq = self.lstm1(sequence)
        lstm_out = self.lstm2(lstm_seq)
        lstm_out = self.drop_lstm(lstm_out, training=training)

        concatenated = self.concat([cnn_flat, att_flat, lstm_out])
        bottleneck = self.dense_bottleneck(concatenated)
        final_output = self.dense_output(bottleneck)

        if grad_cam:
            return final_output, last_conv_output
        if extract_features:
            return bottleneck
        return final_output

    def get_config(self):
        """Return the base config extended with the ``input_shape`` argument."""
        config = super().get_config()
        config.update({"input_shape": self.input_shape_config})
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the model from a dict produced by ``get_config``."""
        return cls(**config)

# =============================================================================
# --- Data Loading & Helper Functions ---
# =============================================================================
def load_single_dataset(config):
    """Load one dataset's feature archive described by ``config``.

    The archive is expected at
    ``<dataset>/data/features_<mode>_<feature_mode>.npz`` and must contain the
    arrays ``mel_spectrogram``, ``mfcc`` and ``labels``.

    Args:
        config: Mapping with the keys ``dataset``, ``mode`` and ``feature_mode``.

    Returns:
        ``(X, labels, mel_bins)`` where ``X`` is the mel-spectrogram and MFCC
        arrays joined along axis 1 and ``mel_bins`` is the size of the
        mel-spectrogram along that axis; ``None`` if the file does not exist.
    """
    dataset_name, mode, feature_mode = config['dataset'], config['mode'], config['feature_mode']
    path = os.path.join(dataset_name, "data", f"features_{mode}_{feature_mode}.npz")

    if os.path.exists(path):
        print(f"--- Loading data from {path} ---")
        with np.load(path) as archive:
            mel = archive['mel_spectrogram']
            mfccs = archive['mfcc']
            # Stack the MFCC rows underneath the mel rows.
            X = np.concatenate((mel, mfccs), axis=1)
            labels = archive['labels']
            mel_bins = mel.shape[1]
        print(f"Loaded {dataset_name} successfully. Shape: {X.shape}")
        return X, labels, mel_bins

    print(f"WARNING: Data file not found at {path}. Skipping.")
    return None

def load_and_combine_data(configs):
    """Load every dataset in ``configs`` and stack them along the sample axis.

    Datasets that cannot be loaded are skipped. ``mel_bins`` is taken from the
    first dataset that loads.

    Args:
        configs: Iterable of dataset descriptions accepted by
            ``load_single_dataset``.

    Returns:
        ``(combined_X, combined_y, mel_bins)``.

    Raises:
        ValueError: If none of the datasets could be loaded.
    """
    feature_arrays = []
    label_arrays = []
    mel_bins = -1

    for cfg in configs:
        loaded = load_single_dataset(cfg)
        if not loaded:
            continue
        features, labels, bins = loaded
        feature_arrays.append(features)
        label_arrays.append(labels)
        if mel_bins == -1:
            mel_bins = bins

    if not feature_arrays:
        raise ValueError("No data could be loaded for this configuration.")

    combined_X = np.concatenate(feature_arrays, axis=0)
    combined_y = np.concatenate(label_arrays, axis=0)
    print(f"\n--- All data combined. Final shape: X={combined_X.shape}, y={combined_y.shape} ---")
    return combined_X, combined_y, mel_bins

# =============================================================================
# --- Plotting and Evaluation Functions ---
# =============================================================================
def plot_and_save_history(history, model_name, path):
    """Persist a Keras training history as CSV and as a three-panel figure.

    Writes ``<model_name>_history.csv`` and ``<model_name>_history.png`` into
    ``path``. The panels show loss, accuracy and -- if a metric whose name
    contains ``auc`` was logged -- AUC, each for training and validation.

    Args:
        history: Object with a ``history`` dict of per-epoch metric lists.
        model_name: Prefix for file names and panel titles.
        path: Output directory.
    """
    history_df = pd.DataFrame(history.history)
    history_df.to_csv(os.path.join(path, f"{model_name}_history.csv"))

    plt.style.use('seaborn-v0_8-whitegrid')
    fig, axes = plt.subplots(1, 3, figsize=(20, 5))

    def draw_panel(ax, train_key, val_key, short_name, long_name):
        # Training curve solid, validation curve dashed.
        ax.plot(history_df[train_key], label=f'Train {short_name}')
        ax.plot(history_df[val_key], label=f'Val {short_name}', linestyle='--')
        ax.set_title(f'{model_name} - Model {long_name}')
        ax.set_xlabel('Epoch')
        ax.legend()

    draw_panel(axes[0], 'loss', 'val_loss', 'Loss', 'Loss')
    draw_panel(axes[1], 'accuracy', 'val_accuracy', 'Acc', 'Accuracy')

    # Training-side AUC columns only; the last one wins if there are several.
    auc_keys = [col for col in history_df.columns if 'auc' in col and 'val' not in col]
    if auc_keys:
        auc_key = auc_keys[-1]
        draw_panel(axes[2], auc_key, 'val_' + auc_key, 'AUC', 'AUC')

    plt.tight_layout()
    plt.savefig(os.path.join(path, f"{model_name}_history.png"), dpi=300)
    plt.close()
    print(f"✅ Saved training history and plot for {model_name}.")

def calculate_and_save_metrics(y_true, y_pred, y_pred_proba, model_name, results_path, plots_path):
    """Compute binary-classification metrics, persist them, and plot them.

    Writes ``<model_name>_confusion_matrix.csv`` to ``plots_path``, appends one
    row to ``evaluation_metrics.csv`` in ``results_path`` (creating the file if
    needed), and delegates the confusion-matrix and ROC figures to the
    dedicated plotting helpers.

    Args:
        y_true: Ground-truth labels, 0 (healthy) or 1 (Parkinson).
        y_pred: Predicted labels; ``(n,)`` or ``(n, 1)``.
        y_pred_proba: Predicted scores for the positive class; ``(n,)`` or ``(n, 1)``.
        model_name: Name used in file names and in the ``Model`` column.
        results_path: Directory holding ``evaluation_metrics.csv``.
        plots_path: Directory for the confusion-matrix CSV and the figures.

    Returns:
        The accuracy as a float.
    """
    # Keras ``predict`` yields (n, 1) column vectors; the metrics want 1-D arrays.
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    y_pred_proba = np.asarray(y_pred_proba).ravel()

    # Pin the label set so the matrix is always 2x2, even when only one class
    # shows up in y_true/y_pred. Without this the labelled DataFrame below
    # fails on a 1x1 matrix.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()
    cm_df = pd.DataFrame(cm, index=['True Healthy', 'True Parkinson'], columns=['Pred Healthy', 'Pred Parkinson'])
    cm_df.to_csv(os.path.join(plots_path, f"{model_name}_confusion_matrix.csv"))

    fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
    roc_auc_score = auc(fpr, tpr)

    metrics_data = {
        'Model': model_name,
        'Accuracy': accuracy_score(y_true, y_pred),
        'AUC': roc_auc_score,
        'F1-Score': f1_score(y_true, y_pred, zero_division=0),
        'Precision': precision_score(y_true, y_pred, zero_division=0),
        'Sensitivity (Recall)': recall_score(y_true, y_pred, zero_division=0),
        'TP': tp, 'TN': tn, 'FP': fp, 'FN': fn
    }

    # Append to the running summary; avoid concatenating with an empty frame.
    summary_path = os.path.join(results_path, "evaluation_metrics.csv")
    new_row = pd.DataFrame([metrics_data])
    if os.path.exists(summary_path):
        summary_df = pd.concat([pd.read_csv(summary_path), new_row], ignore_index=True)
    else:
        summary_df = new_row
    summary_df.to_csv(summary_path, index=False)

    plot_and_save_confusion_matrix(cm, model_name, plots_path)
    plot_and_save_roc_curve(y_true, y_pred_proba, model_name, plots_path)
    print(f"✅ Saved all metrics and plots for {model_name}.")
    return metrics_data['Accuracy']

def plot_and_save_confusion_matrix(cm, model_name, path):
    """Save a two-panel confusion-matrix figure (counts and row percentages).

    Args:
        cm: Square confusion matrix with true classes on the rows.
        model_name: Used in the titles and in the output file name.
        path: Directory for ``<model_name>_confusion_matrix.png``.
    """
    # Row-normalise so each true class sums to 100%. A class with no samples
    # has a zero row sum; dividing by 1 there keeps that row at 0%. The
    # previous fallback passed raw counts to the percentage panel, which the
    # '.2%' format rendered as e.g. "500.00%".
    row_totals = cm.sum(axis=1)[:, np.newaxis].astype('float')
    row_totals[row_totals == 0] = 1.0
    cm_percent = cm.astype('float') / row_totals

    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    class_names = ['Healthy', 'Parkinson']

    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[0], xticklabels=class_names, yticklabels=class_names)
    axes[0].set_title(f'{model_name}\nConfusion Matrix (Counts)')
    axes[0].set_xlabel('Predicted')
    axes[0].set_ylabel('True')

    sns.heatmap(cm_percent, annot=True, fmt='.2%', cmap='Blues', ax=axes[1], xticklabels=class_names, yticklabels=class_names)
    axes[1].set_title(f'{model_name}\nConfusion Matrix (Percentages)')
    axes[1].set_xlabel('Predicted')
    axes[1].set_ylabel('True')

    plt.tight_layout()
    plt.savefig(os.path.join(path, f"{model_name}_confusion_matrix.png"), dpi=300)
    plt.close()

def plot_and_save_roc_curve(y_true, y_pred_proba, model_name, path):
    """Draw the ROC curve with its AUC and save it as a PNG.

    Args:
        y_true: Ground-truth binary labels.
        y_pred_proba: Scores for the positive class.
        model_name: Used in the title and in the output file name.
        path: Directory for ``<model_name>_roc_curve.png``.
    """
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, y_pred_proba)
    area = auc(false_pos_rate, true_pos_rate)

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(false_pos_rate, true_pos_rate, color='darkorange', lw=2, label=f'ROC curve (AUC = {area:0.3f})')
    # Diagonal reference line
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(f'{model_name} - ROC Curve')
    ax.legend(loc="lower right")
    ax.grid(True)

    fig.savefig(os.path.join(path, f"{model_name}_roc_curve.png"), dpi=300)
    plt.close(fig)

# =============================================================================
# --- Explainability Functions ---
# =============================================================================
def run_full_shap_analysis(model, X_train, X_test, y_test, output_path, mel_bins, num_samples=50):
    """Compute SHAP attributions for a CNN model and save summary plots.

    Produces in ``output_path``:
      * ``shap_global_bar.png`` -- the input cells with the largest mean |SHAP|;
      * ``shap_summary_healthy.png`` / ``shap_summary_parkinson.png`` -- the
        mean SHAP map per true class (skipped for a class with no samples).

    Args:
        model: Model exposing ``input_shape_config``; it is wrapped in a
            functional model before being handed to SHAP.
        X_train: Training inputs; the first 50 serve as SHAP background.
        X_test: Inputs to draw the explained samples from.
        y_test: Labels aligned with ``X_test`` (0 = healthy, 1 = Parkinson).
        output_path: Directory for the figures (created if missing).
        mel_bins: Number of leading rows along axis 1 that are mel bins; rows
            at or beyond this index are labelled as MFCC coefficients.
        num_samples: Maximum number of randomly chosen test samples to explain.
    """
    print("\n--- Running SHAP Analysis ---")
    os.makedirs(output_path, exist_ok=True)

    inputs = tf.keras.Input(shape=model.input_shape_config)
    outputs = model(inputs)
    functional_model = Model(inputs, outputs)

    idx = np.random.choice(len(X_test), min(num_samples, len(X_test)), replace=False)
    test_samples, y_true_samples = X_test[idx], y_test[idx]

    explainer = shap.GradientExplainer(functional_model, X_train[:50])
    shap_values = explainer.shap_values(test_samples)
    if isinstance(shap_values, list):
        shap_values = shap_values[0]

    # Global importance: mean |SHAP| per input cell, flattened.
    mean_abs_shap = np.mean(np.abs(shap_values.reshape(shap_values.shape[0], -1)), axis=0)
    top_idx = np.argsort(mean_abs_shap)[::-1][:20]
    n_top = len(top_idx)  # may be < 20 for very small inputs

    # Axis 1 is the feature axis (mel rows, then MFCC rows) and axis 2 the
    # other axis, so unravel_index yields (feature, time). The previous code
    # unpacked the pair as (time, feature), which swapped the two indices in
    # every label and in the mel/MFCC decision.
    grid_shape = (shap_values.shape[1], shap_values.shape[2])
    coords = [np.unravel_index(i, grid_shape) for i in top_idx]
    labels = [
        f"Mel F{f} @ T{t}" if f < mel_bins else f"MFCC F{f-mel_bins} @ T{t}"
        for f, t in coords
    ]

    plt.figure(figsize=(12, 6))
    plt.bar(range(n_top), mean_abs_shap[top_idx])
    plt.xticks(range(n_top), labels, rotation=45, ha="right")
    plt.title(f"Top-{n_top} Global SHAP Features")
    plt.ylabel("Mean |SHAP value|")
    plt.tight_layout()
    plt.savefig(os.path.join(output_path, "shap_global_bar.png"), dpi=300)
    plt.close()
    print("-> Saved SHAP global bar plot.")

    # Per-class mean attribution maps
    hc_mask, pd_mask = (y_true_samples == 0), (y_true_samples == 1)
    if np.any(hc_mask):
        hc_mean = shap_values[hc_mask].mean(axis=0).squeeze()
        plt.figure()
        plt.imshow(hc_mean, cmap="bwr", aspect="auto")
        cbar = plt.colorbar()
        cbar.set_label("Mean SHAP Value")
        plt.title("Average SHAP - Healthy")
        plt.savefig(os.path.join(output_path, "shap_summary_healthy.png"), dpi=300)
        plt.close()
    if np.any(pd_mask):
        pd_mean = shap_values[pd_mask].mean(axis=0).squeeze()
        plt.figure()
        plt.imshow(pd_mean, cmap="bwr", aspect="auto")
        cbar = plt.colorbar()
        cbar.set_label("Mean SHAP Value")
        plt.title("Average SHAP - Parkinson's")
        plt.savefig(os.path.join(output_path, "shap_summary_parkinson.png"), dpi=300)
        plt.close()
    print("-> Saved SHAP class heatmaps.")
    print("--- SHAP Analysis Complete ---")

def run_gradcam_analysis(model, X_test, y_test, output_path, num_samples=30):
    """Save the average Grad-CAM heatmaps of true positives and true negatives.

    Predictions are thresholded at 0.5. Up to ``num_samples`` correctly
    classified samples per class are drawn at random, a Grad-CAM heatmap is
    computed for each from the model's last convolutional output, and the
    per-class averages are plotted side by side in
    ``gradcam_average_comparison.png``.

    Args:
        model: Model whose call accepts ``grad_cam=True`` and then returns
            ``(prediction, last_conv_output)``.
        X_test: Test inputs.
        y_test: Labels aligned with ``X_test`` (0 = healthy, 1 = Parkinson).
        output_path: Directory for the figure (created if missing).
        num_samples: Maximum number of samples averaged per class.
    """
    print("\n--- Running Grad-CAM Analysis ---")
    os.makedirs(output_path, exist_ok=True)

    predicted = (model.predict(X_test) > 0.5).astype(int).flatten()
    tp_idx = np.where((y_test == 1) & (predicted == 1))[0]
    tn_idx = np.where((y_test == 0) & (predicted == 0))[0]

    def get_avg_heatmap(indices):
        # No correctly classified sample of this class: return a blank map.
        if len(indices) == 0:
            return np.zeros(X_test.shape[1:3])

        collected = []
        for sample_i in tqdm(indices, desc="Grad-CAM Progress", leave=False):
            sample = X_test[sample_i:sample_i + 1]
            with tf.GradientTape() as tape:
                final_preds, last_conv_output = model(sample, grad_cam=True)
                tape.watch(last_conv_output)
                loss = final_preds[0]
            grads = tape.gradient(loss, last_conv_output)
            # One weight per channel: gradient averaged over batch and space.
            channel_weights = tf.reduce_mean(grads, axis=(0, 1, 2))
            cam = tf.squeeze(last_conv_output[0] @ channel_weights[..., tf.newaxis])
            # Keep positive evidence only, scaled by the raw maximum.
            cam = tf.maximum(cam, 0) / (tf.math.reduce_max(cam) + 1e-10)
            collected.append(cam.numpy())
        return np.mean(collected, axis=0)

    tp_choice = np.random.choice(tp_idx, min(num_samples, len(tp_idx)), replace=False)
    avg_tp_heatmap = get_avg_heatmap(tp_choice)
    tn_choice = np.random.choice(tn_idx, min(num_samples, len(tn_idx)), replace=False)
    avg_tn_heatmap = get_avg_heatmap(tn_choice)

    fig, axes = plt.subplots(1, 2, figsize=(15, 6))

    tp_image = axes[0].imshow(avg_tp_heatmap, cmap='jet', aspect='auto')
    axes[0].set_title("Avg Grad-CAM for Parkinson's (TP)")
    tp_bar = fig.colorbar(tp_image, ax=axes[0])
    tp_bar.set_label("Activation Intensity")

    tn_image = axes[1].imshow(avg_tn_heatmap, cmap='jet', aspect='auto')
    axes[1].set_title("Avg Grad-CAM for Healthy (TN)")
    tn_bar = fig.colorbar(tn_image, ax=axes[1])
    tn_bar.set_label("Activation Intensity")

    plt.suptitle("Average Model Attention by Class")
    plt.tight_layout()
    plt.savefig(os.path.join(output_path, "gradcam_average_comparison.png"), dpi=300)
    plt.close()
    print("-> Saved average Grad-CAM comparison.")
    print("--- Grad-CAM Analysis Complete ---")


In [None]:
# =============================================================================
# --- Main Execution ---
# =============================================================================
if __name__ == '__main__':

    # --- PHASE 1: TRAINING ---
    # Trains and saves four models into runs/<RUN_ID>:
    #   1. k-NN baseline on the flattened raw features
    #   2. end-to-end CNN
    #   3. k-NN on the CNN's bottleneck features
    #   4. CNN on NCA-reduced features
    RESULTS_PATH = os.path.join("runs", RUN_ID)
    PLOTS_PATH = os.path.join(RESULTS_PATH, "plots")
    os.makedirs(RESULTS_PATH, exist_ok=True)
    os.makedirs(PLOTS_PATH, exist_ok=True)
    print("="*80)
    print(f"🚀 STARTING TRAINING PHASE | SAVING TO: {RUN_ID}")
    print("="*80)

    try:
        X_train_full, y_train_full, mel_bins = load_and_combine_data(TRAIN_CONFIG)
    except ValueError as e:
        # exit() is meant for interactive shells; raise SystemExit directly so
        # the message is reported the same way in scripts and notebooks.
        raise SystemExit(f"STOPPING: {e}") from e

    with open(os.path.join(RESULTS_PATH, 'run_config.json'), 'w') as f:
        json.dump({'mel_bins': mel_bins}, f)

    X_train, X_val, y_train, y_val = train_test_split(
        X_train_full, y_train_full, test_size=0.2, random_state=42, stratify=y_train_full
    )
    d1, d2 = X_train.shape[1], X_train.shape[2]
    X_train_full_2d = X_train_full.reshape((X_train_full.shape[0], d1 * d2))
    X_train_2d = X_train.reshape((X_train.shape[0], d1 * d2))
    X_val_2d = X_val.reshape((X_val.shape[0], d1 * d2))

    print(f"\nTraining data prepared. Train: {X_train.shape}, Validation: {X_val.shape}")

    # With TEST_CONFIG set to None the final evaluation runs on (X_val, y_val).
    # Fitting the k-NN models on the *full* data would then put every test
    # sample into the neighbour set (distance-weighted k-NN simply recalls its
    # own label), so in that case they are fitted on the training split only.
    # With an external test set the full data is used, as before.
    if TEST_CONFIG is None:
        X_knn_fit_2d, X_cnn_feat_fit, y_knn_fit = X_train_2d, X_train, y_train
    else:
        X_knn_fit_2d, X_cnn_feat_fit, y_knn_fit = X_train_full_2d, X_train_full, y_train_full

    # --- Train Model 1: k-NN (Baseline) ---
    print("\n" + "-"*80)
    print("TRAINING MODEL 1: k-NN (Baseline)")
    print("-"*80)
    start_time = time.time()

    pipeline_knn = Pipeline([
        ('scaler', StandardScaler()),
        ('smote', SMOTE(random_state=42)),
        ('nca', NeighborhoodComponentsAnalysis(random_state=42, max_iter=200)),
        ('classifier', KNeighborsClassifier()),
    ])
    param_dist_knn = {
        'nca__n_components': [10, 20, 30, 40],
        'classifier__n_neighbors': [3, 5, 7],
        'classifier__weights': ['distance'],
        'classifier__metric': ['manhattan'],
    }
    search_knn = RandomizedSearchCV(
        pipeline_knn, param_dist_knn, n_iter=10, cv=3, scoring='accuracy',
        n_jobs=1, random_state=42, verbose=1
    )
    search_knn.fit(X_knn_fit_2d, y_knn_fit)
    joblib.dump(search_knn.best_estimator_, os.path.join(RESULTS_PATH, "model_1_knn.joblib"))

    end_time = time.time()
    runtime_tracker.track_runtime("Model_1_kNN", "Training", start_time, end_time)
    print("✅ Best k-NN pipeline trained and saved.")

    # --- Train Model 2: CNN (End-to-End) ---
    print("\n" + "-"*80)
    print("TRAINING MODEL 2: CNN (End-to-End)")
    print("-"*80)
    start_time = time.time()

    MODEL_CNN_PATH = os.path.join(RESULTS_PATH, "model_2_cnn.keras")
    model_cnn = ParkinsonDetectorModel(input_shape=(d1, d2))
    model_cnn.compile(
        optimizer=Adam(learning_rate=LEARNING_RATE),
        loss='binary_crossentropy',
        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
    )
    # NOTE(review): the recorded output of this cell ends with a TypeError
    # right after "Epoch 1:", i.e. around the first checkpoint save. The root
    # cause is not visible in this file -- TODO investigate.
    history_cnn = model_cnn.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[ModelCheckpoint(MODEL_CNN_PATH, save_best_only=True, monitor='val_auc', mode='max', verbose=1)]
    )
    plot_and_save_history(history_cnn, "model_2_cnn", PLOTS_PATH)

    end_time = time.time()
    runtime_tracker.track_runtime("Model_2_CNN", "Training", start_time, end_time)

    # --- Train Model 3: CNN + k-NN ---
    print("\n" + "-"*80)
    print("TRAINING MODEL 3: CNN Feature Extractor + k-NN")
    print("-"*80)
    start_time = time.time()

    # Use the best checkpoint, not the weights of the final epoch.
    best_model_cnn_extractor = tf.keras.models.load_model(MODEL_CNN_PATH)

    # Wrap the loaded model in a functional model that outputs the bottleneck
    # activations; a fresh Input layer is needed because the loaded model's
    # input isn't defined yet.
    cnn_input_tensor = tf.keras.Input(shape=(d1, d2))
    cnn_output_tensor = best_model_cnn_extractor(cnn_input_tensor, extract_features=True)
    feature_extractor_model = Model(
        inputs=cnn_input_tensor,
        outputs=cnn_output_tensor
    )

    # Predict in batches to avoid memory issues
    X_train_features = feature_extractor_model.predict(X_cnn_feat_fit, batch_size=BATCH_SIZE)

    # Fit the k-NN model on the extracted features
    search_cnn_knn = RandomizedSearchCV(
        pipeline_knn,
        param_dist_knn,
        n_iter=10,
        cv=3,
        scoring='accuracy',
        n_jobs=1,
        random_state=42,
        verbose=1
    )
    search_cnn_knn.fit(X_train_features, y_knn_fit)
    joblib.dump(search_cnn_knn.best_estimator_, os.path.join(RESULTS_PATH, "model_3_cnn_knn.joblib"))

    end_time = time.time()
    runtime_tracker.track_runtime("Model_3_CNN_kNN", "Training", start_time, end_time)
    print("✅ Best CNN+k-NN pipeline trained and saved.")

    # --- Train Model 4: NCA + CNN ---
    print("\n" + "-"*80)
    print("TRAINING MODEL 4: NCA pre-processing + CNN")
    print("-"*80)
    start_time = time.time()

    # The NCA output is reshaped into a square "image", so the component
    # count must be a perfect square.
    NCA_COMPONENTS = 64
    nca_img_dim = int(np.sqrt(NCA_COMPONENTS))

    # Scaling followed by the NCA projection
    nca_preprocessor = Pipeline([
        ('scaler', StandardScaler()),
        ('nca', NeighborhoodComponentsAnalysis(n_components=NCA_COMPONENTS, random_state=42, max_iter=200))
    ])

    # Fit and transform ONLY on the training split, not the full dataset
    X_train_nca = nca_preprocessor.fit_transform(X_train_2d, y_train)
    X_train_nca_3d = X_train_nca.reshape(-1, nca_img_dim, nca_img_dim)

    # Transform the validation split using the fitted preprocessor
    X_val_nca = nca_preprocessor.transform(X_val_2d)
    X_val_nca_3d = X_val_nca.reshape(-1, nca_img_dim, nca_img_dim)

    # Save the preprocessor to be used later for testing
    joblib.dump(nca_preprocessor, os.path.join(RESULTS_PATH, "model_4_nca_preprocessor.joblib"))

    MODEL_NCA_CNN_PATH = os.path.join(RESULTS_PATH, "model_4_nca_cnn.keras")
    model_nca_cnn = ParkinsonDetectorModelNCA(input_shape=(nca_img_dim, nca_img_dim))
    model_nca_cnn.compile(
        optimizer=Adam(learning_rate=LEARNING_RATE),
        loss='binary_crossentropy',
        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
    )

    history_nca_cnn = model_nca_cnn.fit(
        X_train_nca_3d,
        y_train,
        validation_data=(X_val_nca_3d, y_val),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[ModelCheckpoint(MODEL_NCA_CNN_PATH, save_best_only=True, monitor='val_auc', mode='max', verbose=1)]
    )
    plot_and_save_history(history_nca_cnn, "model_4_nca_cnn", PLOTS_PATH)

    end_time = time.time()
    runtime_tracker.track_runtime("Model_4_NCA_CNN", "Training", start_time, end_time)
    


🚀 STARTING TRAINING PHASE | SAVING TO: trained_on_Italian
--- Loading data from Italian\data\features_A_ALL.npz ---
Loaded Italian successfully. Shape: (440, 60, 94)

--- All data combined. Final shape: X=(440, 60, 94), y=(440,) ---

Training data prepared. Train: (352, 60, 94), Validation: (88, 60, 94)

--------------------------------------------------------------------------------
TRAINING MODEL 1: k-NN (Baseline)
--------------------------------------------------------------------------------
Fitting 3 folds for each of 10 candidates, totalling 30 fits
⏱️ Model_1_kNN - Training: 0.22 minutes (13.00 seconds)
✅ Best k-NN pipeline trained and saved.

--------------------------------------------------------------------------------
TRAINING MODEL 2: CNN (End-to-End)
--------------------------------------------------------------------------------
Epoch 1/30
11/11 ━━━━━━━━━━━━━━━━━━━━ 0s 434ms/step - accuracy: 0.5354 - auc: 0.5633 - loss: 4.0569
Epoch 1: 

TypeError: tuple indices must be integers or slices, not str

In [3]:
    # --- PHASE 2: TESTING AND EXPLAINABILITY ---
    print("\n\n" + "="*80)
    if TEST_CONFIG is None:
        print(f"🔬 TESTING ON VALIDATION SPLIT FROM: {RUN_ID}")
        X_test, y_test = X_val, y_val
    else:
        print(f"🔬 TESTING ON UNSEEN DATASET: {TEST_CONFIG} with models from {RUN_ID}")
        data = load_single_dataset(TEST_CONFIG[0])
        if data is None: 
            exit("No test data found. Aborting.")
        X_test, y_test, _ = data
    print("="*80)

    # Correctly reshape test data for models that expect 2D input
    d1, d2 = X_test.shape[1], X_test.shape[2]
    X_test_2d = X_test.reshape((X_test.shape[0], d1 * d2))
    
    test_results = {}
    
    # --- Test Model 1: k-NN ---
    print("\n" + "-"*80)
    print("EVALUATING MODEL 1: k-NN (Baseline)")
    print("-"*80)
    start_time = time.time()
    
    model_1 = joblib.load(os.path.join(RESULTS_PATH, "model_1_knn.joblib"))
    y_pred_1 = model_1.predict(X_test_2d)
    y_pred_proba_1 = model_1.predict_proba(X_test_2d)[:, 1]
    acc1 = calculate_and_save_metrics(y_test, y_pred_1, y_pred_proba_1, "model_1_knn", RESULTS_PATH, PLOTS_PATH)
    test_results["1: k-NN (Baseline)"] = acc1
    
    end_time = time.time()
    runtime_tracker.track_runtime("Model_1_kNN", "Testing", start_time, end_time)

    # --- Test Model 2: CNN ---
    print("\n" + "-"*80)
    print("EVALUATING MODEL 2: CNN (End-to-End)")
    print("-"*80)
    start_time = time.time()
    
    model_2 = tf.keras.models.load_model(os.path.join(RESULTS_PATH, "model_2_cnn.keras"))
    y_pred_proba_2 = model_2.predict(X_test)
    y_pred_2 = (y_pred_proba_2 > 0.5).astype("int32")
    acc2 = calculate_and_save_metrics(y_test, y_pred_2, y_pred_proba_2, "model_2_cnn", RESULTS_PATH, PLOTS_PATH)
    test_results["2: CNN (End-to-End)"] = acc2
    
    end_time = time.time()
    runtime_tracker.track_runtime("Model_2_CNN", "Testing", start_time, end_time)
    
    # --- Test Model 3: CNN + k-NN ---
    print("\n" + "-"*80)
    print("EVALUATING MODEL 3: CNN + k-NN")
    print("-"*80)
    start_time = time.time()
    
    model_3_extractor = tf.keras.models.load_model(os.path.join(RESULTS_PATH, "model_2_cnn.keras"))
    model_3_knn = joblib.load(os.path.join(RESULTS_PATH, "model_3_cnn_knn.joblib"))
    # Use the feature extractor to get features from the test data
    # Fix: Use the `feature_extractor_model` created earlier for consistency
    X_test_features = feature_extractor_model.predict(X_test, batch_size=BATCH_SIZE)
    y_pred_3 = model_3_knn.predict(X_test_features)
    y_pred_proba_3 = model_3_knn.predict_proba(X_test_features)[:, 1]
    acc3 = calculate_and_save_metrics(y_test, y_pred_3, y_pred_proba_3, "model_3_cnn_knn", RESULTS_PATH, PLOTS_PATH)
    test_results["3: CNN + k-NN"] = acc3
    
    end_time = time.time()
    runtime_tracker.track_runtime("Model_3_CNN_kNN", "Testing", start_time, end_time)
    
    # --- Test Model 4: NCA + CNN ---
    # Evaluates the NCA-preprocessed CNN: the saved preprocessor transforms the
    # flattened test data, the result is reshaped into square "images", and the
    # saved CNN scores them. Wall-clock time is recorded under
    # "Model_4_NCA_CNN" / "Testing".
    print("\n" + "-"*80)
    print("EVALUATING MODEL 4: NCA + CNN")
    print("-"*80)
    start_time = time.time()

    model_4_preprocessor = joblib.load(os.path.join(RESULTS_PATH, "model_4_nca_preprocessor.joblib"))
    model_4_cnn = tf.keras.models.load_model(os.path.join(RESULTS_PATH, "model_4_nca_cnn.keras"))

    # Apply the preprocessor to the test data
    X_test_nca = model_4_preprocessor.transform(X_test_2d)
    n_nca_features = X_test_nca.shape[1]
    nca_img_dim = int(np.sqrt(n_nca_features))
    # Guard the square reshape: with a non-square feature count, reshape(-1, d, d)
    # either fails with an opaque size error or, if the total element count
    # happens to be divisible by d*d, silently returns the wrong number of
    # samples. Fail early with an explicit message instead.
    if nca_img_dim * nca_img_dim != n_nca_features:
        raise ValueError(
            f"NCA preprocessor produced {n_nca_features} features per sample, "
            "which cannot be reshaped into a square image."
        )
    X_test_nca_3d = X_test_nca.reshape(-1, nca_img_dim, nca_img_dim)

    y_pred_proba_4 = model_4_cnn.predict(X_test_nca_3d)
    # Threshold the model output at 0.5 to obtain hard 0/1 predictions.
    y_pred_4 = (y_pred_proba_4 > 0.5).astype("int32")
    acc4 = calculate_and_save_metrics(y_test, y_pred_4, y_pred_proba_4, "model_4_nca_cnn", RESULTS_PATH, PLOTS_PATH)
    test_results["4: NCA + CNN"] = acc4

    end_time = time.time()
    runtime_tracker.track_runtime("Model_4_NCA_CNN", "Testing", start_time, end_time)
    
    # --- Explainability for CNN-based models ---
    # Runs the SHAP and Grad-CAM helpers for Model 2 and Model 4, writing each
    # model's artefacts to its own sub-directory of PLOTS_PATH/explainability
    # and recording the elapsed time of each pair of analyses.
    print("\n\n" + "="*80)
    print("🔬 RUNNING EXPLAINABILITY ANALYSIS")
    print("="*80)
    explain_path = os.path.join(PLOTS_PATH, "explainability")
    print("\n--- Explaining Model 2 (CNN) and Model 3 (CNN+k-NN) Feature Extractor ---")

    start_time = time.time()
    # Both analyses for Model 2 write to the same output directory.
    model_2_explain_dir = os.path.join(explain_path, "model_2_cnn")
    run_full_shap_analysis(model_2, X_train, X_test, y_test, model_2_explain_dir, mel_bins)
    run_gradcam_analysis(model_2, X_test, y_test, model_2_explain_dir)
    end_time = time.time()
    runtime_tracker.track_runtime("Model_2_CNN", "Explainability", start_time, end_time)

    print("\n--- Explaining Model 4 (NCA + CNN) ---")
    start_time = time.time()

    # Model 4 is explained on the NCA-transformed training and test arrays,
    # since those are the inputs this CNN receives.
    model_4_explain_dir = os.path.join(explain_path, "model_4_nca_cnn")
    run_full_shap_analysis(model_4_cnn, X_train_nca_3d, X_test_nca_3d, y_test, model_4_explain_dir, mel_bins=0)
    run_gradcam_analysis(model_4_cnn, X_test_nca_3d, y_test, model_4_explain_dir)
    end_time = time.time()
    runtime_tracker.track_runtime("Model_4_NCA_CNN", "Explainability", start_time, end_time)
    
    # --- Save Runtime Summary ---
    # Persist everything the runtime tracker has collected during this run.
    runtime_tracker.save_runtime_summary()

    # --- Final Summary ---
    # Reload the metrics CSV from RESULTS_PATH, print it in full, and report
    # the row with the highest value in the 'Accuracy' column.
    print("\n\n" + "="*80)
    print(f"🏆 FINAL SUMMARY - {RUN_ID}")
    print("="*80)
    metrics_csv_path = os.path.join(RESULTS_PATH, "evaluation_metrics.csv")
    summary_df = pd.read_csv(metrics_csv_path)
    print(summary_df.to_string())
    # idxmax returns the index label of the first maximum accuracy.
    best_row_label = summary_df['Accuracy'].idxmax()
    best_model_row = summary_df.loc[best_row_label]
    print("-" * 80)
    best_model_name = best_model_row['Model']
    best_model_accuracy = best_model_row['Accuracy']
    print(f"\n🚀 Best Performing Model: '{best_model_name}' with an accuracy of {best_model_accuracy:.4f}")
    print("="*80)




🔬 TESTING ON UNSEEN DATASET: ({'dataset': 'Neurovoz', 'mode': 'A', 'feature_mode': 'ALL'},) with models from trained_on_Italian
--- Loading data from Neurovoz\data\features_A_ALL.npz ---
Loaded Neurovoz successfully. Shape: (1064, 60, 94)

--------------------------------------------------------------------------------
EVALUATING MODEL 1: k-NN (Baseline)
--------------------------------------------------------------------------------
✅ Saved all metrics and plots for model_1_knn.
⏱️ Model_1_kNN - Testing: 0.03 minutes (1.94 seconds)

--------------------------------------------------------------------------------
EVALUATING MODEL 2: CNN (End-to-End)
--------------------------------------------------------------------------------
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 130ms/step
✅ Saved all metrics and plots for model_2_cnn.
⏱️ Model_2_CNN - Testing: 0.13 minutes (7.81 seconds)

--------------------------------------------------------------------------------
E

Expected: keras_tensor_12
Received: inputs=['Tensor(shape=(50, 60, 94))']


-> Saved SHAP global bar plot.
-> Saved SHAP class heatmaps.
--- SHAP Analysis Complete ---

--- Running Grad-CAM Analysis ---
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 108ms/step


                                                                  

-> Saved average Grad-CAM comparison.
--- Grad-CAM Analysis Complete ---
⏱️ Model_2_CNN - Explainability: 1.64 minutes (98.63 seconds)

--- Explaining Model 4 (NCA + CNN) ---

--- Running SHAP Analysis ---


Expected: keras_tensor_14
Received: inputs=['Tensor(shape=(50, 8, 8))']


-> Saved SHAP global bar plot.
-> Saved SHAP class heatmaps.
--- SHAP Analysis Complete ---

--- Running Grad-CAM Analysis ---
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


                                                                  

-> Saved average Grad-CAM comparison.
--- Grad-CAM Analysis Complete ---
⏱️ Model_4_NCA_CNN - Explainability: 0.33 minutes (19.68 seconds)

📊 Runtime Summary:
          Model  Total_Minutes  Avg_Minutes  Total_Seconds  Avg_Seconds
    Model_1_kNN          0.249        0.124         14.936        7.468
    Model_2_CNN          4.679        1.560        280.718       93.573
Model_3_CNN_kNN          0.284        0.142         17.069        8.535
Model_4_NCA_CNN          0.852        0.284         51.137       17.046
✅ Runtime data saved to: d:\Projects\Voice\Parkinson-s-Disease-Detector-Using-AI\Parkinson-s-Disease-Detector-Using-AI\1\runs\trained_on_Italian\model_runtimes.csv
✅ Runtime summary saved to: d:\Projects\Voice\Parkinson-s-Disease-Detector-Using-AI\Parkinson-s-Disease-Detector-Using-AI\1\runs\trained_on_Italian\runtime_summary.csv


🏆 FINAL SUMMARY - trained_on_Italian
             Model  Accuracy       AUC  F1-Score  Precision  Sensitivity (Recall)   TP   TN   FP   FN
0      m