In [None]:
# @title Cell 1: Environment Setup & Data Loading

"""
ASD Detection Project: Baseline Experiments
Cell 1: Environment setup and load preprocessed datasets
Establishes computational environment and verifies data integrity
"""

# Cell banner.
for _banner_line in ("=" * 80,
                     "BASELINE EXPERIMENTS - CELL 1: ENVIRONMENT SETUP",
                     "=" * 80):
    print(_banner_line)

# ==========================================
# 1. GOOGLE DRIVE MOUNTING
# ==========================================

print("\n[STEP 1/10] Mounting Google Drive...", "-" * 50, sep="\n")

# Both the Colab import and the mount call are guarded: any failure is
# reported and then re-raised so the cell stops here.
try:
    from google.colab import drive
    drive.mount('/content/drive')
except Exception as mount_error:
    print(f"ERROR: Drive mounting failed - {mount_error}")
    raise
else:
    print("SUCCESS: Google Drive mounted")

# ==========================================
# 2. PROJECT PATH CONFIGURATION
# ==========================================

print("\n[STEP 2/10] Configuring project paths...", "-" * 50, sep="\n")

import os

PROJECT_ROOT = '/content/drive/MyDrive/ASD_GWO_XGBoost_Project'

PROJECT_PATHS = {
    'root': PROJECT_ROOT,
    'dataset': f"{PROJECT_ROOT}/01_Dataset",
    'preprocessed': f"{PROJECT_ROOT}/01_Dataset/splits/no_ethnicity/preprocessed",
    'results': f"{PROJECT_ROOT}/03_Results",
    'baseline_results': f"{PROJECT_ROOT}/03_Results/output_notebook_02/cell_1_setup"
}

# Only the output directories may be created on demand; every input
# directory has to exist already, otherwise the cell aborts.
_creatable_dirs = ('results', 'baseline_results')

for name, path in PROJECT_PATHS.items():
    if os.path.exists(path):
        print(f"  FOUND: {name}")
        continue
    if name not in _creatable_dirs:
        raise FileNotFoundError(f"CRITICAL: Missing {name} directory")
    os.makedirs(path, exist_ok=True)
    print(f"  CREATED: {name}")

# One preprocessed CSV per split, all stored in the same directory.
PREPROCESSED_FILES = {
    split_key: f"{PROJECT_PATHS['preprocessed']}/{split_key}_set_preprocessed.csv"
    for split_key in ('train', 'val', 'test')
}

print("\nVerifying preprocessed files exist...")
for split_name, file_path in PREPROCESSED_FILES.items():
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"CRITICAL: Missing {split_name}_set_preprocessed.csv")
    # Report the file size in MiB.
    size_mb = os.path.getsize(file_path) / (1024 ** 2)
    print(f"  {split_name.upper()}: {size_mb:.2f} MB")

# ==========================================
# 3. LIBRARY IMPORTS
# ==========================================

print("\n[STEP 3/10] Importing libraries...")
print("-" * 50)

# Data handling and serialization.
import pandas as pd
import numpy as np
import json
import warnings
# NOTE: this silences every warning category for the rest of the session,
# including deprecation notices emitted by third-party libraries.
warnings.filterwarnings('ignore')

# Feature selection, evaluation metrics, the model library and the
# statistical test used by the baseline experiments.
from sklearn.feature_selection import SelectKBest, RFECV, f_classif
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, log_loss,
    confusion_matrix, roc_curve
)
import xgboost as xgb
from scipy.stats import wilcoxon

# Plotting. The non-interactive Agg backend is selected before pyplot is
# imported; the plotting helpers in this cell only write figures to files.
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import seaborn as sns

# Timing and timestamps for run bookkeeping.
import time
from datetime import datetime

print("SUCCESS: All libraries imported")

# ==========================================
# 4. GLOBAL CONFIGURATION
# ==========================================

print("\n[STEP 4/10] Setting global configuration...", "-" * 50, sep="\n")

# Base seed; also applied to NumPy's global random number generator.
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

# Experiment-wide settings shared by the later cells.
CONFIG = dict(
    random_state=RANDOM_STATE,
    cv_folds=5,
    scoring_metric='roc_auc',
    target_column='ASD_traits',
    n_jobs=-1,
    n_runs=10,
)

# Echo the settings that matter most for reproducibility.
for _label, _key in (
    ("Random state", 'random_state'),
    ("Primary metric", 'scoring_metric'),
    ("CV folds", 'cv_folds'),
    ("Runs per baseline", 'n_runs'),
):
    print(f"{_label}: {CONFIG[_key]}")

# ==========================================
# 5. XGBOOST CONFIGURATIONS
# ==========================================

print("\n[STEP 5/10] Defining XGBoost configurations...")
print("-" * 50)


def _xgb_device_params():
    """Return the tree-method/device parameters accepted by this XGBoost build.

    XGBoost 2.0 replaced ``tree_method='gpu_hist'`` / ``predictor='gpu_predictor'``
    with ``tree_method='hist'`` plus a ``device`` parameter, and newer releases
    reject ``'gpu_hist'`` outright. A GPU is only requested when ``nvidia-smi``
    runs successfully, so the same configuration also works on CPU runtimes.

    Returns:
        dict: keyword arguments to merge into an XGBoost parameter dictionary.
    """
    import subprocess

    try:
        subprocess.check_output(
            ['nvidia-smi', '--query-gpu=name', '--format=csv,noheader'],
            stderr=subprocess.DEVNULL,
        )
        has_gpu = True
    except (OSError, subprocess.CalledProcessError):
        # nvidia-smi is missing or reported a failure: treat as CPU-only.
        has_gpu = False

    major_version = int(xgb.__version__.split('.')[0])
    if major_version >= 2:
        return {'tree_method': 'hist', 'device': 'cuda' if has_gpu else 'cpu'}
    if has_gpu:
        # Legacy spelling, only understood by XGBoost 1.x.
        return {'tree_method': 'gpu_hist', 'predictor': 'gpu_predictor'}
    return {'tree_method': 'hist'}


_XGB_DEVICE_PARAMS = _xgb_device_params()

# Configuration of the final model trained in every baseline.
XGBOOST_UNIFIED_CONFIG = {
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'n_estimators': 500,
    'max_depth': 8,
    'learning_rate': 0.05,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    **_XGB_DEVICE_PARAMS,
    'random_state': RANDOM_STATE,
    'n_jobs': -1,
    'verbosity': 0
}

# Lighter configuration for the estimator used during RFECV feature selection.
XGBOOST_RFECV_CONFIG = {
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'n_estimators': 150,
    'max_depth': 6,
    'learning_rate': 0.1,
    'subsample': 1.0,
    'colsample_bytree': 1.0,
    **_XGB_DEVICE_PARAMS,
    'random_state': RANDOM_STATE,
    'n_jobs': -1,
    'verbosity': 0
}

# Filter-method settings: candidate feature counts and the score function.
SELECTKBEST_CONFIG = dict(
    selection_method='f_classif',
    k_values=[8, 12, 16, 20],
    scoring_function=f_classif,
    selection_strategy='statistical_ranking',
)

# Wrapper-method settings for recursive feature elimination with CV.
RFECV_CONFIG = dict(
    step=5,
    cv_folds=3,
    min_features_to_select=8,
    scoring='roc_auc',
    n_jobs=-1,
)

# Print the key hyperparameters of both XGBoost configurations.
for _heading, _cfg in (
    ("Unified XGBoost config (final model):", XGBOOST_UNIFIED_CONFIG),
    ("\nRFECV XGBoost config (feature selection):", XGBOOST_RFECV_CONFIG),
):
    print(_heading)
    print(f"  n_estimators={_cfg['n_estimators']}, max_depth={_cfg['max_depth']}, lr={_cfg['learning_rate']}")
    print(f"  subsample={_cfg['subsample']}, colsample={_cfg['colsample_bytree']}")

# ==========================================
# 6. UTILITY FUNCTIONS
# ==========================================

print("\n[STEP 6/10] Defining utility functions...")
print("-" * 50)

def save_results(results, filepath):
    """Save a results dictionary to a JSON file.

    Args:
        results: JSON-like structure; may contain NumPy scalars/arrays and
            pandas Series/Index objects, which are converted to plain types.
        filepath: Destination path. Missing parent directories are created;
            a bare filename is written to the current working directory.

    Raises:
        OSError: If the directory cannot be created or the file not written.
    """
    def json_serializer(obj):
        # json calls this only for objects it cannot encode natively.
        if isinstance(obj, np.bool_):
            # Without this branch NumPy booleans end up as "True"/"False" strings.
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, (pd.Series, pd.Index)):
            return obj.tolist()
        try:
            if pd.isna(obj):
                return None
        except (TypeError, ValueError):
            # pd.isna returned an array-like (ambiguous truth value);
            # fall through to the string representation.
            pass
        return str(obj)

    # os.makedirs('') raises, so only create a directory when one is given.
    directory = os.path.dirname(filepath)
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, default=json_serializer)

def calculate_metrics(y_true, y_pred, y_proba):
    """Compute the evaluation metrics reported for a set of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted class labels.
        y_proba: Scores passed to the ROC AUC and log-loss computations.

    Returns:
        dict: accuracy, macro precision/recall/F1, ROC AUC and log loss,
        each converted to a plain Python float.
    """
    macro_kwargs = {'average': 'macro', 'zero_division': 0}

    metrics = {'accuracy': float(accuracy_score(y_true, y_pred))}
    for key, scorer in (
        ('precision_macro', precision_score),
        ('recall_macro', recall_score),
        ('f1_macro', f1_score),
    ):
        metrics[key] = float(scorer(y_true, y_pred, **macro_kwargs))

    # Score-based metrics use y_proba rather than the hard labels.
    metrics['roc_auc'] = float(roc_auc_score(y_true, y_proba))
    metrics['log_loss'] = float(log_loss(y_true, y_proba))
    return metrics

def create_confusion_matrix_plot(y_true, y_pred, title, save_path):
    """Create a confusion-matrix heatmap and save it to ``save_path``.

    Args:
        y_true: Ground-truth labels, encoded as 0 (Non-ASD) / 1 (ASD).
        y_pred: Predicted labels with the same encoding.
        title: Figure title.
        save_path: Output image path.
    """
    # Fix the label set so the matrix is always 2x2 and lines up with the two
    # tick labels, even when only one class occurs in y_true and y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    fig = plt.figure(figsize=(8, 6))
    try:
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=True,
                    xticklabels=['Non-ASD', 'ASD'],
                    yticklabels=['Non-ASD', 'ASD'])
        plt.title(title, fontweight='bold', pad=20)
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')
        plt.tight_layout()
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
    finally:
        # Close the figure even when drawing or saving fails, so repeated
        # calls do not accumulate open figures.
        plt.close(fig)

def create_roc_curve_plot(y_true, y_proba, title, save_path):
    """Create a ROC-curve figure and save it to ``save_path``.

    Args:
        y_true: Ground-truth binary labels.
        y_proba: Scores used to build the curve and compute the AUC.
        title: Figure title.
        save_path: Output image path.
    """
    fpr, tpr, _ = roc_curve(y_true, y_proba)
    auc_score = roc_auc_score(y_true, y_proba)

    fig = plt.figure(figsize=(8, 6))
    try:
        plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {auc_score:.4f})')
        # Diagonal reference line for a random classifier.
        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random Classifier')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title(title, fontweight='bold', pad=20)
        plt.legend(loc="lower right")
        plt.grid(alpha=0.3)
        plt.tight_layout()
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
    finally:
        # Close the figure even when drawing or saving fails, so repeated
        # calls do not accumulate open figures.
        plt.close(fig)

print("SUCCESS: Utility functions defined")

# ==========================================
# 7. LOAD PREPROCESSED DATA
# ==========================================

print("\n[STEP 7/10] Loading preprocessed datasets...", "-" * 50, sep="\n")

_target_column = CONFIG['target_column']

# Read the three splits in train / val / test order.
train_df, val_df, test_df = (
    pd.read_csv(PREPROCESSED_FILES[_split_key])
    for _split_key in ('train', 'val', 'test')
)

# Separate the feature matrix from the target column for every split.
X_train, y_train = train_df.drop(columns=[_target_column]), train_df[_target_column]
X_val, y_val = val_df.drop(columns=[_target_column]), val_df[_target_column]
X_test, y_test = test_df.drop(columns=[_target_column]), test_df[_target_column]

for _prefix, _features in (("Train: ", X_train), ("Val:   ", X_val), ("Test:  ", X_test)):
    print(f"{_prefix}{_features.shape[0]:,} samples x {_features.shape[1]} features")

# ==========================================
# 8. DATA INTEGRITY VERIFICATION
# ==========================================

print("\n[STEP 8/10] Verifying data integrity...")
print("-" * 50)

EXPECTED_SAMPLES = {'train': 1270, 'val': 318, 'test': 397}
EXPECTED_FEATURES = 25

# (split name, data) pairs reused by every check below.
_feature_splits = [('train', X_train), ('val', X_val), ('test', X_test)]
_target_splits = [('train', y_train), ('val', y_val), ('test', y_test)]

actual_samples = {split: X.shape[0] for split, X in _feature_splits}

print("Sample count verification:")
for split, expected in EXPECTED_SAMPLES.items():
    actual = actual_samples[split]
    status = "PASS" if actual == expected else "FAIL"
    print(f"  {split.upper()}: {actual:,} (expected {expected:,}) - {status}")
    if actual != expected:
        raise ValueError(f"Sample count mismatch in {split} set")

print("\nFeature count verification:")
for split, X in _feature_splits:
    n_features = X.shape[1]
    status = "PASS" if n_features == EXPECTED_FEATURES else "FAIL"
    print(f"  {split.upper()}: {n_features} features (expected {EXPECTED_FEATURES}) - {status}")
    if n_features != EXPECTED_FEATURES:
        raise ValueError(f"Feature count mismatch in {split} set")

# Equal column counts do not guarantee the same columns: compare the names
# and their order against the train split as well.
print("\nFeature alignment verification:")
_train_columns = X_train.columns.tolist()
for split, X in _feature_splits[1:]:
    if X.columns.tolist() != _train_columns:
        raise ValueError(f"Feature columns in {split} set differ from the train set")
print("  CONFIRMED: Feature names and order identical across splits")

EXCLUDED_COLUMNS = ['CASE_NO_PATIENTS', 'Ethnicity']
print("\nVerifying excluded columns removed:")
# Check every split, not only the training data.
for split, X in _feature_splits:
    for col in EXCLUDED_COLUMNS:
        if col in X.columns:
            raise ValueError(f"Column '{col}' should have been removed ({split} set)")
print(f"  CONFIRMED: {EXCLUDED_COLUMNS} not present")

print("\nTarget encoding verification:")
for split, y in _target_splits:
    unique_values = sorted(y.unique())
    if unique_values != [0, 1]:
        raise ValueError(f"Target encoding incorrect in {split} set: {unique_values}")
print("  CONFIRMED: Target encoded as 0=Non-ASD, 1=ASD")

print("\nData quality checks:")
for split, X in _feature_splits:
    missing = X.isnull().sum().sum()
    # Infinity can only occur in numeric columns.
    infinite = np.isinf(X.select_dtypes(include=[np.number])).sum().sum()
    print(f"  {split.upper()}: missing={missing}, infinite={infinite} - {'PASS' if missing==0 and infinite==0 else 'FAIL'}")
    if missing > 0 or infinite > 0:
        raise ValueError(f"Data quality issues in {split} set")

print("\nClass distribution:")
for split, y in _target_splits:
    dist = y.value_counts().to_dict()
    total = len(y)
    print(f"  {split.upper()}: Non-ASD={dist.get(0,0)} ({dist.get(0,0)/total*100:.1f}%), ASD={dist.get(1,0)} ({dist.get(1,0)/total*100:.1f}%)")

# ==========================================
# 9. SYSTEM INFORMATION
# ==========================================

print("\n[STEP 9/10] System configuration...")
print("-" * 50)

import subprocess
import sys

print(f"Python: {sys.version.split()[0]}")
print(f"Pandas: {pd.__version__}")
print(f"NumPy: {np.__version__}")
print(f"XGBoost: {xgb.__version__}")
print(f"Matplotlib backend: {matplotlib.get_backend()}")

# Best-effort GPU probe. Only the expected failures are handled (nvidia-smi
# missing or exiting with an error); a bare ``except`` would also swallow
# KeyboardInterrupt and SystemExit.
try:
    gpu_info = subprocess.check_output(
        ['nvidia-smi', '--query-gpu=name', '--format=csv,noheader']
    ).decode('utf-8', errors='replace').strip()
except (OSError, subprocess.CalledProcessError):
    print("GPU: Not available")
    GPU_AVAILABLE = False
else:
    print(f"GPU: {gpu_info}")
    GPU_AVAILABLE = True

# ==========================================
# 10. EXPORT SETUP INFORMATION
# ==========================================

print("\n[STEP 10/10] Saving setup information...", "-" * 50, sep="\n")

# The scoring callable is left out of the exported SelectKBest settings.
_selectkbest_export = dict(SELECTKBEST_CONFIG)
_selectkbest_export.pop('scoring_function', None)

# Snapshot of paths, configuration, verification outcome and library versions.
setup_info = dict(
    timestamp=datetime.now().isoformat(),
    cell='Cell 1: Environment Setup & Data Loading',
    project_paths=PROJECT_PATHS,
    configuration=CONFIG,
    xgboost_unified_config=XGBOOST_UNIFIED_CONFIG,
    xgboost_rfecv_config=XGBOOST_RFECV_CONFIG,
    selectkbest_config=_selectkbest_export,
    rfecv_config=RFECV_CONFIG,
    data_verification=dict(
        expected_samples=EXPECTED_SAMPLES,
        actual_samples=actual_samples,
        expected_features=EXPECTED_FEATURES,
        excluded_columns_verified=True,
        target_encoding_verified=True,
        data_quality_passed=True,
    ),
    system_info=dict(
        python_version=sys.version.split()[0],
        pandas_version=pd.__version__,
        numpy_version=np.__version__,
        xgboost_version=xgb.__version__,
        matplotlib_backend=matplotlib.get_backend(),
        gpu_available=GPU_AVAILABLE,
    ),
    status='completed',
)

setup_path = f"{PROJECT_PATHS['baseline_results']}/setup_info.json"
save_results(setup_info, setup_path)
print(f"Setup information saved: {setup_path}")

# ==========================================
# COMPLETION SUMMARY
# ==========================================

_rule = "=" * 80

print(f"\n{_rule}", "CELL 1 COMPLETED SUCCESSFULLY", _rule, sep="\n")

print(
    "\nENVIRONMENT STATUS:",
    f"  Project root: {PROJECT_ROOT}",
    f"  Results directory: {PROJECT_PATHS['baseline_results']}",
    f"  GPU available: {GPU_AVAILABLE}",
    sep="\n",
)

print("\nDATA STATUS:")
for _split_label, _frame in (("Train", X_train), ("Validation", X_val), ("Test", X_test)):
    print(f"  {_split_label}: {_frame.shape[0]:,} samples x {_frame.shape[1]} features")
print("  All integrity checks: PASSED")

print(
    "\nCONFIGURATION:",
    f"  Random state: {CONFIG['random_state']}",
    f"  Scoring metric: {CONFIG['scoring_metric']}",
    f"  Runs per baseline: {CONFIG['n_runs']}",
    sep="\n",
)

print(
    "\nNEXT STEPS:",
    "  Cell 2: Baseline 1 - XGBoost with All Features",
    "  Cell 3: Baseline 2 - XGBoost with SelectKBest",
    "  Cell 4: Baseline 3 - XGBoost with RFECV",
    sep="\n",
)

print(f"\n{_rule}", "READY FOR BASELINE EXPERIMENTS", _rule, sep="\n")

BASELINE EXPERIMENTS - CELL 1: ENVIRONMENT SETUP

[STEP 1/10] Mounting Google Drive...
--------------------------------------------------
Mounted at /content/drive
SUCCESS: Google Drive mounted

[STEP 2/10] Configuring project paths...
--------------------------------------------------
  FOUND: root
  FOUND: dataset
  FOUND: preprocessed
  FOUND: results
  FOUND: baseline_results

Verifying preprocessed files exist...
  TRAIN: 0.17 MB
  VAL: 0.04 MB
  TEST: 0.05 MB

[STEP 3/10] Importing libraries...
--------------------------------------------------
SUCCESS: All libraries imported

[STEP 4/10] Setting global configuration...
--------------------------------------------------
Random state: 42
Primary metric: roc_auc
CV folds: 5
Runs per baseline: 10

[STEP 5/10] Defining XGBoost configurations...
--------------------------------------------------
Unified XGBoost config (final model):
  n_estimators=500, max_depth=8, lr=0.05
  subsample=0.8, colsample=0.8

RFECV XGBoost config (feature 

In [None]:
# @title Cell 2: Baseline 1 Training - XGBoost with All Features

"""
ASD Detection Project: Baseline Experiments
Cell 2: Baseline 1 - XGBoost trained with all features (no feature selection)
10 independent runs with unified XGBoost configuration
Control group for feature selection comparison
"""

print("="*80)
print("BASELINE 1 TRAINING: XGBOOST WITH ALL FEATURES")
print("="*80)

# ==========================================
# 1. VERIFY PREREQUISITES
# ==========================================

print("\n[STEP 1/7] Verifying Cell 1 completion...")
print("-" * 50)

# Explicit checks instead of ``assert``: assertions are stripped when Python
# runs with -O, and the error now says which prerequisite failed.
_required_names = ('X_train', 'X_val', 'XGBOOST_UNIFIED_CONFIG', 'CONFIG')
_missing_names = [name for name in _required_names if name not in globals()]
if _missing_names:
    raise RuntimeError(
        "ERROR: Cell 1 must be executed first "
        f"(undefined: {', '.join(_missing_names)})"
    )

_problems = []
if X_train.shape[1] != 25:
    _problems.append(f"X_train has {X_train.shape[1]} features, expected 25")
if X_val.shape[0] != 318:
    _problems.append(f"X_val has {X_val.shape[0]} samples, expected 318")
if CONFIG['n_runs'] != 10:
    _problems.append(f"CONFIG['n_runs'] is {CONFIG['n_runs']}, expected 10")
if _problems:
    raise RuntimeError(
        "ERROR: Cell 1 must be executed first (" + "; ".join(_problems) + ")"
    )

print(f"VERIFIED: Training data ({X_train.shape[0]} samples x {X_train.shape[1]} features)")
print(f"VERIFIED: Validation data ({X_val.shape[0]} samples)")
print(f"VERIFIED: XGBoost unified config loaded")
print(f"VERIFIED: Number of runs = {CONFIG['n_runs']}")

# ==========================================
# 2. EXPERIMENT SETUP
# ==========================================

print("\n[STEP 2/7] Setting up Baseline 1 experiment...", "-" * 50, sep="\n")

# Output locations for this baseline; the models directory is created
# together with any missing parents.
BASELINE_1_DIR = f"{PROJECT_PATHS['baseline_results']}/baseline_1_all_features"
MODELS_DIR = f"{BASELINE_1_DIR}/models"
os.makedirs(MODELS_DIR, exist_ok=True)

print(f"Experiment directory: {BASELINE_1_DIR}")
print(f"Models directory: {MODELS_DIR}")

# Each run uses its own seed: base seed, base seed + 1, ...
_first_seed = CONFIG['random_state']
_last_seed = _first_seed + CONFIG['n_runs'] - 1

print(
    "\nExperiment configuration:",
    f"  Method: No feature selection (all {X_train.shape[1]} features)",
    f"  Number of runs: {CONFIG['n_runs']}",
    f"  Random seeds: {_first_seed} to {_last_seed}",
    f"  XGBoost config: n_estimators={XGBOOST_UNIFIED_CONFIG['n_estimators']}, "
    f"max_depth={XGBOOST_UNIFIED_CONFIG['max_depth']}, "
    f"learning_rate={XGBOOST_UNIFIED_CONFIG['learning_rate']}",
    sep="\n",
)

# ==========================================
# 3. TRAINING LOOP - 10 INDEPENDENT RUNS
# ==========================================

print("\n[STEP 3/7] Training 10 independent models...")
print("-" * 50)

# Container for everything this baseline reports; filled in by the loop
# below and by the later steps of this cell.
training_results = {
    'experiment_info': {
        'name': 'Baseline 1: XGBoost with All Features',
        'description': 'Control group - no feature selection applied',
        'timestamp': datetime.now().isoformat(),
        'n_runs': CONFIG['n_runs']
    },
    'configuration': {
        'xgboost_config': XGBOOST_UNIFIED_CONFIG,
        'n_features': X_train.shape[1],
        'feature_names': X_train.columns.tolist()
    },
    'training_results': {
        'runs': [],
        'statistics': {}
    },
    'best_model': {}
}

start_time_total = time.time()
validation_aucs = []

for run_id in range(CONFIG['n_runs']):
    run_seed = CONFIG['random_state'] + run_id
    print(f"\nRun {run_id + 1}/{CONFIG['n_runs']} (seed={run_seed})")
    print("-" * 40)

    # Same hyperparameters for every run; only the seed changes.
    model_config = XGBOOST_UNIFIED_CONFIG.copy()
    model_config['random_state'] = run_seed

    model = xgb.XGBClassifier(**model_config)

    # Train on all features. The timer wraps only fit(), so the reported
    # training time excludes validation prediction and metric computation.
    print(f"  Training on {X_train.shape[1]} features...")
    start_time_run = time.time()
    model.fit(X_train, y_train, verbose=False)
    training_time = time.time() - start_time_run

    # Predict on validation set
    y_val_pred = model.predict(X_val)
    y_val_proba = model.predict_proba(X_val)[:, 1]

    # Calculate validation metrics
    val_metrics = calculate_metrics(y_val, y_val_pred, y_val_proba)

    # Save model
    model_path = f"{MODELS_DIR}/run_{run_id}_model.json"
    model.save_model(model_path)

    # Store results
    run_result = {
        'run_id': run_id,
        'random_state': run_seed,
        'training_time_seconds': round(training_time, 2),
        'validation_metrics': val_metrics,
        'model_path': model_path
    }

    training_results['training_results']['runs'].append(run_result)
    validation_aucs.append(val_metrics['roc_auc'])

    print(f"  Validation AUC: {val_metrics['roc_auc']:.4f}")
    print(f"  Validation Accuracy: {val_metrics['accuracy']:.4f}")
    print(f"  Training time: {training_time:.2f}s")
    print(f"  Model saved: {model_path}")

# Wall-clock time for the whole loop (training, evaluation and saving).
total_training_time = time.time() - start_time_total

print(f"\nTotal training time: {total_training_time:.2f}s ({total_training_time/60:.2f} min)")

# ==========================================
# 4. STATISTICAL SUMMARY
# ==========================================

print("\n[STEP 4/7] Computing statistical summary...", "-" * 50, sep="\n")

validation_aucs_array = np.array(validation_aucs)

# Summary of the validation AUCs; the standard deviation is the sample
# estimate (ddof=1) and best_run_id is the index of the highest AUC.
statistics = dict(
    mean_val_auc=float(validation_aucs_array.mean()),
    std_val_auc=float(validation_aucs_array.std(ddof=1)),
    min_val_auc=float(validation_aucs_array.min()),
    max_val_auc=float(validation_aucs_array.max()),
    median_val_auc=float(np.median(validation_aucs_array)),
    best_run_id=int(validation_aucs_array.argmax()),
    total_training_time_seconds=round(total_training_time, 2),
)

training_results['training_results']['statistics'] = statistics

print("Validation AUC statistics across 10 runs:")
for _stat_label, _stat_key in (
    ("Mean:", 'mean_val_auc'),
    ("Std:", 'std_val_auc'),
    ("Min:", 'min_val_auc'),
    ("Max:", 'max_val_auc'),
    ("Median:", 'median_val_auc'),
):
    # Labels are padded to a common width so the values line up.
    print(f"  {_stat_label:<7} {statistics[_stat_key]:.4f}")

_best_line = f"\nBest run: Run {statistics['best_run_id']} "
_best_line += f"(AUC = {statistics['max_val_auc']:.4f})"
print(_best_line)

# ==========================================
# 5. IDENTIFY AND SAVE BEST MODEL
# ==========================================

print("\n[STEP 5/7] Saving best model...", "-" * 50, sep="\n")

best_run_id = statistics['best_run_id']
best_run_result = training_results['training_results']['runs'][best_run_id]
_best_metrics = best_run_result['validation_metrics']

# Reload the best run's model from disk and write it again under a
# dedicated file name.
best_model = xgb.XGBClassifier()
best_model.load_model(best_run_result['model_path'])

best_model_path = f"{BASELINE_1_DIR}/baseline_1_best_model.json"
best_model.save_model(best_model_path)

training_results['best_model'] = dict(
    run_id=best_run_id,
    random_state=best_run_result['random_state'],
    validation_auc=_best_metrics['roc_auc'],
    validation_metrics=_best_metrics,
    model_path=best_model_path,
)

print(
    f"Best model: Run {best_run_id}",
    f"  Validation AUC: {_best_metrics['roc_auc']:.4f}",
    f"  Validation Accuracy: {_best_metrics['accuracy']:.4f}",
    f"  Saved to: {best_model_path}",
    sep="\n",
)

# ==========================================
# 6. FEATURE IMPORTANCE ANALYSIS
# ==========================================

print("\n[STEP 6/7] Analyzing feature importance (best model)...", "-" * 50, sep="\n")

feature_importance = best_model.feature_importances_
feature_names = X_train.columns.tolist()

# One row per feature, most important first.
importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': feature_importance
})
importance_df = importance_df.sort_values('importance', ascending=False)

print("\nTop 10 most important features:")
_top_rows = importance_df.head(10)
for _rank, (_feature, _score) in enumerate(
        zip(_top_rows['feature'], _top_rows['importance']), start=1):
    print(f"  {_rank:2d}. {_feature:<25s} {_score:.4f}")

importance_csv_path = f"{BASELINE_1_DIR}/feature_importance.csv"
importance_df.to_csv(importance_csv_path, index=False)
print(f"\nFeature importance saved: {importance_csv_path}")

# ==========================================
# 7. SAVE TRAINING RESULTS
# ==========================================

print("\n[STEP 7/7] Saving training results...", "-" * 50, sep="\n")

# Full per-run record.
results_path = f"{BASELINE_1_DIR}/baseline_1_training_results.json"
save_results(training_results, results_path)

print(f"Training results saved: {results_path}")

# Condensed summary: headline numbers only.
summary = dict(
    experiment='Baseline 1: XGBoost with All Features',
    n_features=X_train.shape[1],
    n_runs=CONFIG['n_runs'],
    validation_auc=dict(
        mean=statistics['mean_val_auc'],
        std=statistics['std_val_auc'],
        range=[statistics['min_val_auc'], statistics['max_val_auc']],
    ),
    best_run=dict(
        run_id=best_run_id,
        validation_auc=statistics['max_val_auc'],
    ),
    training_time_total_minutes=round(total_training_time / 60, 2),
)

summary_path = f"{BASELINE_1_DIR}/training_summary.json"
save_results(summary, summary_path)
print(f"Training summary saved: {summary_path}")

# ==========================================
# COMPLETION SUMMARY
# ==========================================

_rule = "=" * 80

print(f"\n{_rule}", "BASELINE 1 TRAINING COMPLETED SUCCESSFULLY", _rule, sep="\n")

print(
    "\nEXPERIMENT SUMMARY:",
    "  Method: XGBoost with all features (no selection)",
    f"  Features used: {X_train.shape[1]} features",
    f"  Training runs: {CONFIG['n_runs']}",
    f"  Total training time: {total_training_time/60:.2f} minutes",
    sep="\n",
)

print(
    "\nVALIDATION PERFORMANCE:",
    f"  Mean AUC: {statistics['mean_val_auc']:.4f} ± {statistics['std_val_auc']:.4f}",
    f"  Best AUC: {statistics['max_val_auc']:.4f} (Run {best_run_id})",
    f"  AUC range: [{statistics['min_val_auc']:.4f}, {statistics['max_val_auc']:.4f}]",
    sep="\n",
)

print(
    "\nOUTPUTS GENERATED:",
    f"  Training results: {results_path}",
    f"  Best model: {best_model_path}",
    f"  All models: {MODELS_DIR}/ (10 models)",
    f"  Feature importance: {importance_csv_path}",
    sep="\n",
)

print(
    "\nNEXT STEPS:",
    "  Cell 3: Baseline 2 - XGBoost with SelectKBest",
    "  Cell 5: Baseline 1 Testing - Test set evaluation",
    sep="\n",
)

print(f"\n{_rule}", "READY FOR NEXT BASELINE EXPERIMENT", _rule, sep="\n")

BASELINE 1 TRAINING: XGBOOST WITH ALL FEATURES

[STEP 1/7] Verifying Cell 1 completion...
--------------------------------------------------
VERIFIED: Training data (1270 samples x 25 features)
VERIFIED: Validation data (318 samples)
VERIFIED: XGBoost unified config loaded
VERIFIED: Number of runs = 10

[STEP 2/7] Setting up Baseline 1 experiment...
--------------------------------------------------
Experiment directory: /content/drive/MyDrive/ASD_GWO_XGBoost_Project/03_Results/output_notebook_02/cell_1_setup/baseline_1_all_features
Models directory: /content/drive/MyDrive/ASD_GWO_XGBoost_Project/03_Results/output_notebook_02/cell_1_setup/baseline_1_all_features/models

Experiment configuration:
  Method: No feature selection (all 25 features)
  Number of runs: 10
  Random seeds: 42 to 51
  XGBoost config: n_estimators=500, max_depth=8, learning_rate=0.05

[STEP 3/7] Training 10 independent models...
--------------------------------------------------

Run 1/10 (seed=42)
---------------

XGBoostError: Invalid Input: 'gpu_hist', valid values are: {'approx', 'auto', 'exact', 'hist'}

In [None]:
# @title Cell 3: Baseline 2 Training - XGBoost with SelectKBest

"""
ASD Detection Project: Baseline Experiments
Cell 3: Baseline 2 - XGBoost with SelectKBest (Filter Method)
Phase 1: K-value optimization (k=[8,12,16,20])
Phase 2: 10 independent runs with optimal k
Statistical feature selection using ANOVA F-test
"""

print("="*80)
print("BASELINE 2 TRAINING: XGBOOST WITH SELECTKBEST")
print("="*80)

# ==========================================
# 1. VERIFY PREREQUISITES
# ==========================================

print("\n[STEP 1/8] Verifying Cell 1 completion...")
print("-" * 50)

# Explicit checks instead of ``assert``: assertions are stripped when Python
# runs with -O, and the error now says which prerequisite failed.
_required_names = ('X_train', 'X_val', 'XGBOOST_UNIFIED_CONFIG',
                   'SELECTKBEST_CONFIG', 'CONFIG')
_missing_names = [name for name in _required_names if name not in globals()]
if _missing_names:
    raise RuntimeError(
        "ERROR: Cell 1 must be executed first "
        f"(undefined: {', '.join(_missing_names)})"
    )

_problems = []
if X_train.shape[1] != 25:
    _problems.append(f"X_train has {X_train.shape[1]} features, expected 25")
if X_val.shape[0] != 318:
    _problems.append(f"X_val has {X_val.shape[0]} samples, expected 318")
if CONFIG['n_runs'] != 10:
    _problems.append(f"CONFIG['n_runs'] is {CONFIG['n_runs']}, expected 10")
if _problems:
    raise RuntimeError(
        "ERROR: Cell 1 must be executed first (" + "; ".join(_problems) + ")"
    )

print(f"VERIFIED: Training data ({X_train.shape[0]} samples x {X_train.shape[1]} features)")
print(f"VERIFIED: Validation data ({X_val.shape[0]} samples)")
print(f"VERIFIED: SelectKBest config (k_values={SELECTKBEST_CONFIG['k_values']})")
print(f"VERIFIED: Number of runs = {CONFIG['n_runs']}")

# ==========================================
# 2. EXPERIMENT SETUP
# ==========================================

print("\n[STEP 2/8] Setting up Baseline 2 experiment...", "-" * 50, sep="\n")

# Output locations for this baseline, built from the project root; the
# models directory is created together with any missing parents.
# NOTE: MODELS_DIR is rebound here to this baseline's directory.
BASELINE_2_DIR = f"{PROJECT_PATHS['root']}/03_Results/output_notebook_02/cell_3_baseline2"
MODELS_DIR = f"{BASELINE_2_DIR}/models"
os.makedirs(MODELS_DIR, exist_ok=True)

print(f"Experiment directory: {BASELINE_2_DIR}")
print(f"Models directory: {MODELS_DIR}")

print(
    "\nExperiment configuration:",
    "  Method: SelectKBest (Filter - ANOVA F-test)",
    f"  K-values to test: {SELECTKBEST_CONFIG['k_values']}",
    f"  Number of runs: {CONFIG['n_runs']}",
    "  Selection: Deterministic (same features per k)",
    sep="\n",
)

# ==========================================
# 3. PHASE 1: K-VALUE OPTIMIZATION
# ==========================================

print("\n[STEP 3/8] Phase 1: K-value optimization...")
print("-" * 50)

# Per-k record: validation metrics, selected features, their F-scores and
# the fitted selector (reused later for the optimal k).
k_value_results = {}

for k in SELECTKBEST_CONFIG['k_values']:
    print(f"\nTesting k={k} features...")
    print("-" * 40)

    # Initialize SelectKBest
    selector = SelectKBest(score_func=SELECTKBEST_CONFIG['scoring_function'], k=k)

    # Fit on training data only, then apply the same selection to validation.
    X_train_selected = selector.fit_transform(X_train, y_train)
    X_val_selected = selector.transform(X_val)

    # Selected features and their F-scores, both in original column order.
    selected_mask = selector.get_support()
    selected_features = X_train.columns[selected_mask].tolist()
    f_scores = selector.scores_[selected_mask]

    # "Top 3" means the three highest F-scores, not the first three columns.
    # NaN scores are pushed to the end of the ranking.
    ranking = np.argsort(np.nan_to_num(f_scores, nan=-np.inf))[::-1]
    top_features = [selected_features[i] for i in ranking[:3]]

    print(f"  Selected {len(selected_features)} features")
    print(f"  Top 3: {top_features}")

    # Train single XGBoost model with unified config
    model = xgb.XGBClassifier(**XGBOOST_UNIFIED_CONFIG)
    model.fit(X_train_selected, y_train, verbose=False)

    # Evaluate on validation set
    y_val_pred = model.predict(X_val_selected)
    y_val_proba = model.predict_proba(X_val_selected)[:, 1]
    val_metrics = calculate_metrics(y_val, y_val_pred, y_val_proba)

    print(f"  Validation AUC: {val_metrics['roc_auc']:.4f}")
    print(f"  Validation Accuracy: {val_metrics['accuracy']:.4f}")

    # Store results
    k_value_results[k] = {
        'val_auc': val_metrics['roc_auc'],
        'val_metrics': val_metrics,
        'selected_features': selected_features,
        'f_scores': f_scores.tolist(),
        'selector': selector
    }

# Determine optimal k: highest validation AUC; on ties the first k tested wins.
optimal_k = max(k_value_results, key=lambda candidate: k_value_results[candidate]['val_auc'])

print(f"\n[OPTIMAL K DETERMINED]")
print(f"  Best k-value: {optimal_k}")
print(f"  Validation AUC: {k_value_results[optimal_k]['val_auc']:.4f}")

print("\nK-value optimization results:")
for k in SELECTKBEST_CONFIG['k_values']:
    marker = " <- OPTIMAL" if k == optimal_k else ""
    print(f"  k={k:2d}: AUC = {k_value_results[k]['val_auc']:.4f}{marker}")

# ==========================================
# 4. PREPARE OPTIMAL FEATURE SELECTION
# ==========================================

print("\n[STEP 4/8] Preparing feature selection with optimal k...", "-" * 50, sep="\n")

# Reuse the selector that was already fitted for the winning k in Phase 1.
_optimal_entry = k_value_results[optimal_k]
optimal_selector = _optimal_entry['selector']
selected_features = _optimal_entry['selected_features']

# Show at most ten feature names, then a count of the remainder.
print(f"Selected features (k={optimal_k}):")
_shown = selected_features[:10]
for _position, _feature_name in enumerate(_shown, start=1):
    print(f"  {_position:2d}. {_feature_name}")
_hidden_count = len(selected_features) - 10
if _hidden_count > 0:
    print(f"  ... and {_hidden_count} more")

# Save feature selection details, highest F-score first.
feature_selection_details = pd.DataFrame({
    'feature': selected_features,
    'f_score': _optimal_entry['f_scores']
})
feature_selection_details = feature_selection_details.sort_values('f_score', ascending=False)

feature_details_path = f"{BASELINE_2_DIR}/feature_selection_details.csv"
feature_selection_details.to_csv(feature_details_path, index=False)
print(f"\nFeature selection details saved: {feature_details_path}")

# ==========================================
# 5. PHASE 2: MULTIPLE RUNS WITH OPTIMAL K
# ==========================================

print("\n[STEP 5/8] Phase 2: Training 10 models with optimal k...", "-" * 50, sep="\n")

_optimal_entry = k_value_results[optimal_k]

# JSON-friendly view of the k search: string keys, AUC and feature names only.
_k_search_summary = {}
for _k_candidate, _k_entry in k_value_results.items():
    _k_search_summary[str(_k_candidate)] = {
        'val_auc': float(_k_entry['val_auc']),
        'selected_features': _k_entry['selected_features'],
    }

# The scoring callable is left out of the stored SelectKBest settings.
_selectkbest_export = dict(SELECTKBEST_CONFIG)
_selectkbest_export.pop('scoring_function', None)

# Container for everything this baseline reports; the per-run entries are
# appended by the training loop that follows.
training_results = dict(
    experiment_info=dict(
        name='Baseline 2: XGBoost with SelectKBest',
        description='Filter method - ANOVA F-test statistical ranking',
        timestamp=datetime.now().isoformat(),
        n_runs=CONFIG['n_runs'],
    ),
    k_value_optimization=dict(
        k_values_tested=SELECTKBEST_CONFIG['k_values'],
        results=_k_search_summary,
        optimal_k=optimal_k,
        optimal_val_auc=float(_optimal_entry['val_auc']),
    ),
    feature_selection=dict(
        method='SelectKBest - f_classif',
        n_features_selected=optimal_k,
        selected_features=selected_features,
        f_scores=dict(zip(selected_features, map(float, _optimal_entry['f_scores']))),
    ),
    configuration=dict(
        xgboost_config=XGBOOST_UNIFIED_CONFIG,
        selectkbest_config=_selectkbest_export,
    ),
    training_results=dict(runs=[], statistics={}),
    best_model={},
)

# Transform data using optimal selector (same for all runs)
X_train_selected = optimal_selector.transform(X_train)
X_val_selected = optimal_selector.transform(X_val)

print(f"Feature selection applied: {X_train.shape[1]} -> {optimal_k} features")
print(f"Same features used across all {CONFIG['n_runs']} runs")

start_time_total = time.time()
validation_aucs = []

# Train CONFIG['n_runs'] models on the same selected features; the only
# thing that changes between runs is the random seed (base seed + run index).
for run_id in range(CONFIG['n_runs']):
    print(f"\nRun {run_id + 1}/{CONFIG['n_runs']} (seed={CONFIG['random_state'] + run_id})")
    print("-" * 40)

    start_time_run = time.time()

    # Shared hyperparameters with a per-run seed layered on top.
    run_seed = CONFIG['random_state'] + run_id
    model_config = {**XGBOOST_UNIFIED_CONFIG, 'random_state': run_seed}
    model = xgb.XGBClassifier(**model_config)

    # Fit on the reduced training matrix.
    print(f"  Training on {optimal_k} selected features...")
    model.fit(X_train_selected, y_train, verbose=False)

    # Class predictions plus column 1 of predict_proba feed the metrics helper.
    y_val_pred = model.predict(X_val_selected)
    y_val_proba = model.predict_proba(X_val_selected)[:, 1]
    val_metrics = calculate_metrics(y_val, y_val_pred, y_val_proba)

    # The timer stops here, so it covers fit + validation scoring but not
    # the model write below.
    training_time = time.time() - start_time_run

    # One model file per run, named by run index.
    model_path = f"{MODELS_DIR}/run_{run_id}_model.json"
    model.save_model(model_path)

    # Record the run and collect its AUC for the summary statistics.
    run_result = {
        'run_id': run_id,
        'random_state': run_seed,
        'n_features_selected': optimal_k,
        'training_time_seconds': round(training_time, 2),
        'validation_metrics': val_metrics,
        'model_path': model_path
    }
    training_results['training_results']['runs'].append(run_result)
    validation_aucs.append(val_metrics['roc_auc'])

    print(f"  Validation AUC: {val_metrics['roc_auc']:.4f}")
    print(f"  Validation Accuracy: {val_metrics['accuracy']:.4f}")
    print(f"  Training time: {training_time:.2f}s")
    print(f"  Model saved: {model_path}")

total_training_time = time.time() - start_time_total

print(f"\nTotal training time: {total_training_time:.2f}s ({total_training_time/60:.2f} min)")

# ==========================================
# 6. STATISTICAL SUMMARY
# ==========================================

print("\n[STEP 6/8] Computing statistical summary...")
print("-" * 50)

validation_aucs_array = np.array(validation_aucs)

# Everything is cast to built-in float/int so the dict holds plain
# Python numbers. The standard deviation is the sample estimate (ddof=1).
# 'best_run_id' is the position of the highest AUC in validation_aucs.
statistics = {
    'mean_val_auc': float(validation_aucs_array.mean()),
    'std_val_auc': float(validation_aucs_array.std(ddof=1)),
    'min_val_auc': float(validation_aucs_array.min()),
    'max_val_auc': float(validation_aucs_array.max()),
    'median_val_auc': float(np.median(validation_aucs_array)),
    'best_run_id': int(validation_aucs_array.argmax()),
    'total_training_time_seconds': round(total_training_time, 2)
}

training_results['training_results']['statistics'] = statistics

print("Validation AUC statistics across 10 runs:")
print(f"  Mean:   {statistics['mean_val_auc']:.4f}")
print(f"  Std:    {statistics['std_val_auc']:.4f}")
print(f"  Min:    {statistics['min_val_auc']:.4f}")
print(f"  Max:    {statistics['max_val_auc']:.4f}")
print(f"  Median: {statistics['median_val_auc']:.4f}")
print(f"\nBest run: Run {statistics['best_run_id']} "
      f"(AUC = {statistics['max_val_auc']:.4f})")

# ==========================================
# 7. IDENTIFY AND SAVE BEST MODEL
# ==========================================

print("\n[STEP 7/8] Saving best model...")
print("-" * 50)

# Look up the run record that produced the highest validation AUC.
best_run_id = statistics['best_run_id']
best_run_result = training_results['training_results']['runs'][best_run_id]

# Re-load that run's saved model and write it again under a fixed file
# name in the experiment directory.
best_model_path = f"{BASELINE_2_DIR}/baseline_2_best_model.json"
best_model = xgb.XGBClassifier()
best_model.load_model(best_run_result['model_path'])
best_model.save_model(best_model_path)

# The best-model record points at the copy, not at the per-run file.
best_val_metrics = best_run_result['validation_metrics']
training_results['best_model'] = {
    'run_id': best_run_id,
    'random_state': best_run_result['random_state'],
    'validation_auc': best_val_metrics['roc_auc'],
    'validation_metrics': best_val_metrics,
    'model_path': best_model_path
}

print(f"Best model: Run {best_run_id}")
print(f"  Validation AUC: {best_run_result['validation_metrics']['roc_auc']:.4f}")
print(f"  Validation Accuracy: {best_run_result['validation_metrics']['accuracy']:.4f}")
print(f"  Features used: {optimal_k}")
print(f"  Saved to: {best_model_path}")

# ==========================================
# 8. SAVE TRAINING RESULTS
# ==========================================

print("\n[STEP 8/8] Saving training results...")
print("-" * 50)

# Full record: configuration, every run, statistics and best model.
results_path = f"{BASELINE_2_DIR}/baseline_2_training_results_CPU.json"
save_results(training_results, results_path)

print(f"Training results saved: {results_path}")

# Compact summary: headline numbers only, assembled from named parts.
feature_selection_summary = {
    'method': 'Filter (ANOVA F-test)',
    'k_values_tested': SELECTKBEST_CONFIG['k_values'],
    'optimal_k': optimal_k,
    'original_features': X_train.shape[1],
    # Share of the original columns dropped by the selector, in percent.
    'reduction_percent': round((1 - optimal_k/X_train.shape[1]) * 100, 1)
}
validation_auc_summary = {
    'mean': statistics['mean_val_auc'],
    'std': statistics['std_val_auc'],
    'range': [statistics['min_val_auc'], statistics['max_val_auc']]
}
best_run_summary = {
    'run_id': best_run_id,
    'validation_auc': statistics['max_val_auc']
}
summary = {
    'experiment': 'Baseline 2: XGBoost with SelectKBest',
    'feature_selection': feature_selection_summary,
    'n_runs': CONFIG['n_runs'],
    'validation_auc': validation_auc_summary,
    'best_run': best_run_summary,
    'training_time_total_minutes': round(total_training_time / 60, 2)
}

summary_path = f"{BASELINE_2_DIR}/training_summary.json"
save_results(summary, summary_path)
print(f"Training summary saved: {summary_path}")

# ==========================================
# COMPLETION SUMMARY
# ==========================================

# Console-only recap of the whole cell; nothing below writes to disk.
section_rule = "="*80

print("\n" + section_rule)
print("BASELINE 2 TRAINING COMPLETED SUCCESSFULLY")
print(section_rule)

print("\nEXPERIMENT SUMMARY:")
print(f"  Method: SelectKBest (Filter - ANOVA F-test)")
print(f"  Feature selection: {X_train.shape[1]} -> {optimal_k} features")
print(f"  Reduction: {(1 - optimal_k/X_train.shape[1]) * 100:.1f}%")
print(f"  Training runs: {CONFIG['n_runs']}")
print(f"  Total training time: {total_training_time/60:.2f} minutes")

print("\nK-VALUE OPTIMIZATION:")
for k in SELECTKBEST_CONFIG['k_values']:
    if k == optimal_k:
        marker = " <- OPTIMAL"
    else:
        marker = ""
    print(f"  k={k:2d}: AUC = {k_value_results[k]['val_auc']:.4f}{marker}")

print("\nVALIDATION PERFORMANCE:")
print(f"  Mean AUC: {statistics['mean_val_auc']:.4f} ± {statistics['std_val_auc']:.4f}")
print(f"  Best AUC: {statistics['max_val_auc']:.4f} (Run {best_run_id})")
print(f"  AUC range: [{statistics['min_val_auc']:.4f}, {statistics['max_val_auc']:.4f}]")

print("\nSELECTED FEATURES (Top 10):")
leading_features = selected_features[:10]
for i, feature in enumerate(leading_features, 1):
    print(f"  {i:2d}. {feature}")
if len(selected_features) > 10:
    print(f"  ... and {len(selected_features) - 10} more")

print("\nOUTPUTS GENERATED:")
print(f"  Training results: {results_path}")
print(f"  Best model: {best_model_path}")
print(f"  All models: {MODELS_DIR}/ (10 models)")
print(f"  Feature selection: {feature_details_path}")

print("\nNEXT STEPS:")
print("  Cell 4: Baseline 3 - XGBoost with RFECV")
print("  Cell 6: Baseline 2 Testing - Test set evaluation")

print("\n" + section_rule)
print("READY FOR NEXT BASELINE EXPERIMENT")
print(section_rule)

BASELINE 2 TRAINING: XGBOOST WITH SELECTKBEST

[STEP 1/8] Verifying Cell 1 completion...
--------------------------------------------------
VERIFIED: Training data (1270 samples x 25 features)
VERIFIED: Validation data (318 samples)
VERIFIED: SelectKBest config (k_values=[8, 12, 16, 20])
VERIFIED: Number of runs = 10

[STEP 2/8] Setting up Baseline 2 experiment...
--------------------------------------------------
Experiment directory: /content/drive/MyDrive/ASD_GWO_XGBoost_Project/03_Results/output_notebook_02/cell_3_baseline2
Models directory: /content/drive/MyDrive/ASD_GWO_XGBoost_Project/03_Results/output_notebook_02/cell_3_baseline2/models

Experiment configuration:
  Method: SelectKBest (Filter - ANOVA F-test)
  K-values to test: [8, 12, 16, 20]
  Number of runs: 10
  Selection: Deterministic (same features per k)

[STEP 3/8] Phase 1: K-value optimization...
--------------------------------------------------

Testing k=8 features...
----------------------------------------
  Sele

In [None]:
# @title Cell 4: Baseline 3 Training - XGBoost with RFECV (FIXED VERSION)

"""
ASD Detection Project: Baseline Experiments
Cell 4: Baseline 3 - XGBoost with RFECV (wrapper method), fixed version.

Performs 10 independent runs. Each run does its own RFECV feature
selection (recursive elimination scored by cross-validation) and then
trains a final model on the features that run selected.

Changes in the fixed version:
- RFECV runs sequentially (no parallel workers) to avoid joblib conflicts
- XGBoost threading parameters adjusted
- Per-run error handling added
- Stability improvements for long-running executions
"""

print("="*80)
print("BASELINE 3 TRAINING: XGBOOST WITH RFECV (FIXED VERSION)")
print("="*80)

# ==========================================
# 1. VERIFY PREREQUISITES
# ==========================================

print("\n[STEP 1/7] Verifying Cell 1 completion...")
print("-" * 50)

# Any missing name or unexpected size is reported as one RuntimeError
# telling the user to run Cell 1; the original error stays chained.
try:
    # Data produced by Cell 1: 25 feature columns, 318 validation rows.
    assert 'X_train' in dir()
    assert X_train.shape[1] == 25
    assert 'X_val' in dir()
    assert X_val.shape[0] == 318

    # Configuration dictionaries this cell reads.
    assert 'XGBOOST_UNIFIED_CONFIG' in dir()
    assert 'XGBOOST_RFECV_CONFIG' in dir()
    assert 'RFECV_CONFIG' in dir()
    assert 'CONFIG' in dir()
    assert CONFIG['n_runs'] == 10

    print(f"VERIFIED: Training data ({X_train.shape[0]} samples x {X_train.shape[1]} features)")
    print(f"VERIFIED: Validation data ({X_val.shape[0]} samples)")
    print(f"VERIFIED: RFECV config loaded")
    print(f"VERIFIED: Number of runs = {CONFIG['n_runs']}")
except (NameError, AssertionError) as e:
    raise RuntimeError("ERROR: Cell 1 must be executed first") from e

# ==========================================
# 2. EXPERIMENT SETUP
# ==========================================

print("\n[STEP 2/7] Setting up Baseline 3 experiment...")
print("-" * 50)

# Output locations for this baseline. MODELS_DIR is rebound here, so
# from this point it refers to the Baseline 3 models folder.
BASELINE_3_DIR = f"{PROJECT_PATHS['root']}/03_Results/output_notebook_02/cell_4_baseline3"
MODELS_DIR = f"{BASELINE_3_DIR}/models"
os.makedirs(MODELS_DIR, exist_ok=True)

print(f"Experiment directory: {BASELINE_3_DIR}")
print(f"Models directory: {MODELS_DIR}")

# Echo the settings that drive the run loop below.
print("\nExperiment configuration:")
print(f"  Method: RFECV (Wrapper - model-guided)")
print(f"  RFECV estimator: n_estimators={XGBOOST_RFECV_CONFIG['n_estimators']}, "
      f"max_depth={XGBOOST_RFECV_CONFIG['max_depth']}, "
      f"lr={XGBOOST_RFECV_CONFIG['learning_rate']}")
print(f"  Final model: n_estimators={XGBOOST_UNIFIED_CONFIG['n_estimators']}, "
      f"max_depth={XGBOOST_UNIFIED_CONFIG['max_depth']}, "
      f"lr={XGBOOST_UNIFIED_CONFIG['learning_rate']}")
print(f"  Tree method: hist (CPU-optimized, GPU params removed)")
print(f"  RFECV step: {RFECV_CONFIG['step']} features")
print(f"  RFECV CV: {RFECV_CONFIG['cv_folds']} folds")
print(f"  Min features: {RFECV_CONFIG['min_features_to_select']}")
print(f"  Number of runs: {CONFIG['n_runs']}")
print(f"  Parallel processing: DISABLED (n_jobs=1, nthread=1 for stability)")

# ==========================================
# 3. TRAINING LOOP - 10 INDEPENDENT RFECV RUNS
# ==========================================

print("\n[STEP 3/7] Running 10 independent RFECV experiments...")
print("-" * 50)

# Results container, assembled from named parts in the same key order
# as the JSON written at the end of the cell.
experiment_info = {
    'name': 'Baseline 3: XGBoost with RFECV',
    'description': 'Wrapper method - recursive feature elimination with CV (CPU-only, Fixed version)',
    'timestamp': datetime.now().isoformat(),
    'n_runs': CONFIG['n_runs'],
    'notes': 'n_jobs=1, nthread=1, tree_method=hist (CPU-only for stability)'
}

rfecv_configuration = {
    'step': RFECV_CONFIG['step'],
    'cv_folds': RFECV_CONFIG['cv_folds'],
    'min_features_to_select': RFECV_CONFIG['min_features_to_select'],
    'scoring': RFECV_CONFIG['scoring'],
    'n_jobs': 1  # recorded as a literal: the run loop passes n_jobs=1 to RFECV
}

# The two base configs are stored as defined in Cell 1, i.e. before the
# run loop applies its CPU-only overrides.
model_configuration = {
    'xgboost_rfecv_config': XGBOOST_RFECV_CONFIG,
    'xgboost_unified_config': XGBOOST_UNIFIED_CONFIG
}

training_results = {
    'experiment_info': experiment_info,
    'rfecv_configuration': rfecv_configuration,
    'configuration': model_configuration,
    'training_results': {
        'runs': [],
        'statistics': {}
    },
    'feature_stability': {},
    'best_model': {}
}

# Wall-clock start plus three accumulators that grow in lockstep,
# one entry per successful run.
start_time_total = time.time()
validation_aucs = []
all_selected_features = []
all_optimal_n = []

import traceback


def _cpu_only_xgb_params(base_config, seed):
    """Return a copy of *base_config* set up for single-threaded CPU training.

    The keys 'n_jobs', 'nthread', 'tree_method', 'gpu_id', 'predictor' and
    'enable_categorical' are dropped if present, then 'tree_method' is set
    to 'hist', 'nthread' to 1 and 'random_state' to *seed*. The input
    mapping is not modified.
    """
    stripped_keys = ('n_jobs', 'nthread', 'tree_method', 'gpu_id',
                     'predictor', 'enable_categorical')
    params = {name: value for name, value in base_config.items()
              if name not in stripped_keys}
    params['random_state'] = seed
    params['tree_method'] = 'hist'  # Fast CPU method
    params['nthread'] = 1  # Single thread for stability
    return params


# Each iteration is an independent experiment: RFECV picks a feature
# subset, then a final model is trained on that subset and scored on the
# validation set. A failing run is logged and skipped; the accumulators
# (runs list, validation_aucs, all_selected_features, all_optimal_n) are
# only appended to after a run has fully succeeded, so they stay aligned.
for run_id in range(CONFIG['n_runs']):
    run_seed = CONFIG['random_state'] + run_id
    print(f"\nRun {run_id + 1}/{CONFIG['n_runs']} (seed={run_seed})")
    print("-" * 40)

    start_time_run = time.time()

    try:
        # Phase 1: RFECV Feature Selection
        print(f"  Phase 1: RFECV feature selection...")

        rfecv_estimator_config = _cpu_only_xgb_params(XGBOOST_RFECV_CONFIG, run_seed)
        rfecv_estimator = xgb.XGBClassifier(**rfecv_estimator_config)

        # n_jobs=1 keeps RFECV sequential to avoid parallel processing conflicts
        rfecv = RFECV(
            estimator=rfecv_estimator,
            step=RFECV_CONFIG['step'],
            cv=RFECV_CONFIG['cv_folds'],
            scoring=RFECV_CONFIG['scoring'],
            min_features_to_select=RFECV_CONFIG['min_features_to_select'],
            n_jobs=1,
            verbose=0
        )

        # Fit RFECV on training data only
        rfecv_start = time.time()
        print(f"    Starting RFECV (this may take several minutes)...")
        rfecv.fit(X_train, y_train)
        rfecv_time = time.time() - rfecv_start

        # Extract RFECV results
        optimal_n_features = rfecv.n_features_
        selected_mask = rfecv.support_
        feature_ranking = rfecv.ranking_
        cv_scores = rfecv.cv_results_['mean_test_score']
        # Number of feature-subset sizes RFECV evaluated
        convergence_iteration = len(cv_scores)

        # Column names kept by the boolean support mask
        selected_features = X_train.columns[selected_mask].tolist()

        print(f"    RFECV completed in {rfecv_time:.1f}s ({rfecv_time/60:.1f} min)")
        print(f"    Optimal features: {optimal_n_features}/{X_train.shape[1]}")
        print(f"    Best CV AUC: {np.max(cv_scores):.4f}")
        print(f"    Convergence iterations: {convergence_iteration}")

        # Phase 2: Train Final Model with Selected Features
        print(f"  Phase 2: Training final model...")

        X_train_selected = rfecv.transform(X_train)
        X_val_selected = rfecv.transform(X_val)

        final_model_config = _cpu_only_xgb_params(XGBOOST_UNIFIED_CONFIG, run_seed)
        final_model = xgb.XGBClassifier(**final_model_config)
        final_model.fit(X_train_selected, y_train, verbose=False)

        # Predict on validation set (column 1 of predict_proba feeds the AUC)
        y_val_pred = final_model.predict(X_val_selected)
        y_val_proba = final_model.predict_proba(X_val_selected)[:, 1]

        val_metrics = calculate_metrics(y_val, y_val_pred, y_val_proba)

        # Covers RFECV + final fit + validation scoring, not the file writes
        training_time = time.time() - start_time_run

        # Save RFECV details
        rfecv_details_path = f"{MODELS_DIR}/run_{run_id}_rfecv_details.json"
        rfecv_details = {
            'run_id': run_id,
            'optimal_n_features': int(optimal_n_features),
            'selected_features': selected_features,
            'feature_ranking': {feat: int(rank) for feat, rank in zip(X_train.columns, feature_ranking)},
            'cv_scores_trajectory': [float(score) for score in cv_scores],
            'convergence_iterations': convergence_iteration,
            'rfecv_time_seconds': round(rfecv_time, 2)
        }
        save_results(rfecv_details, rfecv_details_path)

        # Save final model
        model_path = f"{MODELS_DIR}/run_{run_id}_model.json"
        final_model.save_model(model_path)

        # Store results
        run_result = {
            'run_id': run_id,
            'random_state': run_seed,
            'rfecv_results': {
                'optimal_n_features': int(optimal_n_features),
                'selected_features': selected_features,
                'convergence_iterations': convergence_iteration,
                'best_cv_score': float(np.max(cv_scores)),
                'cv_scores_trajectory': [float(score) for score in cv_scores],
                'rfecv_time_seconds': round(rfecv_time, 2)
            },
            'training_time_seconds': round(training_time, 2),
            'validation_metrics': val_metrics,
            'model_path': model_path,
            'rfecv_details_path': rfecv_details_path
        }

        training_results['training_results']['runs'].append(run_result)
        validation_aucs.append(val_metrics['roc_auc'])
        all_selected_features.append(set(selected_features))
        # Stored as a built-in int so later JSON summaries never receive a
        # numpy integer.
        all_optimal_n.append(int(optimal_n_features))

        print(f"  Validation AUC: {val_metrics['roc_auc']:.4f}")
        print(f"  Validation Accuracy: {val_metrics['accuracy']:.4f}")
        print(f"  Total time: {training_time:.2f}s ({training_time/60:.1f} min)")
        print(f"  Model saved: {model_path}")
        print(f"  ✓ Run {run_id + 1} completed successfully")

    except Exception as e:
        # Deliberately broad: one failed run must not abort the remaining
        # runs. KeyboardInterrupt is not an Exception and still stops the loop.
        error_trace = traceback.format_exc()
        print(f"  ✗ ERROR in Run {run_id + 1}: {str(e)}")
        print(f"  Traceback:\n{error_trace}")

        # Best-effort error log; a failure to write it is reported but must
        # not escape the handler and end the experiment.
        error_log_path = f"{MODELS_DIR}/run_{run_id}_error.txt"
        try:
            with open(error_log_path, 'w', encoding='utf-8') as f:
                f.write(f"Error in Run {run_id}\n")
                f.write(f"Error: {str(e)}\n")
                f.write(error_trace)
        except OSError as log_error:
            print(f"  WARNING: could not write error log {error_log_path}: {log_error}")
        else:
            print(f"  Error logged to: {error_log_path}")

total_training_time = time.time() - start_time_total

print(f"\nTotal training time: {total_training_time:.2f}s ({total_training_time/60:.2f} min)")
print(f"Successful runs: {len(validation_aucs)}/{CONFIG['n_runs']}")

# The statistics below need at least one successful run
if len(validation_aucs) == 0:
    raise RuntimeError("All runs failed. Please check error logs.")

# ==========================================
# 4. STATISTICAL SUMMARY
# ==========================================

print("\n[STEP 4/7] Computing statistical summary...")
print("-" * 50)

validation_aucs_array = np.array(validation_aucs)
optimal_n_array = np.array(all_optimal_n)

# Statistics cover successful runs only. Sample standard deviations
# (ddof=1) are undefined for a single value, so they fall back to 0.0.
#
# 'best_run_id' is the POSITION of the best AUC within the successful-run
# lists (validation_aucs, all_optimal_n and the stored 'runs' list). It
# equals the run's own run_id only while no earlier run has failed; later
# steps index those lists with it, so it is kept positional.
statistics = {
    'optimal_n_mean': float(np.mean(optimal_n_array)),
    'optimal_n_std': float(np.std(optimal_n_array, ddof=1)) if len(optimal_n_array) > 1 else 0.0,
    'optimal_n_range': [int(np.min(optimal_n_array)), int(np.max(optimal_n_array))],
    'mean_val_auc': float(np.mean(validation_aucs_array)),
    'std_val_auc': float(np.std(validation_aucs_array, ddof=1)) if len(validation_aucs_array) > 1 else 0.0,
    'min_val_auc': float(np.min(validation_aucs_array)),
    'max_val_auc': float(np.max(validation_aucs_array)),
    'median_val_auc': float(np.median(validation_aucs_array)),
    'best_run_id': int(np.argmax(validation_aucs_array)),
    'total_training_time_seconds': round(total_training_time, 2),
    'successful_runs': len(validation_aucs),
    'total_runs': CONFIG['n_runs']
}

training_results['training_results']['statistics'] = statistics

print("Optimal number of features across successful runs:")
print(f"  Mean:   {statistics['optimal_n_mean']:.1f}")
print(f"  Std:    {statistics['optimal_n_std']:.1f}")
print(f"  Range:  [{statistics['optimal_n_range'][0]}, {statistics['optimal_n_range'][1]}]")

print("\nValidation AUC statistics across successful runs:")
print(f"  Mean:   {statistics['mean_val_auc']:.4f}")
print(f"  Std:    {statistics['std_val_auc']:.4f}")
print(f"  Min:    {statistics['min_val_auc']:.4f}")
print(f"  Max:    {statistics['max_val_auc']:.4f}")
print(f"  Median: {statistics['median_val_auc']:.4f}")

# Report the run's real run_id (as stored in its result record), not its
# position among the successful runs; the two differ after a failed run.
best_position = statistics['best_run_id']
best_run_label = training_results['training_results']['runs'][best_position]['run_id']
print(f"\nBest run: Run {best_run_label} "
      f"(AUC = {statistics['max_val_auc']:.4f}, n_features = {all_optimal_n[best_position]})")

# ==========================================
# 5. FEATURE STABILITY ANALYSIS
# ==========================================

print("\n[STEP 5/7] Analyzing feature selection stability...")
print("-" * 50)

# Fraction of successful runs in which each original column was selected.
all_features = X_train.columns.tolist()
selection_frequency = {
    column: sum(1 for chosen in all_selected_features if column in chosen)
    / len(all_selected_features)
    for column in all_features
}

# Most frequently selected first; ties keep the original column order.
sorted_features = sorted(
    selection_frequency.items(),
    key=lambda item: item[1],
    reverse=True
)

# "Core" = selected in every run, "stable" = selected in at least 80%.
core_features = [name for name, share in sorted_features if share == 1.0]
stable_features = [name for name, share in sorted_features if share >= 0.8]

# Jaccard similarity (|A & B| / |A | B|) for every unordered pair of runs.
jaccard_similarities = []
for position, first_set in enumerate(all_selected_features):
    for second_set in all_selected_features[position + 1:]:
        shared_count = len(first_set & second_set)
        combined_count = len(first_set | second_set)
        pair_score = shared_count / combined_count if combined_count > 0 else 0
        jaccard_similarities.append(pair_score)

# A single successful run yields no pairs; the mean is reported as 0 then.
mean_jaccard = np.mean(jaccard_similarities) if jaccard_similarities else 0

training_results['feature_stability'] = {
    'selection_frequency': selection_frequency,
    'core_features': core_features,
    'stable_features': stable_features,
    'mean_jaccard_similarity': float(mean_jaccard)
}

print(f"Core features (100% selection): {len(core_features)}")
for feature in core_features:
    print(f"  - {feature}")

# Only the first ten stable features are listed on the console.
print(f"\nStable features (>=80% selection): {len(stable_features)}")
for feature in stable_features[:10]:
    freq = selection_frequency[feature]
    print(f"  - {feature}: {freq*100:.0f}%")
if len(stable_features) > 10:
    print(f"  ... and {len(stable_features) - 10} more")

print(f"\nMean Jaccard similarity: {mean_jaccard:.3f}")

# One CSV row per original feature, in descending-frequency order.
stability_rows = [
    {'feature': name, 'selection_frequency': share}
    for name, share in sorted_features
]
stability_df = pd.DataFrame(stability_rows)
stability_csv_path = f"{BASELINE_3_DIR}/feature_stability_analysis.csv"
stability_df.to_csv(stability_csv_path, index=False)
print(f"\nFeature stability saved: {stability_csv_path}")

# ==========================================
# 6. IDENTIFY AND SAVE BEST MODEL
# ==========================================

print("\n[STEP 6/7] Saving best model...")
print("-" * 50)

# best_run_id is the position of the best run within the list of
# SUCCESSFUL runs, which is the right index for the 'runs' list. It is not
# necessarily that run's run_id: failed runs are never appended, so every
# failure before the best run shifts the position down by one.
best_run_id = statistics['best_run_id']
best_run_result = training_results['training_results']['runs'][best_run_id]

# Load the best run's saved model and write it again under a fixed name
best_model = xgb.XGBClassifier()
best_model.load_model(best_run_result['model_path'])

best_model_path = f"{BASELINE_3_DIR}/baseline_3_best_model.json"
best_model.save_model(best_model_path)

# Record the run's real identifier so it matches its seed
# (random_state = base seed + run_id) and its run_<id>_model.json file.
training_results['best_model'] = {
    'run_id': best_run_result['run_id'],
    'random_state': best_run_result['random_state'],
    'n_features_selected': best_run_result['rfecv_results']['optimal_n_features'],
    'selected_features': best_run_result['rfecv_results']['selected_features'],
    'validation_auc': best_run_result['validation_metrics']['roc_auc'],
    'validation_metrics': best_run_result['validation_metrics'],
    'model_path': best_model_path
}

print(f"Best model: Run {best_run_result['run_id']}")
print(f"  Validation AUC: {best_run_result['validation_metrics']['roc_auc']:.4f}")
print(f"  Validation Accuracy: {best_run_result['validation_metrics']['accuracy']:.4f}")
print(f"  Features selected: {best_run_result['rfecv_results']['optimal_n_features']}")
print(f"  Saved to: {best_model_path}")

# ==========================================
# 7. SAVE TRAINING RESULTS
# ==========================================

print("\n[STEP 7/7] Saving training results...")
print("-" * 50)

# Full record: configuration, every successful run, statistics,
# feature stability and best model.
results_path = f"{BASELINE_3_DIR}/baseline_3_training_results.json"
save_results(training_results, results_path)

print(f"Training results saved: {results_path}")

# Compact summary with the headline numbers only.
# best_run_id indexes the successful-run lists; the run's real identifier
# is read from its stored record because the two differ once a run has
# failed. Numeric values taken from numpy are cast to built-in types so
# the summary contains plain Python numbers.
summary = {
    'experiment': 'Baseline 3: XGBoost with RFECV (Fixed)',
    'feature_selection': {
        'method': 'Wrapper (RFECV - model-guided)',
        'optimal_n_mean': statistics['optimal_n_mean'],
        'optimal_n_range': statistics['optimal_n_range'],
        'original_features': X_train.shape[1],
        'reduction_mean_percent': round((1 - statistics['optimal_n_mean']/X_train.shape[1]) * 100, 1)
    },
    'n_runs': CONFIG['n_runs'],
    'successful_runs': statistics['successful_runs'],
    'validation_auc': {
        'mean': statistics['mean_val_auc'],
        'std': statistics['std_val_auc'],
        'range': [statistics['min_val_auc'], statistics['max_val_auc']]
    },
    'feature_stability': {
        'core_features_count': len(core_features),
        'stable_features_count': len(stable_features),
        'mean_jaccard_similarity': float(mean_jaccard)
    },
    'best_run': {
        'run_id': training_results['training_results']['runs'][best_run_id]['run_id'],
        'validation_auc': statistics['max_val_auc'],
        'n_features': int(all_optimal_n[best_run_id])
    },
    'training_time_total_minutes': round(total_training_time / 60, 2)
}

summary_path = f"{BASELINE_3_DIR}/training_summary.json"
save_results(summary, summary_path)
print(f"Training summary saved: {summary_path}")

# ==========================================
# COMPLETION SUMMARY
# ==========================================

# Console-only recap of the cell; nothing below writes to disk.
print("\n" + "="*80)
print("BASELINE 3 TRAINING COMPLETED")
print("="*80)

print("\nEXPERIMENT SUMMARY:")
print(f"  Method: RFECV (Wrapper - model-guided)")
print(f"  Feature selection: Independent per run")
print(f"  Successful runs: {statistics['successful_runs']}/{CONFIG['n_runs']}")
print(f"  Total training time: {total_training_time/60:.2f} minutes")

print("\nFEATURE SELECTION RESULTS:")
print(f"  Optimal n (mean): {statistics['optimal_n_mean']:.1f} ± {statistics['optimal_n_std']:.1f}")
print(f"  Optimal n (range): [{statistics['optimal_n_range'][0]}, {statistics['optimal_n_range'][1]}]")
print(f"  Reduction (mean): {(1 - statistics['optimal_n_mean']/X_train.shape[1]) * 100:.1f}%")
print(f"  Core features: {len(core_features)} (selected in 100% runs)")
print(f"  Stable features: {len(stable_features)} (selected in >=80% runs)")

# best_run_id is a position among the successful runs; show the run's own
# run_id from its stored record so the label stays right after a failure.
best_run_label = training_results['training_results']['runs'][best_run_id]['run_id']

print("\nVALIDATION PERFORMANCE:")
print(f"  Mean AUC: {statistics['mean_val_auc']:.4f} ± {statistics['std_val_auc']:.4f}")
print(f"  Best AUC: {statistics['max_val_auc']:.4f} (Run {best_run_label})")
print(f"  AUC range: [{statistics['min_val_auc']:.4f}, {statistics['max_val_auc']:.4f}]")

print("\nCORE FEATURES (100% selection):")
if core_features:
    for feature in core_features:
        print(f"  - {feature}")
else:
    print("  (None - expected with wrapper method variability)")

print("\nOUTPUTS GENERATED:")
print(f"  Training results: {results_path}")
print(f"  Best model: {best_model_path}")
print(f"  All models: {MODELS_DIR}/ (models + RFECV details)")
print(f"  Feature stability: {stability_csv_path}")

print("\nNEXT STEPS:")
print("  - Review feature selection results")
print("  - Compare with other baseline methods")
print("  - Proceed to testing phase")

print("\n" + "="*80)
print("READY FOR ANALYSIS")
print("="*80)

BASELINE 3 TRAINING: XGBOOST WITH RFECV (FIXED VERSION)

[STEP 1/7] Verifying Cell 1 completion...
--------------------------------------------------
VERIFIED: Training data (1270 samples x 25 features)
VERIFIED: Validation data (318 samples)
VERIFIED: RFECV config loaded
VERIFIED: Number of runs = 10

[STEP 2/7] Setting up Baseline 3 experiment...
--------------------------------------------------
Experiment directory: /content/drive/MyDrive/ASD_GWO_XGBoost_Project/03_Results/output_notebook_02/cell_4_baseline3
Models directory: /content/drive/MyDrive/ASD_GWO_XGBoost_Project/03_Results/output_notebook_02/cell_4_baseline3/models

Experiment configuration:
  Method: RFECV (Wrapper - model-guided)
  RFECV estimator: n_estimators=150, max_depth=6, lr=0.1
  Final model: n_estimators=500, max_depth=8, lr=0.05
  Tree method: hist (CPU-optimized, GPU params removed)
  RFECV step: 5 features
  RFECV CV: 3 folds
  Min features: 8
  Number of runs: 10
  Parallel processing: DISABLED (n_jobs=1, n

In [None]:
# @title Cell 5: Baseline 1 Testing - Test Set Evaluation

"""
ASD Detection Project: Baseline Experiments
Cell 5: Baseline 1 Testing - XGBoost with All Features.

Evaluates each of the 10 trained Baseline 1 models on the held-out
test set.
"""

print("="*80)
print("BASELINE 1 TESTING: TEST SET EVALUATION")
print("="*80)

# ==========================================
# 1. VERIFY PREREQUISITES
# ==========================================

print("\n[STEP 1/7] Verifying prerequisites...")
print("-" * 50)

# The test split must be loaded with the expected size (397 rows,
# 25 feature columns). Any missing name or size mismatch is reported as
# one RuntimeError pointing at Cell 1, with the original error chained.
try:
    assert 'X_test' in dir()
    assert X_test.shape[0] == 397
    assert 'y_test' in dir()
    assert len(y_test) == 397
    assert X_test.shape[1] == 25
    print(f"VERIFIED: Test data ({X_test.shape[0]} samples x {X_test.shape[1]} features)")
    print(f"VERIFIED: Test set integrity maintained")
except (NameError, AssertionError) as e:
    raise RuntimeError("ERROR: Cell 1 must be executed first") from e

# ==========================================
# 2. LOAD TRAINING RESULTS
# ==========================================

print("\n[STEP 2/7] Loading training results from Cell 2...")
print("-" * 50)

# Training artefacts are read from .../training; test outputs go to the
# sibling .../testing folder, created here if missing.
BASELINE_1_TRAINING_DIR = f"{PROJECT_PATHS['root']}/03_Results/output_notebook_02/cell_2_baseline1/training"
BASELINE_1_TESTING_DIR = f"{PROJECT_PATHS['root']}/03_Results/output_notebook_02/cell_2_baseline1/testing"
os.makedirs(BASELINE_1_TESTING_DIR, exist_ok=True)

training_results_path = f"{BASELINE_1_TRAINING_DIR}/baseline_1_training_results.json"

# Fail early with a pointer to Cell 2 when the results file is absent.
results_file_present = os.path.exists(training_results_path)
if not results_file_present:
    raise FileNotFoundError(
        f"Training results not found: {training_results_path}\n"
        "ERROR: Cell 2 (Baseline 1 Training) must be executed first"
    )

with open(training_results_path, 'r') as results_file:
    training_results = json.load(results_file)

print(f"Training results loaded: {training_results_path}")

# Testing is only meaningful when all ten training runs were recorded.
recorded_runs = training_results['training_results']['runs']
n_runs_trained = len(recorded_runs)
if n_runs_trained != 10:
    raise ValueError(f"Expected 10 training runs, found {n_runs_trained}")

print(f"Verified: {n_runs_trained} training runs completed")
print(f"Training mean validation AUC: {training_results['training_results']['statistics']['mean_val_auc']:.4f}")

# ==========================================
# 3. LOAD ALL TRAINED MODELS
# ==========================================

print("\n[STEP 3/7] Loading all trained models...")
print("-" * 50)

MODELS_DIR = f"{BASELINE_1_TRAINING_DIR}/models"

if not os.path.exists(MODELS_DIR):
    raise FileNotFoundError(f"Models directory not found: {MODELS_DIR}")

# One entry per run: {'run_id', 'model', 'model_path'}.
loaded_models = []

for run_id in range(10):
    model_path = f"{MODELS_DIR}/run_{run_id}_model.json"

    # A missing file is reported separately from a file that fails to load.
    if not os.path.exists(model_path):
        raise FileNotFoundError(
            f"Model file not found: {model_path}\n"
            f"ERROR: Training run {run_id} incomplete"
        )

    # Only model construction and deserialization are guarded, so unrelated
    # errors are not relabelled as load failures.
    try:
        model = xgb.XGBClassifier()
        model.load_model(model_path)
    except Exception as e:
        # Chain explicitly so the underlying error is kept as the cause.
        raise RuntimeError(f"Failed to load model {run_id}: {e}") from e

    loaded_models.append({
        'run_id': run_id,
        'model': model,
        'model_path': model_path
    })
    print(f"  Loaded: run_{run_id}_model.json")

print(f"\nSuccessfully loaded all {len(loaded_models)} models")

# ==========================================
# 4. EVALUATE ALL MODELS ON TEST SET
# ==========================================

print("\n[STEP 4/7] Evaluating all models on test set...")
print("-" * 50)

# Results container: experiment metadata, the validation summary carried over
# from training, and one entry per evaluated model (appended in the loop below).
test_results = {
    'experiment_info': {
        'name': 'Baseline 1 Testing: XGBoost with All Features',
        'description': 'Test set evaluation for all 10 trained models',
        'timestamp': datetime.now().isoformat(),
        'test_set_size': len(y_test),
        'n_features': X_test.shape[1]
    },
    'training_summary': {
        'n_runs': n_runs_trained,
        'validation_auc_mean': training_results['training_results']['statistics']['mean_val_auc'],
        'validation_auc_std': training_results['training_results']['statistics']['std_val_auc']
    },
    'test_results': {'runs': []}
}

# Parallel lists of the headline metrics, in evaluation order.
test_aucs, test_accuracies = [], []

for model_info in loaded_models:
    run_id, model = model_info['run_id'], model_info['model']

    print(f"\nRun {run_id}:")
    print("-" * 40)

    # The training record for this model is looked up by position in 'runs'.
    training_run = training_results['training_results']['runs'][run_id]
    val_metrics = training_run['validation_metrics']

    # Hard labels and positive-class probabilities on the full feature set.
    y_test_pred = model.predict(X_test)
    y_test_proba = model.predict_proba(X_test)[:, 1]

    test_metrics = calculate_metrics(y_test, y_test_pred, y_test_proba)

    # Gap = validation minus test, so a positive value means the test score is lower.
    run_result = {
        'run_id': run_id,
        'random_state': training_run['random_state'],
        'validation_metrics': val_metrics,
        'test_metrics': test_metrics,
        'generalization_gap': {
            'auc_gap': float(val_metrics['roc_auc'] - test_metrics['roc_auc']),
            'accuracy_gap': float(val_metrics['accuracy'] - test_metrics['accuracy'])
        }
    }

    test_results['test_results']['runs'].append(run_result)
    test_aucs.append(test_metrics['roc_auc'])
    test_accuracies.append(test_metrics['accuracy'])

    print(f"  Validation AUC: {val_metrics['roc_auc']:.4f}")
    print(f"  Test AUC:       {test_metrics['roc_auc']:.4f}")
    print(f"  Gap:            {run_result['generalization_gap']['auc_gap']:.4f}")
    print(f"  Test Accuracy:  {test_metrics['accuracy']:.4f}")

print("\nAll models evaluated on test set")

# ==========================================
# 5. STATISTICAL SUMMARY
# ==========================================

print("\n[STEP 5/7] Computing statistical summary...")
print("-" * 50)


def _mean_and_std(values):
    """Return the mean and sample standard deviation (ddof=1) of ``values`` as floats."""
    return {
        'mean': float(np.mean(values)),
        'std': float(np.std(values, ddof=1))
    }


test_aucs_array = np.array(test_aucs)
test_accuracies_array = np.array(test_accuracies)

# Reported as the 95% interval: the 2.5th and 97.5th percentiles of the per-run test AUCs.
ci_95_lower = float(np.percentile(test_aucs_array, 2.5))
ci_95_upper = float(np.percentile(test_aucs_array, 97.5))

statistics = {
    'mean_test_auc': float(np.mean(test_aucs_array)),
    'std_test_auc': float(np.std(test_aucs_array, ddof=1)),
    'min_test_auc': float(np.min(test_aucs_array)),
    'max_test_auc': float(np.max(test_aucs_array)),
    'median_test_auc': float(np.median(test_aucs_array)),
    'ci_95_lower': ci_95_lower,
    'ci_95_upper': ci_95_upper,
    # Position of the highest test AUC within the evaluated runs.
    'best_run_index': int(np.argmax(test_aucs_array)),
    'all_metrics_summary': {
        'accuracy': _mean_and_std(test_accuracies_array)
    }
}

# Remaining metrics are collected from the stored per-run test metrics.
for metric in ['precision_macro', 'recall_macro', 'f1_macro', 'log_loss']:
    metric_values = [run['test_metrics'][metric] for run in test_results['test_results']['runs']]
    statistics['all_metrics_summary'][metric] = _mean_and_std(metric_values)

test_results['test_results']['statistics'] = statistics

print("Test AUC statistics across 10 runs:")
for stat_label, stat_key in (
    ("  Mean:   ", 'mean_test_auc'),
    ("  Std:    ", 'std_test_auc'),
    ("  Min:    ", 'min_test_auc'),
    ("  Max:    ", 'max_test_auc'),
    ("  Median: ", 'median_test_auc'),
):
    print(f"{stat_label}{statistics[stat_key]:.4f}")
print(f"  95% CI: [{ci_95_lower:.4f}, {ci_95_upper:.4f}]")

print("\nAll metrics summary:")
for metric, values in statistics['all_metrics_summary'].items():
    print(f"  {metric}: {values['mean']:.4f} ± {values['std']:.4f}")

# ==========================================
# 6. OVERFITTING DETECTION
# ==========================================

print("\n[STEP 6/7] Detecting overfitting...")
print("-" * 50)

# A run (or the overall mean) is flagged when validation AUC exceeds test AUC
# by more than this amount.
OVERFITTING_THRESHOLD = 0.03

val_auc_mean = test_results['training_summary']['validation_auc_mean']
test_auc_mean = statistics['mean_test_auc']
overall_gap = val_auc_mean - test_auc_mean

# Collect every run whose own AUC gap is above the threshold.
overfitting_runs = [
    {
        'run_id': run['run_id'],
        'gap': run['generalization_gap']['auc_gap'],
        'validation_auc': run['validation_metrics']['roc_auc'],
        'test_auc': run['test_metrics']['roc_auc']
    }
    for run in test_results['test_results']['runs']
    if run['generalization_gap']['auc_gap'] > OVERFITTING_THRESHOLD
]

if overall_gap > OVERFITTING_THRESHOLD:
    overall_status = 'OVERFITTING'
else:
    overall_status = 'GOOD_GENERALIZATION'

overfitting_analysis = {
    'threshold': OVERFITTING_THRESHOLD,
    'overall_gap': float(overall_gap),
    'overall_status': overall_status,
    'n_overfitting_runs': len(overfitting_runs),
    'overfitting_runs': overfitting_runs
}

test_results['overfitting_analysis'] = overfitting_analysis

print(f"Overfitting threshold: {OVERFITTING_THRESHOLD}")
print(f"Overall generalization gap: {overall_gap:.4f}")
print(f"Overall status: {overfitting_analysis['overall_status']}")

if not overfitting_runs:
    print(f"\nAll runs show good generalization (gap <= {OVERFITTING_THRESHOLD})")
else:
    print(f"\nWARNING: {len(overfitting_runs)} run(s) show overfitting (gap > {OVERFITTING_THRESHOLD}):")
    for run in overfitting_runs:
        print(f"  Run {run['run_id']}: gap = {run['gap']:.4f} "
              f"(val={run['validation_auc']:.4f}, test={run['test_auc']:.4f})")

# ==========================================
# 7. SAVE TEST RESULTS
# ==========================================

print("\n[STEP 7/7] Saving test results...")
print("-" * 50)

# The best run is the one with the highest test AUC (index computed in the statistics step).
best_run_idx = statistics['best_run_index']
best_run = test_results['test_results']['runs'][best_run_idx]

test_results['best_performance'] = dict(
    run_id=best_run_idx,
    random_state=best_run['random_state'],
    test_auc=best_run['test_metrics']['roc_auc'],
    all_test_metrics=best_run['test_metrics'],
    validation_auc=best_run['validation_metrics']['roc_auc'],
    generalization_gap=best_run['generalization_gap']['auc_gap'],
)

# Full per-run results go to one file ...
results_path = f"{BASELINE_1_TESTING_DIR}/baseline_1_test_results_CPU.json"
save_results(test_results, results_path)

print(f"Test results saved: {results_path}")

# ... and a condensed overview goes to a second file next to it.
summary = dict(
    experiment='Baseline 1 Testing: XGBoost with All Features',
    n_features=X_test.shape[1],
    test_set_size=len(y_test),
    n_models_tested=len(loaded_models),
    performance=dict(
        validation_auc_mean=val_auc_mean,
        test_auc_mean=test_auc_mean,
        generalization_gap=overall_gap,
    ),
    best_run=dict(
        run_id=best_run_idx,
        test_auc=best_run['test_metrics']['roc_auc'],
    ),
    overfitting_status=overfitting_analysis['overall_status'],
)

summary_path = f"{BASELINE_1_TESTING_DIR}/testing_summary.json"
save_results(summary, summary_path)
print(f"Testing summary saved: {summary_path}")

# ==========================================
# COMPLETION SUMMARY
# ==========================================

# Console recap of values computed in the earlier steps; nothing is recomputed here.
print("\n" + "="*80)
print("BASELINE 1 TESTING COMPLETED SUCCESSFULLY")
print("="*80)

print("\nTEST SET EVALUATION:")
for report_line in (
    f"  Method: XGBoost with all features (no selection)",
    f"  Features: {X_test.shape[1]} features",
    f"  Test samples: {len(y_test)}",
    f"  Models tested: {len(loaded_models)}",
):
    print(report_line)

print("\nVALIDATION PERFORMANCE (from training):")
print(f"  Mean AUC: {val_auc_mean:.4f} ± {test_results['training_summary']['validation_auc_std']:.4f}")

print("\nTEST PERFORMANCE:")
for report_line in (
    f"  Mean AUC: {statistics['mean_test_auc']:.4f} ± {statistics['std_test_auc']:.4f}",
    f"  95% CI: [{ci_95_lower:.4f}, {ci_95_upper:.4f}]",
    f"  Best AUC: {statistics['max_test_auc']:.4f} (Run {best_run_idx})",
    f"  AUC range: [{statistics['min_test_auc']:.4f}, {statistics['max_test_auc']:.4f}]",
):
    print(report_line)

print("\nGENERALIZATION ANALYSIS:")
print(f"  Overall gap (val - test): {overall_gap:.4f}")
print(f"  Status: {overfitting_analysis['overall_status']}")
print(
    f"  WARNING: {len(overfitting_runs)}/10 runs show overfitting"
    if overfitting_runs
    else f"  All runs generalize well"
)

print("\nALL METRICS (Test Set):")
# Each prefix carries its own padding so the values line up in one column.
for metric_prefix, metric_key in (
    ("  Accuracy:       ", 'accuracy'),
    ("  Precision:      ", 'precision_macro'),
    ("  Recall:         ", 'recall_macro'),
    ("  F1-Score:       ", 'f1_macro'),
):
    metric_summary = statistics['all_metrics_summary'][metric_key]
    print(f"{metric_prefix}{metric_summary['mean']:.4f} ± {metric_summary['std']:.4f}")

print("\nOUTPUTS GENERATED:")
print(f"  Test results: {results_path}")
print(f"  Testing summary: {summary_path}")
print(f"  Output directory: {BASELINE_1_TESTING_DIR}")

print("\nNEXT STEPS:")
for report_line in (
    "  Cell 6: Baseline 2 Testing - SelectKBest test set evaluation",
    "  Cell 7: Baseline 3 Testing - RFECV test set evaluation",
    "  Cell 8: Comprehensive Comparison & Statistical Analysis",
):
    print(report_line)

print("\n" + "="*80)
print("READY FOR NEXT BASELINE TESTING")
print("="*80)

BASELINE 1 TESTING: TEST SET EVALUATION

[STEP 1/7] Verifying prerequisites...
--------------------------------------------------
VERIFIED: Test data (397 samples x 25 features)
VERIFIED: Test set integrity maintained

[STEP 2/7] Loading training results from Cell 2...
--------------------------------------------------
Training results loaded: /content/drive/MyDrive/ASD_GWO_XGBoost_Project/03_Results/output_notebook_02/cell_2_baseline1/training/baseline_1_training_results.json
Verified: 10 training runs completed
Training mean validation AUC: 0.9964

[STEP 3/7] Loading all trained models...
--------------------------------------------------
  Loaded: run_0_model.json
  Loaded: run_1_model.json
  Loaded: run_2_model.json
  Loaded: run_3_model.json
  Loaded: run_4_model.json
  Loaded: run_5_model.json
  Loaded: run_6_model.json
  Loaded: run_7_model.json
  Loaded: run_8_model.json
  Loaded: run_9_model.json

Successfully loaded all 10 models

[STEP 4/7] Evaluating all models on test set.

In [None]:
# @title Cell 6: Baseline 2 Testing - Test Set Evaluation

"""
ASD Detection Project: Baseline Experiments
Cell 6: Baseline 2 Testing - Test set evaluation for XGBoost with SelectKBest
Evaluate all 10 trained models on held-out test set with selected features
"""

print("="*80)
print("BASELINE 2 TESTING: TEST SET EVALUATION")
print("="*80)

# ==========================================
# 1. VERIFY PREREQUISITES
# ==========================================

print("\n[STEP 1/8] Verifying prerequisites...")
print("-" * 50)

# Dimensions this cell requires of the test set: 397 samples x 25 features.
EXPECTED_TEST_SHAPE = (397, 25)

# A NameError here means X_test / y_test were never created in this session.
try:
    observed_test_shape = tuple(X_test.shape)
    observed_label_count = len(y_test)
except NameError as e:
    raise RuntimeError("ERROR: Cell 1 must be executed first") from e

# Explicit checks instead of `assert`: assertions are removed under `python -O`,
# and a size mismatch is a different problem from the data not being loaded,
# so it gets its own message (still a RuntimeError, as before).
if (observed_test_shape != EXPECTED_TEST_SHAPE
        or observed_label_count != EXPECTED_TEST_SHAPE[0]):
    raise RuntimeError(
        "ERROR: Test set integrity check failed - expected X_test shape "
        f"{EXPECTED_TEST_SHAPE} with {EXPECTED_TEST_SHAPE[0]} labels, found "
        f"{observed_test_shape} with {observed_label_count} labels"
    )

print(f"VERIFIED: Test data ({X_test.shape[0]} samples x {X_test.shape[1]} features)")
print(f"VERIFIED: Test set integrity maintained")

# ==========================================
# 2. LOAD TRAINING RESULTS
# ==========================================

print("\n[STEP 2/8] Loading training results from Cell 3...")
print("-" * 50)

# Baseline 2 training artefacts are read from .../training; this cell's
# outputs go to the sibling .../testing folder, created on demand.
BASELINE_2_TRAINING_DIR = f"{PROJECT_PATHS['root']}/03_Results/output_notebook_02/cell_3_baseline2/training"
BASELINE_2_TESTING_DIR = f"{PROJECT_PATHS['root']}/03_Results/output_notebook_02/cell_3_baseline2/testing"
os.makedirs(BASELINE_2_TESTING_DIR, exist_ok=True)

training_results_path = f"{BASELINE_2_TRAINING_DIR}/baseline_2_training_results.json"

# Without the training JSON there is nothing to evaluate, so stop here.
if os.path.exists(training_results_path):
    with open(training_results_path, 'r') as f:
        training_results = json.load(f)
else:
    raise FileNotFoundError(
        f"Training results not found: {training_results_path}\n"
        "ERROR: Cell 3 (Baseline 2 Training) must be executed first"
    )

print(f"Training results loaded: {training_results_path}")

# The rest of the cell expects exactly 10 recorded training runs.
n_runs_trained = len(training_results['training_results']['runs'])
if n_runs_trained == 10:
    print(f"Verified: {n_runs_trained} training runs completed")
    print(f"Training mean validation AUC: {training_results['training_results']['statistics']['mean_val_auc']:.4f}")
else:
    raise ValueError(f"Expected 10 training runs, found {n_runs_trained}")

# ==========================================
# 3. EXTRACT FEATURE SELECTION DETAILS
# ==========================================

print("\n[STEP 3/8] Extracting feature selection details...")
print("-" * 50)

# The selected feature list and its size are taken from the training JSON.
optimal_k = training_results['feature_selection']['n_features_selected']
selected_features = training_results['feature_selection']['selected_features']
# Share of the original test-set columns that selection dropped, in percent.
feature_reduction_percent = (1 - optimal_k / X_test.shape[1]) * 100

print(f"Feature selection method: SelectKBest (f_classif)")
print(f"Optimal k: {optimal_k}")
print(f"Original features: {X_test.shape[1]}")
print(f"Selected features: {optimal_k}")
print(f"Feature reduction: {feature_reduction_percent:.1f}%")

# Only the first ten names are listed; the remainder is reported as a count.
print(f"\nSelected features:")
for position, feature_name in enumerate(selected_features[:10], 1):
    print(f"  {position:2d}. {feature_name}")
if len(selected_features) > 10:
    print(f"  ... and {len(selected_features) - 10} more")

# ==========================================
# 4. VERIFY FEATURE CONSISTENCY
# ==========================================

print("\n[STEP 4/8] Verifying feature consistency across runs...")
print("-" * 50)

# Every training run must report the same feature count as the stored selection.
for run in training_results['training_results']['runs']:
    if run['n_features_selected'] == optimal_k:
        continue
    raise ValueError(
        f"Inconsistent feature count: Run {run['run_id']} has "
        f"{run['n_features_selected']} features, expected {optimal_k}"
    )

print(f"VERIFIED: All runs used {optimal_k} features (deterministic selection)")

# Every selected feature name must be a column of the test set.
missing_features = set(selected_features) - set(X_test.columns)
if len(missing_features) > 0:
    raise ValueError(f"Selected features not in test set: {missing_features}")

print(f"VERIFIED: All selected features available in test set")

# ==========================================
# 5. TRANSFORM TEST SET
# ==========================================

print("\n[STEP 5/8] Transforming test set with selected features...")
print("-" * 50)

# Keep only the selected columns, in the order stored in the training results.
X_test_selected = X_test[selected_features]

n_test_rows, n_test_cols = X_test.shape[0], X_test.shape[1]
print(f"Original test set: {n_test_rows} samples x {n_test_cols} features")
print(f"Transformed test set: {X_test_selected.shape[0]} samples x {X_test_selected.shape[1]} features")
print(f"Feature selection applied successfully")

# ==========================================
# 6. LOAD ALL TRAINED MODELS
# ==========================================

print("\n[STEP 6/8] Loading all trained models...")
print("-" * 50)

MODELS_DIR = f"{BASELINE_2_TRAINING_DIR}/models"

if not os.path.exists(MODELS_DIR):
    raise FileNotFoundError(f"Models directory not found: {MODELS_DIR}")

# One entry per run: {'run_id', 'model', 'model_path'}.
loaded_models = []

for run_id in range(10):
    model_path = f"{MODELS_DIR}/run_{run_id}_model.json"

    # A missing file is reported separately from a file that fails to load.
    if not os.path.exists(model_path):
        raise FileNotFoundError(
            f"Model file not found: {model_path}\n"
            f"ERROR: Training run {run_id} incomplete"
        )

    # Only model construction and deserialization are guarded, so unrelated
    # errors are not relabelled as load failures.
    try:
        model = xgb.XGBClassifier()
        model.load_model(model_path)
    except Exception as e:
        # Chain explicitly so the underlying error is kept as the cause.
        raise RuntimeError(f"Failed to load model {run_id}: {e}") from e

    loaded_models.append({
        'run_id': run_id,
        'model': model,
        'model_path': model_path
    })
    print(f"  Loaded: run_{run_id}_model.json")

print(f"\nSuccessfully loaded all {len(loaded_models)} models")

# ==========================================
# 7. EVALUATE ALL MODELS ON TEST SET
# ==========================================

print("\n[STEP 7/8] Evaluating all models on test set...")
print("-" * 50)

# Results container: experiment metadata, the feature-selection summary, the
# validation summary carried over from training, and one entry per evaluated
# model (appended in the loop below).
test_results = {
    'experiment_info': {
        'name': 'Baseline 2 Testing: XGBoost with SelectKBest',
        'description': 'Test set evaluation for all 10 trained models with selected features',
        'timestamp': datetime.now().isoformat(),
        'test_set_size': len(y_test),
        'n_features_original': X_test.shape[1],
        'n_features_selected': optimal_k
    },
    'feature_selection_summary': {
        'method': 'SelectKBest',
        'n_features_selected': optimal_k,
        'selected_features': selected_features,
        'feature_reduction_percent': round(feature_reduction_percent, 1)
    },
    'training_summary': {
        'n_runs': n_runs_trained,
        'validation_auc_mean': training_results['training_results']['statistics']['mean_val_auc'],
        'validation_auc_std': training_results['training_results']['statistics']['std_val_auc']
    },
    'test_results': {'runs': []}
}

# Parallel lists of the headline metrics, in evaluation order.
test_aucs, test_accuracies = [], []

for model_info in loaded_models:
    run_id, model = model_info['run_id'], model_info['model']

    print(f"\nRun {run_id}:")
    print("-" * 40)

    # The training record for this model is looked up by position in 'runs'.
    training_run = training_results['training_results']['runs'][run_id]
    val_metrics = training_run['validation_metrics']

    # Hard labels and positive-class probabilities on the reduced feature set.
    y_test_pred = model.predict(X_test_selected)
    y_test_proba = model.predict_proba(X_test_selected)[:, 1]

    test_metrics = calculate_metrics(y_test, y_test_pred, y_test_proba)

    # Gap = validation minus test, so a positive value means the test score is lower.
    run_result = {
        'run_id': run_id,
        'random_state': training_run['random_state'],
        'n_features_used': optimal_k,
        'validation_metrics': val_metrics,
        'test_metrics': test_metrics,
        'generalization_gap': {
            'auc_gap': float(val_metrics['roc_auc'] - test_metrics['roc_auc']),
            'accuracy_gap': float(val_metrics['accuracy'] - test_metrics['accuracy'])
        }
    }

    test_results['test_results']['runs'].append(run_result)
    test_aucs.append(test_metrics['roc_auc'])
    test_accuracies.append(test_metrics['accuracy'])

    print(f"  Validation AUC: {val_metrics['roc_auc']:.4f}")
    print(f"  Test AUC:       {test_metrics['roc_auc']:.4f}")
    print(f"  Gap:            {run_result['generalization_gap']['auc_gap']:.4f}")
    print(f"  Test Accuracy:  {test_metrics['accuracy']:.4f}")

print("\nAll models evaluated on test set")

# ==========================================
# 8. STATISTICAL SUMMARY & ANALYSIS
# ==========================================

print("\n[STEP 8/8] Computing statistical summary and analysis...")
print("-" * 50)


def _mean_and_std(values):
    """Return the mean and sample standard deviation (ddof=1) of ``values`` as floats."""
    return {
        'mean': float(np.mean(values)),
        'std': float(np.std(values, ddof=1))
    }


test_aucs_array = np.array(test_aucs)
test_accuracies_array = np.array(test_accuracies)

# Reported as the 95% interval: the 2.5th and 97.5th percentiles of the per-run test AUCs.
ci_95_lower = float(np.percentile(test_aucs_array, 2.5))
ci_95_upper = float(np.percentile(test_aucs_array, 97.5))

statistics = {
    'mean_test_auc': float(np.mean(test_aucs_array)),
    'std_test_auc': float(np.std(test_aucs_array, ddof=1)),
    'min_test_auc': float(np.min(test_aucs_array)),
    'max_test_auc': float(np.max(test_aucs_array)),
    'median_test_auc': float(np.median(test_aucs_array)),
    'ci_95_lower': ci_95_lower,
    'ci_95_upper': ci_95_upper,
    # Position of the highest test AUC within the evaluated runs.
    'best_run_index': int(np.argmax(test_aucs_array)),
    'all_metrics_summary': {
        'accuracy': _mean_and_std(test_accuracies_array)
    }
}

# Remaining metrics are collected from the stored per-run test metrics.
for metric in ['precision_macro', 'recall_macro', 'f1_macro', 'log_loss']:
    metric_values = [run['test_metrics'][metric] for run in test_results['test_results']['runs']]
    statistics['all_metrics_summary'][metric] = _mean_and_std(metric_values)

test_results['test_results']['statistics'] = statistics

print("Test AUC statistics across 10 runs:")
for stat_label, stat_key in (
    ("  Mean:   ", 'mean_test_auc'),
    ("  Std:    ", 'std_test_auc'),
    ("  Min:    ", 'min_test_auc'),
    ("  Max:    ", 'max_test_auc'),
    ("  Median: ", 'median_test_auc'),
):
    print(f"{stat_label}{statistics[stat_key]:.4f}")
print(f"  95% CI: [{ci_95_lower:.4f}, {ci_95_upper:.4f}]")

print("\nAll metrics summary:")
for metric, values in statistics['all_metrics_summary'].items():
    print(f"  {metric}: {values['mean']:.4f} ± {values['std']:.4f}")

# Overfitting detection: a run (or the overall mean) is flagged when validation
# AUC exceeds test AUC by more than the threshold.
OVERFITTING_THRESHOLD = 0.03

val_auc_mean = test_results['training_summary']['validation_auc_mean']
test_auc_mean = statistics['mean_test_auc']
overall_gap = val_auc_mean - test_auc_mean

# Collect every run whose own AUC gap is above the threshold.
overfitting_runs = [
    {
        'run_id': run['run_id'],
        'gap': run['generalization_gap']['auc_gap'],
        'validation_auc': run['validation_metrics']['roc_auc'],
        'test_auc': run['test_metrics']['roc_auc']
    }
    for run in test_results['test_results']['runs']
    if run['generalization_gap']['auc_gap'] > OVERFITTING_THRESHOLD
]

if overall_gap > OVERFITTING_THRESHOLD:
    overall_status = 'OVERFITTING'
else:
    overall_status = 'GOOD_GENERALIZATION'

overfitting_analysis = {
    'threshold': OVERFITTING_THRESHOLD,
    'overall_gap': float(overall_gap),
    'overall_status': overall_status,
    'n_overfitting_runs': len(overfitting_runs),
    'overfitting_runs': overfitting_runs
}

test_results['overfitting_analysis'] = overfitting_analysis

print("\nOverfitting detection:")
print(f"  Threshold: {OVERFITTING_THRESHOLD}")
print(f"  Overall gap: {overall_gap:.4f}")
print(f"  Status: {overfitting_analysis['overall_status']}")

if not overfitting_runs:
    print(f"\n  All runs show good generalization (gap <= {OVERFITTING_THRESHOLD})")
else:
    print(f"\n  WARNING: {len(overfitting_runs)} run(s) show overfitting (gap > {OVERFITTING_THRESHOLD}):")
    for run in overfitting_runs:
        print(f"    Run {run['run_id']}: gap = {run['gap']:.4f} "
              f"(val={run['validation_auc']:.4f}, test={run['test_auc']:.4f})")

# Best performance: the run with the highest test AUC (index computed with the statistics).
best_run_idx = statistics['best_run_index']
best_run = test_results['test_results']['runs'][best_run_idx]

test_results['best_performance'] = dict(
    run_id=best_run_idx,
    random_state=best_run['random_state'],
    n_features_used=optimal_k,
    test_auc=best_run['test_metrics']['roc_auc'],
    all_test_metrics=best_run['test_metrics'],
    validation_auc=best_run['validation_metrics']['roc_auc'],
    generalization_gap=best_run['generalization_gap']['auc_gap'],
)

# Full per-run results go to one file ...
results_path = f"{BASELINE_2_TESTING_DIR}/baseline_2_test_results_CPU.json"
save_results(test_results, results_path)

print(f"\nTest results saved: {results_path}")

# ... and a condensed overview goes to a second file next to it.
summary = dict(
    experiment='Baseline 2 Testing: XGBoost with SelectKBest',
    feature_selection=dict(
        method='SelectKBest (f_classif)',
        n_features_selected=optimal_k,
        feature_reduction_percent=round(feature_reduction_percent, 1),
    ),
    test_set_size=len(y_test),
    n_models_tested=len(loaded_models),
    performance=dict(
        validation_auc_mean=val_auc_mean,
        test_auc_mean=test_auc_mean,
        generalization_gap=overall_gap,
    ),
    best_run=dict(
        run_id=best_run_idx,
        test_auc=best_run['test_metrics']['roc_auc'],
    ),
    overfitting_status=overfitting_analysis['overall_status'],
)

summary_path = f"{BASELINE_2_TESTING_DIR}/testing_summary.json"
save_results(summary, summary_path)
print(f"Testing summary saved: {summary_path}")

# ==========================================
# COMPLETION SUMMARY
# ==========================================

# Console recap of values computed in the earlier steps; nothing is recomputed here.
print("\n" + "="*80)
print("BASELINE 2 TESTING COMPLETED SUCCESSFULLY")
print("="*80)

print("\nTEST SET EVALUATION:")
for report_line in (
    f"  Method: XGBoost with SelectKBest feature selection",
    f"  Features: {optimal_k}/{X_test.shape[1]} features (reduction: {feature_reduction_percent:.1f}%)",
    f"  Test samples: {len(y_test)}",
    f"  Models tested: {len(loaded_models)}",
):
    print(report_line)

print("\nVALIDATION PERFORMANCE (from training):")
print(f"  Mean AUC: {val_auc_mean:.4f} ± {test_results['training_summary']['validation_auc_std']:.4f}")

print("\nTEST PERFORMANCE:")
for report_line in (
    f"  Mean AUC: {statistics['mean_test_auc']:.4f} ± {statistics['std_test_auc']:.4f}",
    f"  95% CI: [{ci_95_lower:.4f}, {ci_95_upper:.4f}]",
    f"  Best AUC: {statistics['max_test_auc']:.4f} (Run {best_run_idx})",
    f"  AUC range: [{statistics['min_test_auc']:.4f}, {statistics['max_test_auc']:.4f}]",
):
    print(report_line)

print("\nGENERALIZATION ANALYSIS:")
print(f"  Overall gap (val - test): {overall_gap:.4f}")
print(f"  Status: {overfitting_analysis['overall_status']}")
print(
    f"  WARNING: {len(overfitting_runs)}/10 runs show overfitting"
    if overfitting_runs
    else f"  All runs generalize well"
)

print("\nALL METRICS (Test Set):")
# Each prefix carries its own padding so the values line up in one column.
for metric_prefix, metric_key in (
    ("  Accuracy:       ", 'accuracy'),
    ("  Precision:      ", 'precision_macro'),
    ("  Recall:         ", 'recall_macro'),
    ("  F1-Score:       ", 'f1_macro'),
):
    metric_summary = statistics['all_metrics_summary'][metric_key]
    print(f"{metric_prefix}{metric_summary['mean']:.4f} ± {metric_summary['std']:.4f}")

print("\nOUTPUTS GENERATED:")
print(f"  Test results: {results_path}")
print(f"  Testing summary: {summary_path}")
print(f"  Output directory: {BASELINE_2_TESTING_DIR}")

print("\nNEXT STEPS:")
for report_line in (
    "  Cell 7: Baseline 3 Testing - RFECV test set evaluation",
    "  Cell 8: Comprehensive Comparison & Statistical Analysis",
):
    print(report_line)

print("\n" + "="*80)
print("READY FOR NEXT BASELINE TESTING")
print("="*80)

BASELINE 2 TESTING: TEST SET EVALUATION

[STEP 1/8] Verifying prerequisites...
--------------------------------------------------
VERIFIED: Test data (397 samples x 25 features)
VERIFIED: Test set integrity maintained

[STEP 2/8] Loading training results from Cell 3...
--------------------------------------------------
Training results loaded: /content/drive/MyDrive/ASD_GWO_XGBoost_Project/03_Results/output_notebook_02/cell_3_baseline2/training/baseline_2_training_results.json
Verified: 10 training runs completed
Training mean validation AUC: 0.9964

[STEP 3/8] Extracting feature selection details...
--------------------------------------------------
Feature selection method: SelectKBest (f_classif)
Optimal k: 20
Original features: 25
Selected features: 20
Feature reduction: 20.0%

Selected features:
   1. A1
   2. A2
   3. A3
   4. A4
   5. A5
   6. A6
   7. A7
   8. A8
   9. A9
  10. Social_Responsiveness_Scale
  ... and 10 more

[STEP 4/8] Verifying feature consistency across runs..

In [None]:
# @title Cell 7: Baseline 3 Testing - Test Set Evaluation (CPU)

"""
ASD Detection Project: Baseline Experiments
Cell 7: Baseline 3 Testing - Test set evaluation for XGBoost with RFECV
Evaluate all 10 trained models on held-out test set with selected features
CPU-compatible version matching new training output structure
"""

print("="*80)
print("BASELINE 3 TESTING: TEST SET EVALUATION (CPU)")
print("="*80)

# ==========================================
# 1. VERIFY PREREQUISITES
# ==========================================

print("\n[STEP 1/8] Verifying prerequisites...")
print("-" * 50)

# Dimensions this cell requires of the test set: 397 samples x 25 features.
EXPECTED_TEST_SHAPE = (397, 25)

# A NameError here means X_test / y_test were never created in this session.
try:
    observed_test_shape = tuple(X_test.shape)
    observed_label_count = len(y_test)
except NameError as e:
    raise RuntimeError("ERROR: Cell 1 must be executed first") from e

# Explicit checks instead of `assert`: assertions are removed under `python -O`,
# and a size mismatch is a different problem from the data not being loaded,
# so it gets its own message (still a RuntimeError, as before).
if (observed_test_shape != EXPECTED_TEST_SHAPE
        or observed_label_count != EXPECTED_TEST_SHAPE[0]):
    raise RuntimeError(
        "ERROR: Test set integrity check failed - expected X_test shape "
        f"{EXPECTED_TEST_SHAPE} with {EXPECTED_TEST_SHAPE[0]} labels, found "
        f"{observed_test_shape} with {observed_label_count} labels"
    )

print(f"VERIFIED: Test data ({X_test.shape[0]} samples x {X_test.shape[1]} features)")
print(f"VERIFIED: Test set integrity maintained")

# ==========================================
# 2. LOAD TRAINING RESULTS
# ==========================================

print("\n[STEP 2/8] Loading training results from Cell 4...")
print("-" * 50)

# Derive both folders from the configured project root, as the Baseline 1 and
# Baseline 2 testing cells do, instead of repeating the absolute Drive path.
BASELINE_3_TRAINING_DIR = f"{PROJECT_PATHS['root']}/03_Results/output_notebook_02/cell_4_baseline3/training"
TESTING_DIR = f"{PROJECT_PATHS['root']}/03_Results/output_notebook_02/cell_4_baseline3/testing"
os.makedirs(TESTING_DIR, exist_ok=True)

training_results_path = f"{BASELINE_3_TRAINING_DIR}/baseline_3_training_results.json"

# Without the training JSON there is nothing to evaluate, so stop here.
if not os.path.exists(training_results_path):
    raise FileNotFoundError(
        f"Training results not found: {training_results_path}\n"
        "ERROR: Cell 4 (Baseline 3 Training) must be executed first"
    )

with open(training_results_path, 'r') as f:
    training_results = json.load(f)

print(f"Training results loaded: {training_results_path}")

# Verify training completed. When the statistics carry no 'successful_runs'
# entry, every recorded run is counted as successful.
n_runs_trained = len(training_results['training_results']['runs'])
successful_runs = training_results['training_results']['statistics'].get('successful_runs', n_runs_trained)

if successful_runs == 0:
    raise ValueError("No successful training runs found")

print(f"Verified: {successful_runs}/{n_runs_trained} training runs completed successfully")
print(f"Training mean validation AUC: {training_results['training_results']['statistics']['mean_val_auc']:.4f}")

# ==========================================
# 3. EXTRACT FEATURE SELECTION DETAILS
# ==========================================
# Summarises how many features RFECV kept per run and how much that
# reduces the original feature count of the test set.

print("\n[STEP 3/8] Extracting feature selection details...")
print("-" * 50)

# Get feature selection statistics from training results
stats = training_results['training_results']['statistics']
mean_optimal_features = stats['mean_optimal_features']
std_optimal_features = stats['std_optimal_features']

print("Feature selection method: RFECV (wrapper - model-guided)")
print(f"Optimal features (mean): {mean_optimal_features:.1f} ± {std_optimal_features:.1f}")
print(f"Original features: {X_test.shape[1]}")

# Check if all runs selected same number of features
feature_counts = [
    trained_run['rfecv_results']['optimal_n_features']
    for trained_run in training_results['training_results']['runs']
]
all_same_count = (len(set(feature_counts)) == 1)

if not all_same_count:
    # Counts differ between runs: fall back to the rounded mean.
    n_features_selected = int(round(mean_optimal_features))
    print(f"Feature selection varies: {min(feature_counts)} to {max(feature_counts)} features")
    print(f"Using mean: {n_features_selected} features")
else:
    n_features_selected = feature_counts[0]
    print(f"All runs selected: {n_features_selected} features (100% stability)")

# Share of the original columns that RFECV dropped on average.
feature_reduction_percent = (1 - mean_optimal_features / X_test.shape[1]) * 100
print(f"Feature reduction: {feature_reduction_percent:.1f}%")

# ==========================================
# 4. ANALYZE FEATURE STABILITY
# ==========================================
# Counts in how many training runs each feature was selected, reports the
# features common to all runs, and derives a reference feature list.

print("\n[STEP 4/8] Analyzing feature stability across runs...")
print("-" * 50)

from collections import Counter

# Extract selected features from each run
all_selected_features = []
feature_frequency = Counter()
for run in training_results['training_results']['runs']:
    features = run['rfecv_results']['selected_features']
    all_selected_features.append(set(features))
    # Count each feature once per run, walking the stored list order.
    # Iterating the set instead would make tie-breaking depend on string
    # hash randomisation, so equally frequent features (and therefore
    # `reference_features`) could change between notebook sessions.
    feature_frequency.update(dict.fromkeys(features).keys())

n_runs_analyzed = len(all_selected_features)

# Sort by frequency (descending); ties keep first-encountered order
sorted_features = feature_frequency.most_common()

# Core features (appear in all runs)
core_features = [f for f, freq in sorted_features if freq == n_runs_analyzed]

print(f"Feature stability analysis (across {n_runs_analyzed} runs):")

if core_features:
    print(f"  Core features (100% frequency): {len(core_features)} features")
    for i, feature in enumerate(core_features[:10], 1):
        print(f"    {i:2d}. {feature}")
    if len(core_features) > 10:
        print(f"    ... and {len(core_features) - 10} more")
else:
    print("  No core features (selection varies across runs)")
    print("  Most frequent features:")
    for i, (feature, freq) in enumerate(sorted_features[:10], 1):
        print(f"    {i:2d}. {feature}: {freq}/{n_runs_analyzed} runs ({100*freq/n_runs_analyzed:.0f}%)")

# Determine reference features for testing
# Use most frequent features up to mean count
reference_features = [f for f, _ in sorted_features[:n_features_selected]]

print(f"\nUsing {len(reference_features)} most frequent features for evaluation")

# Verify reference features exist in test set
missing_features = set(reference_features) - set(X_test.columns)
if missing_features:
    raise ValueError(f"Reference features not in test set: {missing_features}")

print("VERIFIED: All reference features available in test set")

# ==========================================
# 5. LOAD ALL TRAINED MODELS
# ==========================================
# Best-effort loading: a run whose model file is missing or unreadable is
# reported and skipped; the cell only aborts when nothing could be loaded.

print("\n[STEP 5/8] Loading all trained models...")
print("-" * 50)

MODELS_DIR = f"{BASELINE_3_TRAINING_DIR}/models"

if not os.path.exists(MODELS_DIR):
    raise FileNotFoundError(f"Models directory not found: {MODELS_DIR}")

loaded_models = []

for run in training_results['training_results']['runs']:
    run_id = run['run_id']

    # The path is rebuilt from MODELS_DIR rather than read from the
    # training results JSON.
    model_path = f"{MODELS_DIR}/run_{run_id}_model.json"

    if not os.path.exists(model_path):
        print(f"  WARNING: Model file not found: {model_path}")
        print(f"  Skipping run {run_id}")
        continue

    try:
        model = xgb.XGBClassifier()
        model.load_model(model_path)

        # Feature subset this run's model was trained on
        selected_features = run['rfecv_results']['selected_features']

        loaded_models.append(dict(
            run_id=run_id,
            model=model,
            model_path=model_path,
            selected_features=selected_features,
            n_features=len(selected_features),
        ))
        print(f"  Loaded: run_{run_id}_model.json ({len(selected_features)} features)")
    except Exception as e:
        # Deliberately broad: any load problem downgrades to a warning.
        print(f"  WARNING: Failed to load model {run_id}: {e}")
        print(f"  Skipping run {run_id}")

if not loaded_models:
    raise RuntimeError("No models could be loaded successfully")

print(f"\nSuccessfully loaded {len(loaded_models)}/{len(training_results['training_results']['runs'])} models")

# ==========================================
# 6. EVALUATE ALL MODELS ON TEST SET
# ==========================================
# Scores every loaded model on the test split, restricted to the features
# that model's run selected, and records validation-vs-test gaps.

print("\n[STEP 6/8] Evaluating all models on test set...")
print("-" * 50)

test_results = {
    'experiment_info': {
        'name': 'Baseline 3 Testing: XGBoost with RFECV (CPU)',
        'description': 'Test set evaluation for trained models with RFECV selected features',
        'timestamp': datetime.now().isoformat(),
        'test_set_size': len(y_test),
        'n_features_original': X_test.shape[1],
        'n_features_mean': mean_optimal_features,
        'compute_backend': 'CPU'
    },
    'feature_selection_summary': {
        'method': 'RFECV (wrapper - model-guided)',
        'mean_features_selected': mean_optimal_features,
        'std_features_selected': std_optimal_features,
        'feature_reduction_percent': round(feature_reduction_percent, 1),
        'core_features_count': len(core_features),
        'core_features': core_features
    },
    'training_summary': {
        'n_runs': n_runs_trained,
        'successful_runs': successful_runs,
        'validation_auc_mean': stats['mean_val_auc'],
        'validation_auc_std': stats['std_val_auc']
    },
    'test_results': {
        'runs': []
    }
}

test_aucs = []
test_accuracies = []

# Index the training runs once; setdefault keeps the first entry per run_id.
training_runs_by_id = {}
for recorded_run in training_results['training_results']['runs']:
    training_runs_by_id.setdefault(recorded_run['run_id'], recorded_run)

for model_info in loaded_models:
    run_id = model_info['run_id']
    model = model_info['model']
    selected_features = model_info['selected_features']
    n_features = model_info['n_features']

    print(f"\nRun {run_id}:")
    print("-" * 40)

    # Validation metrics recorded for this run during training
    training_run = training_runs_by_id[run_id]
    val_metrics = training_run['validation_metrics']

    # Restrict the test set to the columns this run selected
    X_test_selected = X_test[selected_features]

    # Hard labels and positive-class probabilities
    y_test_pred = model.predict(X_test_selected)
    y_test_proba = model.predict_proba(X_test_selected)[:, 1]

    test_metrics = calculate_metrics(y_test, y_test_pred, y_test_proba)

    # Positive gap means validation scored higher than test
    auc_gap = float(val_metrics['roc_auc'] - test_metrics['roc_auc'])
    accuracy_gap = float(val_metrics['accuracy'] - test_metrics['accuracy'])

    run_result = {
        'run_id': run_id,
        'random_state': training_run['random_state'],
        'n_features_used': n_features,
        'validation_metrics': val_metrics,
        'test_metrics': test_metrics,
        'generalization_gap': {
            'auc_gap': auc_gap,
            'accuracy_gap': accuracy_gap
        }
    }

    test_results['test_results']['runs'].append(run_result)
    test_aucs.append(test_metrics['roc_auc'])
    test_accuracies.append(test_metrics['accuracy'])

    print(f"  Features used:  {n_features}")
    print(f"  Validation AUC: {val_metrics['roc_auc']:.4f}")
    print(f"  Test AUC:       {test_metrics['roc_auc']:.4f}")
    print(f"  Gap:            {run_result['generalization_gap']['auc_gap']:.4f}")
    print(f"  Test Accuracy:  {test_metrics['accuracy']:.4f}")

print(f"\nAll {len(loaded_models)} models evaluated on test set")

# ==========================================
# 7. STATISTICAL SUMMARY & ANALYSIS
# ==========================================
# Aggregates the per-run test metrics into summary statistics and stores
# them under test_results['test_results']['statistics'].

print("\n[STEP 7/8] Computing statistical summary and analysis...")
print("-" * 50)


def _sample_std(values):
    """Return the sample standard deviation (ddof=1) of *values*.

    With fewer than two values the sample standard deviation is undefined
    and NumPy yields NaN plus a RuntimeWarning. That happens when only one
    model could be loaded, so 0.0 is returned instead to keep the saved
    statistics free of NaN.
    """
    values = np.asarray(values, dtype=float)
    if values.size < 2:
        return 0.0
    return float(np.std(values, ddof=1))


test_aucs_array = np.array(test_aucs)
test_accuracies_array = np.array(test_accuracies)

# NOTE: these are the 2.5th/97.5th percentiles of the per-run test AUCs,
# i.e. an empirical range across runs, not a confidence interval of the
# mean. The names are kept because they are written to the results file.
ci_95_lower = float(np.percentile(test_aucs_array, 2.5))
ci_95_upper = float(np.percentile(test_aucs_array, 97.5))

statistics = {
    'mean_test_auc': float(np.mean(test_aucs_array)),
    'std_test_auc': _sample_std(test_aucs_array),
    'min_test_auc': float(np.min(test_aucs_array)),
    'max_test_auc': float(np.max(test_aucs_array)),
    'median_test_auc': float(np.median(test_aucs_array)),
    'ci_95_lower': ci_95_lower,
    'ci_95_upper': ci_95_upper,
    # Position within the evaluated runs, not necessarily the run_id
    'best_run_index': int(np.argmax(test_aucs_array)),
    'models_evaluated': len(loaded_models),
    'all_metrics_summary': {
        'accuracy': {
            'mean': float(np.mean(test_accuracies_array)),
            'std': _sample_std(test_accuracies_array)
        }
    }
}

# Add all metrics summary
for metric in ['precision_macro', 'recall_macro', 'f1_macro', 'log_loss']:
    metric_values = [run['test_metrics'][metric] for run in test_results['test_results']['runs']]
    statistics['all_metrics_summary'][metric] = {
        'mean': float(np.mean(metric_values)),
        'std': _sample_std(metric_values)
    }

test_results['test_results']['statistics'] = statistics

print("Test AUC statistics across evaluated runs:")
print(f"  Mean:   {statistics['mean_test_auc']:.4f}")
print(f"  Std:    {statistics['std_test_auc']:.4f}")
print(f"  Min:    {statistics['min_test_auc']:.4f}")
print(f"  Max:    {statistics['max_test_auc']:.4f}")
print(f"  Median: {statistics['median_test_auc']:.4f}")
print(f"  95% CI: [{ci_95_lower:.4f}, {ci_95_upper:.4f}]")

print("\nAll metrics summary (Test Set):")
for metric, values in statistics['all_metrics_summary'].items():
    print(f"  {metric:20s}: {values['mean']:.4f} ± {values['std']:.4f}")

# ==========================================
# 8. GENERALIZATION ANALYSIS
# ==========================================
# Compares validation and test AUC (overall and per run) against a fixed
# gap threshold, records the best test run, and writes both JSON outputs.

print("\n[STEP 8/8] Analyzing generalization performance...")
print("-" * 50)

OVERFITTING_THRESHOLD = 0.03

val_auc_mean = test_results['training_summary']['validation_auc_mean']
test_auc_mean = statistics['mean_test_auc']
overall_gap = val_auc_mean - test_auc_mean

# Runs whose validation AUC exceeds their test AUC by more than the threshold
overfitting_runs = [
    {
        'run_id': evaluated_run['run_id'],
        'gap': evaluated_run['generalization_gap']['auc_gap'],
        'validation_auc': evaluated_run['validation_metrics']['roc_auc'],
        'test_auc': evaluated_run['test_metrics']['roc_auc']
    }
    for evaluated_run in test_results['test_results']['runs']
    if evaluated_run['generalization_gap']['auc_gap'] > OVERFITTING_THRESHOLD
]

if overall_gap > OVERFITTING_THRESHOLD:
    overall_status = 'OVERFITTING'
else:
    overall_status = 'GOOD_GENERALIZATION'

overfitting_analysis = {
    'threshold': OVERFITTING_THRESHOLD,
    'overall_gap': float(overall_gap),
    'overall_status': overall_status,
    'n_overfitting_runs': len(overfitting_runs),
    'overfitting_runs': overfitting_runs
}

test_results['overfitting_analysis'] = overfitting_analysis

print("Generalization analysis:")
print(f"  Threshold: {OVERFITTING_THRESHOLD}")
print(f"  Overall gap (val - test): {overall_gap:.4f}")
print(f"  Status: {overfitting_analysis['overall_status']}")

if not overfitting_runs:
    print(f"\n  All runs show good generalization (gap <= {OVERFITTING_THRESHOLD})")
else:
    print(f"\n  WARNING: {len(overfitting_runs)} run(s) show overfitting (gap > {OVERFITTING_THRESHOLD}):")
    for run in overfitting_runs:
        print(f"    Run {run['run_id']}: gap = {run['gap']:.4f} "
              f"(val={run['validation_auc']:.4f}, test={run['test_auc']:.4f})")

# Best performance: 'best_run_index' is a position in the evaluated runs
best_run_idx_in_tested = statistics['best_run_index']
best_run = test_results['test_results']['runs'][best_run_idx_in_tested]

test_results['best_performance'] = dict(
    run_id=best_run['run_id'],
    random_state=best_run['random_state'],
    n_features_used=best_run['n_features_used'],
    test_auc=best_run['test_metrics']['roc_auc'],
    all_test_metrics=best_run['test_metrics'],
    validation_auc=best_run['validation_metrics']['roc_auc'],
    generalization_gap=best_run['generalization_gap']['auc_gap'],
)

# Save the full results
results_path = TESTING_DIR + "/baseline_3_test_results.json"
save_results(test_results, results_path)

print(f"\nTest results saved: {results_path}")

# Condensed summary stored next to the full results
summary = {
    'experiment': 'Baseline 3 Testing: XGBoost with RFECV (CPU)',
    'feature_selection': {
        'method': 'RFECV (wrapper - model-guided)',
        'mean_features_selected': mean_optimal_features,
        'std_features_selected': std_optimal_features,
        'feature_reduction_percent': round(feature_reduction_percent, 1)
    },
    'test_set_size': len(y_test),
    'n_models_tested': len(loaded_models),
    'compute_backend': 'CPU',
    'performance': {
        'validation_auc_mean': val_auc_mean,
        'test_auc_mean': test_auc_mean,
        'generalization_gap': overall_gap
    },
    'best_run': {
        'run_id': best_run['run_id'],
        'test_auc': best_run['test_metrics']['roc_auc']
    },
    'overfitting_status': overfitting_analysis['overall_status']
}

summary_path = TESTING_DIR + "/testing_summary.json"
save_results(summary, summary_path)
print(f"Testing summary saved: {summary_path}")

# ==========================================
# COMPLETION SUMMARY
# ==========================================
# Console recap only: re-prints values computed in the earlier steps.

banner_rule = "=" * 80
metrics_recap = statistics['all_metrics_summary']

print("\n" + banner_rule)
print("BASELINE 3 TESTING COMPLETED SUCCESSFULLY (CPU)")
print(banner_rule)

print("\nTEST SET EVALUATION:")
print("  Method: XGBoost with RFECV feature selection")
print(f"  Features (mean): {mean_optimal_features:.1f} ± {std_optimal_features:.1f}")
print(f"  Feature reduction: {feature_reduction_percent:.1f}%")
print(f"  Test samples: {len(y_test)}")
print(f"  Models tested: {len(loaded_models)}/{n_runs_trained}")
print("  Compute backend: CPU")

print("\nVALIDATION PERFORMANCE (from training):")
print(f"  Mean AUC: {val_auc_mean:.4f} ± {test_results['training_summary']['validation_auc_std']:.4f}")

print("\nTEST PERFORMANCE:")
print(f"  Mean AUC: {statistics['mean_test_auc']:.4f} ± {statistics['std_test_auc']:.4f}")
print(f"  95% CI: [{ci_95_lower:.4f}, {ci_95_upper:.4f}]")
print(f"  Best AUC: {statistics['max_test_auc']:.4f} (Run {best_run['run_id']})")
print(f"  AUC range: [{statistics['min_test_auc']:.4f}, {statistics['max_test_auc']:.4f}]")

print("\nGENERALIZATION ANALYSIS:")
print(f"  Overall gap (val - test): {overall_gap:.4f}")
print(f"  Status: {overfitting_analysis['overall_status']}")
if not overfitting_runs:
    print("  All runs generalize well")
else:
    print(f"  WARNING: {len(overfitting_runs)}/{len(loaded_models)} runs show overfitting")

print("\nALL METRICS (Test Set):")
print(f"  Accuracy:       {metrics_recap['accuracy']['mean']:.4f} ± {metrics_recap['accuracy']['std']:.4f}")
print(f"  Precision:      {metrics_recap['precision_macro']['mean']:.4f} ± {metrics_recap['precision_macro']['std']:.4f}")
print(f"  Recall:         {metrics_recap['recall_macro']['mean']:.4f} ± {metrics_recap['recall_macro']['std']:.4f}")
print(f"  F1-Score:       {metrics_recap['f1_macro']['mean']:.4f} ± {metrics_recap['f1_macro']['std']:.4f}")

print("\nOUTPUTS GENERATED:")
print(f"  Test results: {results_path}")
print(f"  Testing summary: {summary_path}")
print(f"  Output directory: {TESTING_DIR}")

print("\nNEXT STEPS:")
print("  Cell 8: Comprehensive Comparison & Statistical Analysis")
print("  Compare all three baselines (Baseline 1, 2, 3)")

print("\n" + banner_rule)
print("BASELINE 3 TESTING READY FOR COMPARISON")
print(banner_rule)

BASELINE 3 TESTING: TEST SET EVALUATION (CPU)

[STEP 1/8] Verifying prerequisites...
--------------------------------------------------
VERIFIED: Test data (397 samples x 25 features)
VERIFIED: Test set integrity maintained

[STEP 2/8] Loading training results from Cell 4...
--------------------------------------------------
Training results loaded: /content/drive/MyDrive/ASD_GWO_XGBoost_Project/03_Results/output_notebook_02/cell_4_baseline3/training/baseline_3_training_results.json
Verified: 10/10 training runs completed successfully
Training mean validation AUC: 0.9964

[STEP 3/8] Extracting feature selection details...
--------------------------------------------------
Feature selection method: RFECV (wrapper - model-guided)
Optimal features (mean): 25.0 ± 0.0
Original features: 25
All runs selected: 25 features (100% stability)
Feature reduction: 0.0%

[STEP 4/8] Analyzing feature stability across runs...
--------------------------------------------------
Feature stability analysis

try cpu

In [None]:
# @title Cell 2: Baseline 1 Training - XGBoost with All Features (CPU)

"""
ASD Detection Project: Baseline Experiments
Cell 2: Baseline 1 - XGBoost trained with all features (no feature selection)
10 independent runs with unified XGBoost configuration
CPU-compatible implementation for reproducibility
Control group for feature selection comparison
"""

print("=" * 80)
print("BASELINE 1 TRAINING: XGBOOST WITH ALL FEATURES (CPU)")
print("=" * 80)

# ==========================================
# 1. VERIFY PREREQUISITES
# ==========================================
# Training needs the 25-feature training split, the 318-row validation
# split and a CONFIG with n_runs == 10 in the notebook namespace.

print("\n[STEP 1/7] Verifying Cell 1 completion...")
print("-" * 50)

try:
    # Name membership is checked before attribute access in each assert.
    assert 'X_train' in dir() and X_train.shape[1] == 25
    assert 'X_val' in dir() and X_val.shape[0] == 318
    assert 'CONFIG' in dir() and CONFIG['n_runs'] == 10
except (NameError, AssertionError) as e:
    # Every failure mode is reported with the same message; the original
    # exception stays attached as the cause.
    raise RuntimeError("ERROR: Cell 1 must be executed first") from e
else:
    print(f"VERIFIED: Training data ({X_train.shape[0]} samples x {X_train.shape[1]} features)")
    print(f"VERIFIED: Validation data ({X_val.shape[0]} samples)")
    print(f"VERIFIED: Number of runs = {CONFIG['n_runs']}")

# ==========================================
# 2. CPU-COMPATIBLE XGBOOST CONFIG
# ==========================================
# One hyper-parameter set shared by every run; only 'random_state' is
# replaced per run by the training loop.

print("\n[STEP 2/7] Setting up CPU-compatible XGBoost configuration...")
print("-" * 50)

# CPU-compatible unified configuration (key order is kept as written)
# NOTE(review): newer XGBoost releases replace 'predictor' with 'device';
# confirm against the installed version.
XGBOOST_UNIFIED_CONFIG_CPU = dict(
    n_estimators=500,
    max_depth=8,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    tree_method='hist',             # CPU-compatible histogram method
    predictor='cpu_predictor',      # Explicit CPU predictor
    objective='binary:logistic',
    eval_metric='auc',
    verbosity=0,
    random_state=42,                # Will be overridden per run
)

print("CPU-compatible XGBoost configuration:")
for param_name in ('tree_method', 'predictor', 'n_estimators', 'max_depth', 'learning_rate'):
    print(f"  {param_name}: {XGBOOST_UNIFIED_CONFIG_CPU[param_name]}")

# ==========================================
# 3. EXPERIMENT SETUP
# ==========================================
# Creates the Baseline 1 output folders and echoes the run plan.

print("\n[STEP 3/7] Setting up Baseline 1 experiment...")
print("-" * 50)

BASELINE_1_DIR = PROJECT_PATHS['baseline_results'] + "/baseline_1_all_features"
MODELS_DIR = BASELINE_1_DIR + "/models"
# Creating the nested models folder also creates the experiment folder.
os.makedirs(MODELS_DIR, exist_ok=True)

print(f"Experiment directory: {BASELINE_1_DIR}")
print(f"Models directory: {MODELS_DIR}")

# Seeds run consecutively from the configured base seed, one per run.
first_seed = CONFIG['random_state']
last_seed = CONFIG['random_state'] + CONFIG['n_runs'] - 1

print("\nExperiment configuration:")
print(f"  Method: No feature selection (all {X_train.shape[1]} features)")
print(f"  Number of runs: {CONFIG['n_runs']}")
print(f"  Random seeds: {first_seed} to {last_seed}")
print("  Hardware: CPU-based computation")

# ==========================================
# 4. TRAINING LOOP - 10 INDEPENDENT RUNS
# ==========================================
# Fits one XGBoost model per run on all features, differing only in the
# random seed, scores it on the validation split and saves it to disk.

print("\n[STEP 4/7] Training 10 independent models...")
print("-" * 50)

training_results = {
    'experiment_info': {
        'name': 'Baseline 1: XGBoost with All Features (CPU)',
        'description': 'Control group - no feature selection applied, CPU implementation',
        'timestamp': datetime.now().isoformat(),
        'n_runs': CONFIG['n_runs'],
        'gpu_available': False,
        'compute_backend': 'CPU'
    },
    'configuration': {
        'xgboost_config': XGBOOST_UNIFIED_CONFIG_CPU,
        'n_features': X_train.shape[1],
        'feature_names': X_train.columns.tolist()
    },
    'training_results': {
        'runs': [],
        'statistics': {}
    },
    'best_model': {}
}

start_time_total = time.time()
validation_aucs = []

for run_id in range(CONFIG['n_runs']):
    # Each run uses the base seed offset by its run index.
    run_seed = CONFIG['random_state'] + run_id

    print(f"\nRun {run_id + 1}/{CONFIG['n_runs']} (seed={run_seed})")
    print("-" * 40)

    start_time_run = time.time()

    # Copy of the shared configuration with this run's seed swapped in
    model_config = {**XGBOOST_UNIFIED_CONFIG_CPU, 'random_state': run_seed}

    model = xgb.XGBClassifier(**model_config)

    # Train on all features
    print(f"  Training on {X_train.shape[1]} features...")
    model.fit(X_train, y_train, verbose=False)

    # Hard labels and positive-class probabilities on the validation split
    y_val_pred = model.predict(X_val)
    y_val_proba = model.predict_proba(X_val)[:, 1]

    val_metrics = calculate_metrics(y_val, y_val_pred, y_val_proba)

    # Covers model construction, fitting, prediction and metric computation
    training_time = time.time() - start_time_run

    # Persist this run's model
    model_path = f"{MODELS_DIR}/run_{run_id}_model.json"
    model.save_model(model_path)

    run_result = dict(
        run_id=run_id,
        random_state=run_seed,
        training_time_seconds=round(training_time, 2),
        validation_metrics=val_metrics,
        model_path=model_path,
    )

    training_results['training_results']['runs'].append(run_result)
    validation_aucs.append(val_metrics['roc_auc'])

    print(f"  Validation AUC: {val_metrics['roc_auc']:.4f}")
    print(f"  Validation Accuracy: {val_metrics['accuracy']:.4f}")
    print(f"  Training time: {training_time:.2f}s")
    print(f"  Model saved: {model_path}")

total_training_time = time.time() - start_time_total

print(f"\nTotal training time: {total_training_time:.2f}s ({total_training_time/60:.2f} min)")

# ==========================================
# 5. STATISTICAL SUMMARY
# ==========================================
# Descriptive statistics of the validation AUC across the training runs.

print("\n[STEP 5/7] Computing statistical summary...")
print("-" * 50)

validation_aucs_array = np.array(validation_aucs)

statistics = dict(
    mean_val_auc=float(validation_aucs_array.mean()),
    std_val_auc=float(validation_aucs_array.std(ddof=1)),  # sample std
    min_val_auc=float(validation_aucs_array.min()),
    max_val_auc=float(validation_aucs_array.max()),
    median_val_auc=float(np.median(validation_aucs_array)),
    # Position of the highest AUC; equals the run_id because the runs
    # were appended in range order.
    best_run_id=int(validation_aucs_array.argmax()),
    total_training_time_seconds=round(total_training_time, 2),
)

training_results['training_results']['statistics'] = statistics

print("Validation AUC statistics across 10 runs:")
for stat_label, stat_key in (
    ("Mean:  ", 'mean_val_auc'),
    ("Std:   ", 'std_val_auc'),
    ("Min:   ", 'min_val_auc'),
    ("Max:   ", 'max_val_auc'),
    ("Median:", 'median_val_auc'),
):
    print(f"  {stat_label} {statistics[stat_key]:.4f}")
print(f"\nBest run: Run {statistics['best_run_id']} "
      f"(AUC = {statistics['max_val_auc']:.4f})")

# ==========================================
# 6. IDENTIFY AND SAVE BEST MODEL
# ==========================================
# Re-loads the model of the best validation run from disk and stores a
# copy under a fixed file name in the experiment folder.

print("\n[STEP 6/7] Saving best model...")
print("-" * 50)

best_run_id = statistics['best_run_id']
best_run_result = training_results['training_results']['runs'][best_run_id]
best_val_metrics = best_run_result['validation_metrics']

# Load the saved run model, then write it out separately as the best model
best_model = xgb.XGBClassifier()
best_model.load_model(best_run_result['model_path'])

best_model_path = BASELINE_1_DIR + "/baseline_1_best_model.json"
best_model.save_model(best_model_path)

training_results['best_model'] = dict(
    run_id=best_run_id,
    random_state=best_run_result['random_state'],
    validation_auc=best_val_metrics['roc_auc'],
    validation_metrics=best_val_metrics,
    model_path=best_model_path,
)

print(f"Best model: Run {best_run_id}")
print(f"  Validation AUC: {best_val_metrics['roc_auc']:.4f}")
print(f"  Validation Accuracy: {best_val_metrics['accuracy']:.4f}")
print(f"  Saved to: {best_model_path}")

# ==========================================
# 7. FEATURE IMPORTANCE ANALYSIS
# ==========================================
# Ranks the training columns by the best model's feature_importances_
# and writes the full ranking to CSV.

print("\n[STEP 7/7] Analyzing feature importance (best model)...")
print("-" * 50)

feature_importance = best_model.feature_importances_
feature_names = X_train.columns.tolist()

importance_table = pd.DataFrame({
    'feature': feature_names,
    'importance': feature_importance
})
importance_df = importance_table.sort_values('importance', ascending=False)

print("\nTop 10 most important features:")
top_rows = importance_df.head(10)
for idx, (feature_name, importance_value) in enumerate(
        zip(top_rows['feature'], top_rows['importance']), 1):
    print(f"  {idx:2d}. {feature_name:<25s} {importance_value:.4f}")

importance_csv_path = BASELINE_1_DIR + "/feature_importance.csv"
importance_df.to_csv(importance_csv_path, index=False)
print(f"\nFeature importance saved: {importance_csv_path}")

# ==========================================
# 8. SAVE TRAINING RESULTS
# ==========================================
# Writes the full training record and a condensed summary as JSON into
# the Baseline 1 experiment folder.

# This cell has eight steps, so the banner reads 8/8 (it used to print the
# impossible "8/7"). NOTE: the banners of steps 1-7 in this cell still use
# "/7" as their total.
print("\n[STEP 8/8] Saving training results...")
print("-" * 50)

results_path = f"{BASELINE_1_DIR}/baseline_1_training_results_CPU.json"
save_results(training_results, results_path)

print(f"Training results saved: {results_path}")

# Generate summary statistics file
summary = {
    'experiment': 'Baseline 1: XGBoost with All Features (CPU)',
    'n_features': X_train.shape[1],
    'n_runs': CONFIG['n_runs'],
    'compute_backend': 'CPU',
    'validation_auc': {
        'mean': statistics['mean_val_auc'],
        'std': statistics['std_val_auc'],
        'range': [statistics['min_val_auc'], statistics['max_val_auc']]
    },
    'best_run': {
        'run_id': best_run_id,
        'validation_auc': statistics['max_val_auc']
    },
    'training_time_total_minutes': round(total_training_time / 60, 2)
}

summary_path = f"{BASELINE_1_DIR}/training_summary_CPU.json"
save_results(summary, summary_path)
print(f"Training summary saved: {summary_path}")

# ==========================================
# COMPLETION SUMMARY
# ==========================================
# Console recap only: re-prints values computed in the earlier steps.

section_rule = "=" * 80

print("\n" + section_rule)
print("BASELINE 1 TRAINING COMPLETED SUCCESSFULLY (CPU)")
print(section_rule)

print("\nEXPERIMENT SUMMARY:")
print("  Method: XGBoost with all features (no selection)")
print(f"  Features used: {X_train.shape[1]} features")
print(f"  Training runs: {CONFIG['n_runs']}")
print("  Compute backend: CPU (tree_method='hist')")
print(f"  Total training time: {total_training_time/60:.2f} minutes")

print("\nVALIDATION PERFORMANCE:")
print(f"  Mean AUC: {statistics['mean_val_auc']:.4f} ± {statistics['std_val_auc']:.4f}")
print(f"  Best AUC: {statistics['max_val_auc']:.4f} (Run {best_run_id})")
print(f"  AUC range: [{statistics['min_val_auc']:.4f}, {statistics['max_val_auc']:.4f}]")

print("\nOUTPUTS GENERATED:")
print(f"  Training results: {results_path}")
print(f"  Best model: {best_model_path}")
print(f"  All models: {MODELS_DIR}/ (10 models)")
print(f"  Feature importance: {importance_csv_path}")

print("\nNEXT STEPS:")
print("  Cell 3: Baseline 2 - XGBoost with SelectKBest (CPU)")
print("  Cell 5: Baseline 1 Testing - Test set evaluation")

print("\n" + section_rule)
print("READY FOR NEXT BASELINE EXPERIMENT")
print(section_rule)

BASELINE 1 TRAINING: XGBOOST WITH ALL FEATURES (CPU)

[STEP 1/7] Verifying Cell 1 completion...
--------------------------------------------------
VERIFIED: Training data (1270 samples x 25 features)
VERIFIED: Validation data (318 samples)
VERIFIED: Number of runs = 10

[STEP 2/7] Setting up CPU-compatible XGBoost configuration...
--------------------------------------------------
CPU-compatible XGBoost configuration:
  tree_method: hist
  predictor: cpu_predictor
  n_estimators: 500
  max_depth: 8
  learning_rate: 0.05

[STEP 3/7] Setting up Baseline 1 experiment...
--------------------------------------------------
Experiment directory: /content/drive/MyDrive/ASD_GWO_XGBoost_Project/03_Results/output_notebook_02/cell_1_setup/baseline_1_all_features
Models directory: /content/drive/MyDrive/ASD_GWO_XGBoost_Project/03_Results/output_notebook_02/cell_1_setup/baseline_1_all_features/models

Experiment configuration:
  Method: No feature selection (all 25 features)
  Number of runs: 10
  R

In [None]:
# @title Cell 3: Baseline 2 Training - XGBoost with SelectKBest (CPU)

"""
ASD Detection Project: Baseline Experiments
Cell 3: Baseline 2 - XGBoost with SelectKBest (Filter Method)
Phase 1: K-value optimization (k=[8,12,16,20])
Phase 2: 10 independent runs with optimal k
CPU-compatible implementation for reproducibility
Statistical feature selection using ANOVA F-test
"""

print("=" * 80)
print("BASELINE 2 TRAINING: XGBOOST WITH SELECTKBEST (CPU)")
print("=" * 80)

# ==========================================
# 1. VERIFY PREREQUISITES
# ==========================================
# Needs the 25-feature training split, the 318-row validation split, a
# SELECTKBEST_CONFIG and a CONFIG with n_runs == 10 in the namespace.

print("\n[STEP 1/8] Verifying Cell 1 completion...")
print("-" * 50)

try:
    # Name membership is checked before attribute access in each assert.
    assert 'X_train' in dir() and X_train.shape[1] == 25
    assert 'X_val' in dir() and X_val.shape[0] == 318
    assert 'SELECTKBEST_CONFIG' in dir()
    assert 'CONFIG' in dir() and CONFIG['n_runs'] == 10
    # These prints stay inside the try so that a SELECTKBEST_CONFIG
    # without 'k_values' is not reported as verified.
    print(f"VERIFIED: Training data ({X_train.shape[0]} samples x {X_train.shape[1]} features)")
    print(f"VERIFIED: Validation data ({X_val.shape[0]} samples)")
    print(f"VERIFIED: SelectKBest config (k_values={SELECTKBEST_CONFIG['k_values']})")
    print(f"VERIFIED: Number of runs = {CONFIG['n_runs']}")
except (NameError, AssertionError) as e:
    # Missing names and failed assertions share one message; the original
    # exception stays attached as the cause.
    raise RuntimeError("ERROR: Cell 1 must be executed first") from e

# ==========================================
# 2. CPU-COMPATIBLE XGBOOST CONFIG
# ==========================================
# Same hyper-parameter values as the Baseline 1 cell, redefined here so
# this cell can run after Cell 1 alone.

print("\n[STEP 2/8] Setting up CPU-compatible XGBoost configuration...")
print("-" * 50)

# CPU-compatible unified configuration (key order is kept as written)
# NOTE(review): newer XGBoost releases replace 'predictor' with 'device';
# confirm against the installed version.
XGBOOST_UNIFIED_CONFIG_CPU = dict(
    n_estimators=500,
    max_depth=8,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    tree_method='hist',             # CPU-compatible histogram method
    predictor='cpu_predictor',      # Explicit CPU predictor
    objective='binary:logistic',
    eval_metric='auc',
    verbosity=0,
    random_state=42,                # Will be overridden per run
)

print("CPU-compatible XGBoost configuration:")
for param_name in ('tree_method', 'predictor', 'n_estimators', 'max_depth'):
    print(f"  {param_name}: {XGBOOST_UNIFIED_CONFIG_CPU[param_name]}")

# ==========================================
# 3. EXPERIMENT SETUP
# ==========================================
# Creates the Baseline 2 output folders and echoes the experiment plan.

print("\n[STEP 3/8] Setting up Baseline 2 experiment...")
print("-" * 50)

BASELINE_2_DIR = PROJECT_PATHS['baseline_results'] + "/baseline_2_selectkbest"
MODELS_DIR = BASELINE_2_DIR + "/models"
# Creating the nested models folder also creates the experiment folder.
os.makedirs(MODELS_DIR, exist_ok=True)

print(f"Experiment directory: {BASELINE_2_DIR}")
print(f"Models directory: {MODELS_DIR}")

print("\nExperiment configuration:")
print("  Method: SelectKBest (Filter - ANOVA F-test)")
print(f"  K-values to test: {SELECTKBEST_CONFIG['k_values']}")
print(f"  Number of runs: {CONFIG['n_runs']}")
print("  Hardware: CPU-based computation")
print("  Selection: Deterministic (same features per k)")

# ==========================================
# 4. PHASE 1: K-VALUE OPTIMIZATION
# ==========================================

print("\n[STEP 4/8] Phase 1: K-value optimization...")
print("-" * 50)

# For each candidate k: fit SelectKBest on the training split, train one
# XGBoost model on the k selected columns, and score it on the validation
# split. The k with the highest validation AUC wins.
# NOTE: the same validation split is used again in Phase 2, so the Phase 2
# validation AUCs are measured on data that already influenced the choice of k.
k_value_results = {}

for k in SELECTKBEST_CONFIG['k_values']:
    print(f"\nTesting k={k} features...")
    print("-" * 40)

    # Initialize SelectKBest
    selector = SelectKBest(score_func=SELECTKBEST_CONFIG['scoring_function'], k=k)

    # Fit on training data only, then apply the fitted mask to validation data
    X_train_selected = selector.fit_transform(X_train, y_train)
    X_val_selected = selector.transform(X_val)

    # Selected features and their scores, both in original column order
    selected_mask = selector.get_support()
    selected_features = X_train.columns[selected_mask].tolist()
    f_scores = selector.scores_[selected_mask]

    # Rank by score for display only: slicing `selected_features` directly
    # would show the first three columns, not the three highest-scoring ones.
    ranked_features = sorted(
        zip(selected_features, f_scores),
        key=lambda pair: pair[1],
        reverse=True,
    )
    top_features = [name for name, _ in ranked_features[:3]]

    print(f"  Selected {len(selected_features)} features")
    print(f"  Top 3: {top_features}")

    # Train single XGBoost model with unified config
    model = xgb.XGBClassifier(**XGBOOST_UNIFIED_CONFIG_CPU)
    model.fit(X_train_selected, y_train, verbose=False)

    # Evaluate on validation set
    y_val_pred = model.predict(X_val_selected)
    y_val_proba = model.predict_proba(X_val_selected)[:, 1]
    val_metrics = calculate_metrics(y_val, y_val_pred, y_val_proba)

    print(f"  Validation AUC: {val_metrics['roc_auc']:.4f}")
    print(f"  Validation Accuracy: {val_metrics['accuracy']:.4f}")

    # Store results; `selected_features` and `f_scores` stay index-aligned
    # (column order) because later steps zip them together.
    k_value_results[k] = {
        'val_auc': val_metrics['roc_auc'],
        'val_metrics': val_metrics,
        'selected_features': selected_features,
        'f_scores': f_scores.tolist(),
        'selector': selector
    }

# Determine optimal k (first k wins on an exact AUC tie, per max() semantics)
optimal_k = max(k_value_results.keys(), key=lambda k: k_value_results[k]['val_auc'])

print(f"\n[OPTIMAL K DETERMINED]")
print(f"  Best k-value: {optimal_k}")
print(f"  Validation AUC: {k_value_results[optimal_k]['val_auc']:.4f}")

print("\nK-value optimization results:")
for k in SELECTKBEST_CONFIG['k_values']:
    marker = " <- OPTIMAL" if k == optimal_k else ""
    print(f"  k={k:2d}: AUC = {k_value_results[k]['val_auc']:.4f}{marker}")

# ==========================================
# 5. PREPARE OPTIMAL FEATURE SELECTION
# ==========================================

print("\n[STEP 5/8] Preparing feature selection with optimal k...")
print("-" * 50)

# Reuse the selector that Phase 1 already fitted for the winning k instead of
# fitting a new one.
_optimal_entry = k_value_results[optimal_k]
optimal_selector = _optimal_entry['selector']
selected_features = _optimal_entry['selected_features']

# Show at most the first ten names and summarise the remainder.
print(f"Selected features (k={optimal_k}):")
for i, feature in enumerate(selected_features[:10], 1):
    print(f"  {i:2d}. {feature}")
_n_not_shown = len(selected_features) - 10
if _n_not_shown > 0:
    print(f"  ... and {_n_not_shown} more")

# Persist one row per selected feature, highest score first.
feature_selection_details = pd.DataFrame(
    {
        'feature': selected_features,
        'f_score': _optimal_entry['f_scores'],
    }
)
feature_selection_details = feature_selection_details.sort_values(
    'f_score', ascending=False
)

feature_details_path = BASELINE_2_DIR + "/feature_selection_details.csv"
feature_selection_details.to_csv(feature_details_path, index=False)
print(f"\nFeature selection details saved: {feature_details_path}")

# ==========================================
# 6. PHASE 2: MULTIPLE RUNS WITH OPTIMAL K
# ==========================================

print("\n[STEP 6/8] Phase 2: Training 10 models with optimal k...")
print("-" * 50)

_optimal_entry = k_value_results[optimal_k]

# The results record is assembled section by section; the key order of the
# final dict matches the layout written to disk in step 8.
_experiment_info = {
    'name': 'Baseline 2: XGBoost with SelectKBest (CPU)',
    'description': 'Filter method - ANOVA F-test statistical ranking, CPU implementation',
    'timestamp': datetime.now().isoformat(),
    'n_runs': CONFIG['n_runs'],
    'gpu_available': False,
    'compute_backend': 'CPU',
}

# Per-k outcome of Phase 1 (keys are stringified k values).
_k_search_record = {
    'k_values_tested': SELECTKBEST_CONFIG['k_values'],
    'results': {
        str(_k_val): {
            'val_auc': float(_entry['val_auc']),
            'selected_features': _entry['selected_features'],
        }
        for _k_val, _entry in k_value_results.items()
    },
    'optimal_k': optimal_k,
    'optimal_val_auc': float(_optimal_entry['val_auc']),
}

_feature_selection_record = {
    'method': 'SelectKBest - f_classif',
    'n_features_selected': optimal_k,
    'selected_features': selected_features,
    'f_scores': dict(zip(selected_features, map(float, _optimal_entry['f_scores']))),
}

# The scoring function is a callable, so it is left out of the saved config.
_configuration_record = {
    'xgboost_config': XGBOOST_UNIFIED_CONFIG_CPU,
    'selectkbest_config': {
        _name: _value
        for _name, _value in SELECTKBEST_CONFIG.items()
        if _name != 'scoring_function'
    },
}

training_results = {
    'experiment_info': _experiment_info,
    'k_value_optimization': _k_search_record,
    'feature_selection': _feature_selection_record,
    'configuration': _configuration_record,
    'training_results': {'runs': [], 'statistics': {}},
    'best_model': {},
}

# Transform data using optimal selector (same for all runs)
X_train_selected = optimal_selector.transform(X_train)
X_val_selected = optimal_selector.transform(X_val)

print(f"Feature selection applied: {X_train.shape[1]} -> {optimal_k} features")
print(f"Same features used across all {CONFIG['n_runs']} runs")

start_time_total = time.time()
validation_aucs = []

# The feature set is fixed; only the XGBoost seed changes between runs.
for run_id in range(CONFIG['n_runs']):
    _run_seed = CONFIG['random_state'] + run_id

    print(f"\nRun {run_id + 1}/{CONFIG['n_runs']} (seed={_run_seed})")
    print("-" * 40)

    start_time_run = time.time()

    # Same hyper-parameters as the unified config, with this run's seed.
    model_config = {**XGBOOST_UNIFIED_CONFIG_CPU, 'random_state': _run_seed}
    model = xgb.XGBClassifier(**model_config)

    print(f"  Training on {optimal_k} selected features...")
    model.fit(X_train_selected, y_train, verbose=False)

    # Score the fitted model on the validation split.
    y_val_pred = model.predict(X_val_selected)
    y_val_proba = model.predict_proba(X_val_selected)[:, 1]
    val_metrics = calculate_metrics(y_val, y_val_pred, y_val_proba)

    # Elapsed time covers model construction, fitting and validation scoring.
    training_time = time.time() - start_time_run

    # Every run's model is kept; step 8 copies the best one.
    model_path = f"{MODELS_DIR}/run_{run_id}_model.json"
    model.save_model(model_path)

    run_result = dict(
        run_id=run_id,
        random_state=_run_seed,
        n_features_selected=optimal_k,
        training_time_seconds=round(training_time, 2),
        validation_metrics=val_metrics,
        model_path=model_path,
    )

    training_results['training_results']['runs'].append(run_result)
    validation_aucs.append(val_metrics['roc_auc'])

    print(f"  Validation AUC: {val_metrics['roc_auc']:.4f}")
    print(f"  Validation Accuracy: {val_metrics['accuracy']:.4f}")
    print(f"  Training time: {training_time:.2f}s")
    print(f"  Model saved: {model_path}")

total_training_time = time.time() - start_time_total

print(f"\nTotal training time: {total_training_time:.2f}s ({total_training_time/60:.2f} min)")

# ==========================================
# 7. STATISTICAL SUMMARY
# ==========================================

print("\n[STEP 7/8] Computing statistical summary...")
print("-" * 50)

validation_aucs_array = np.array(validation_aucs)

# Summary of the per-run validation AUCs. `best_run_id` is the position of the
# highest AUC, which equals the run id here because every run appends exactly
# one entry in order.
statistics = dict(
    mean_val_auc=float(validation_aucs_array.mean()),
    std_val_auc=float(validation_aucs_array.std(ddof=1)),  # sample std
    min_val_auc=float(validation_aucs_array.min()),
    max_val_auc=float(validation_aucs_array.max()),
    median_val_auc=float(np.median(validation_aucs_array)),
    best_run_id=int(validation_aucs_array.argmax()),
    total_training_time_seconds=round(total_training_time, 2),
)

training_results['training_results']['statistics'] = statistics

print("Validation AUC statistics across 10 runs:")
# Labels are left-padded to a common width so the values line up.
for _label, _stat_key in (
    ("Mean:", 'mean_val_auc'),
    ("Std:", 'std_val_auc'),
    ("Min:", 'min_val_auc'),
    ("Max:", 'max_val_auc'),
    ("Median:", 'median_val_auc'),
):
    print(f"  {_label:<7} {statistics[_stat_key]:.4f}")
print(f"\nBest run: Run {statistics['best_run_id']} (AUC = {statistics['max_val_auc']:.4f})")

# ==========================================
# 8. IDENTIFY AND SAVE BEST MODEL
# ==========================================

print("\n[STEP 8/8] Saving best model and results...")
print("-" * 50)

best_run_id = statistics['best_run_id']
best_run_result = training_results['training_results']['runs'][best_run_id]
_best_metrics = best_run_result['validation_metrics']

# Round-trip the winning run's model through disk so the experiment directory
# holds a standalone copy under a fixed file name.
best_model_path = BASELINE_2_DIR + "/baseline_2_best_model.json"
best_model = xgb.XGBClassifier()
best_model.load_model(best_run_result['model_path'])
best_model.save_model(best_model_path)

training_results['best_model'] = dict(
    run_id=best_run_id,
    random_state=best_run_result['random_state'],
    validation_auc=_best_metrics['roc_auc'],
    validation_metrics=_best_metrics,
    model_path=best_model_path,
)

print(f"Best model: Run {best_run_id}")
print(f"  Validation AUC: {_best_metrics['roc_auc']:.4f}")
print(f"  Validation Accuracy: {_best_metrics['accuracy']:.4f}")
print(f"  Features used: {optimal_k}")
print(f"  Saved to: {best_model_path}")

# Full record of the experiment (all runs, configuration, k search).
results_path = BASELINE_2_DIR + "/baseline_2_training_results_CPU.json"
save_results(training_results, results_path)
print(f"\nTraining results saved: {results_path}")

# Compact summary written alongside the full record.
_n_original_features = X_train.shape[1]
summary = {
    'experiment': 'Baseline 2: XGBoost with SelectKBest (CPU)',
    'feature_selection': {
        'method': 'Filter (ANOVA F-test)',
        'k_values_tested': SELECTKBEST_CONFIG['k_values'],
        'optimal_k': optimal_k,
        'original_features': _n_original_features,
        'reduction_percent': round((1 - optimal_k/_n_original_features) * 100, 1),
    },
    'n_runs': CONFIG['n_runs'],
    'compute_backend': 'CPU',
    'validation_auc': {
        'mean': statistics['mean_val_auc'],
        'std': statistics['std_val_auc'],
        'range': [statistics['min_val_auc'], statistics['max_val_auc']],
    },
    'best_run': {
        'run_id': best_run_id,
        'validation_auc': statistics['max_val_auc'],
    },
    'training_time_total_minutes': round(total_training_time / 60, 2),
}

summary_path = BASELINE_2_DIR + "/training_summary_CPU.json"
save_results(summary, summary_path)
print(f"Training summary saved: {summary_path}")

# ==========================================
# COMPLETION SUMMARY
# ==========================================

# Final console recap of the Baseline 2 experiment. Nothing is computed or
# saved here beyond ordering the selected features for display.
print("\n" + "="*80)
print("BASELINE 2 TRAINING COMPLETED SUCCESSFULLY (CPU)")
print("="*80)

print("\nEXPERIMENT SUMMARY:")
print(f"  Method: SelectKBest (Filter - ANOVA F-test)")
print(f"  Feature selection: {X_train.shape[1]} -> {optimal_k} features")
print(f"  Reduction: {(1 - optimal_k/X_train.shape[1]) * 100:.1f}%")
print(f"  Training runs: {CONFIG['n_runs']}")
print(f"  Compute backend: CPU (tree_method='hist')")
print(f"  Total training time: {total_training_time/60:.2f} minutes")

print("\nK-VALUE OPTIMIZATION:")
for k in SELECTKBEST_CONFIG['k_values']:
    marker = " <- OPTIMAL" if k == optimal_k else ""
    print(f"  k={k:2d}: AUC = {k_value_results[k]['val_auc']:.4f}{marker}")

print("\nVALIDATION PERFORMANCE:")
print(f"  Mean AUC: {statistics['mean_val_auc']:.4f} ± {statistics['std_val_auc']:.4f}")
print(f"  Best AUC: {statistics['max_val_auc']:.4f} (Run {best_run_id})")
print(f"  AUC range: [{statistics['min_val_auc']:.4f}, {statistics['max_val_auc']:.4f}]")

# `selected_features` is in original column order, so slicing it would list
# the first ten columns rather than the ten best. Pair each name with its
# stored F-score (same order) and rank before printing the "Top 10".
ranked_features = sorted(
    zip(selected_features, k_value_results[optimal_k]['f_scores']),
    key=lambda pair: pair[1],
    reverse=True,
)

print("\nSELECTED FEATURES (Top 10):")
for i, (feature, _f_score) in enumerate(ranked_features[:10], 1):
    print(f"  {i:2d}. {feature}")
if len(ranked_features) > 10:
    print(f"  ... and {len(ranked_features) - 10} more")

print("\nOUTPUTS GENERATED:")
print(f"  Training results: {results_path}")
print(f"  Best model: {best_model_path}")
print(f"  All models: {MODELS_DIR}/ (10 models)")
print(f"  Feature selection: {feature_details_path}")

print("\nNEXT STEPS:")
print("  Cell 4: Baseline 3 - XGBoost with RFECV (CPU)")
print("  Cell 6: Baseline 2 Testing - Test set evaluation")

print("\n" + "="*80)
print("READY FOR NEXT BASELINE EXPERIMENT")
print("="*80)

BASELINE 2 TRAINING: XGBOOST WITH SELECTKBEST (CPU)

[STEP 1/8] Verifying Cell 1 completion...
--------------------------------------------------
VERIFIED: Training data (1270 samples x 25 features)
VERIFIED: Validation data (318 samples)
VERIFIED: SelectKBest config (k_values=[8, 12, 16, 20])
VERIFIED: Number of runs = 10

[STEP 2/8] Setting up CPU-compatible XGBoost configuration...
--------------------------------------------------
CPU-compatible XGBoost configuration:
  tree_method: hist
  predictor: cpu_predictor
  n_estimators: 500
  max_depth: 8

[STEP 3/8] Setting up Baseline 2 experiment...
--------------------------------------------------
Experiment directory: /content/drive/MyDrive/ASD_GWO_XGBoost_Project/03_Results/output_notebook_02/cell_1_setup/baseline_2_selectkbest
Models directory: /content/drive/MyDrive/ASD_GWO_XGBoost_Project/03_Results/output_notebook_02/cell_1_setup/baseline_2_selectkbest/models

Experiment configuration:
  Method: SelectKBest (Filter - ANOVA F-te

In [None]:
# @title Cell 4: Baseline 3 Training - XGBoost with RFECV (CPU)

"""
ASD Detection Project: Baseline Experiments
Cell 4: Baseline 3 - XGBoost with RFECV (Wrapper Method)
10 independent RFECV runs (each performs feature selection separately)
Model-guided iterative feature elimination with cross-validation
CPU-compatible implementation for reproducibility and stability
"""

_banner = "=" * 80
print(_banner)
print("BASELINE 3 TRAINING: XGBOOST WITH RFECV (CPU)")
print(_banner)

# ==========================================
# 1. VERIFY PREREQUISITES
# ==========================================

print("\n[STEP 1/7] Verifying Cell 1 completion...")
print("-" * 50)

# Fail fast if the setup cell has not populated the notebook namespace, or if
# the loaded data does not have the expected dimensions (25 features,
# 318 validation rows, 10 runs).
try:
    _defined_names = dir()
    assert 'X_train' in _defined_names and X_train.shape[1] == 25
    assert 'X_val' in _defined_names and X_val.shape[0] == 318
    assert 'RFECV_CONFIG' in _defined_names
    assert 'CONFIG' in _defined_names and CONFIG['n_runs'] == 10
except (NameError, AssertionError) as e:
    # Surface a single actionable message; the original error stays chained.
    raise RuntimeError("ERROR: Cell 1 must be executed first") from e
else:
    print(f"VERIFIED: Training data ({X_train.shape[0]} samples x {X_train.shape[1]} features)")
    print(f"VERIFIED: Validation data ({X_val.shape[0]} samples)")
    print("VERIFIED: RFECV config loaded")
    print(f"VERIFIED: Number of runs = {CONFIG['n_runs']}")

# ==========================================
# 2. CPU-COMPATIBLE XGBOOST CONFIGS
# ==========================================

print("\n[STEP 2/7] Setting up CPU-compatible XGBoost configurations...")
print("-" * 50)

# Settings common to both configurations below. They are expanded after the
# model-size parameters so each resulting dict keeps the original key order.
# NOTE(review): `predictor` was deprecated in XGBoost 2.0 (superseded by
# `device`) - confirm it is still honoured by the installed version.
_cpu_common = dict(
    tree_method='hist',          # CPU-compatible histogram method
    predictor='cpu_predictor',   # Explicit CPU predictor
    objective='binary:logistic',
    eval_metric='auc',
    verbosity=0,
    nthread=1,                   # Single thread for stability
    random_state=42,             # Will be overridden per run
)

# Lightweight config for RFECV estimator (faster iterations)
XGBOOST_RFECV_CONFIG_CPU = dict(
    n_estimators=150,
    max_depth=6,
    learning_rate=0.1,
    subsample=1.0,
    colsample_bytree=1.0,
    **_cpu_common,
)

# Full config for final model training
XGBOOST_UNIFIED_CONFIG_CPU = dict(
    n_estimators=500,
    max_depth=8,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    **_cpu_common,
)

print("CPU-compatible RFECV estimator configuration:")
for _param, _suffix in (
    ('tree_method', ""),
    ('predictor', ""),
    ('n_estimators', " (lightweight)"),
    ('nthread', " (sequential)"),
):
    print(f"  {_param}: {XGBOOST_RFECV_CONFIG_CPU[_param]}{_suffix}")

print("\nCPU-compatible final model configuration:")
for _param, _suffix in (
    ('tree_method', ""),
    ('predictor', ""),
    ('n_estimators', " (full)"),
):
    print(f"  {_param}: {XGBOOST_UNIFIED_CONFIG_CPU[_param]}{_suffix}")

# ==========================================
# 3. EXPERIMENT SETUP
# ==========================================

print("\n[STEP 3/7] Setting up Baseline 3 experiment...")
print("-" * 50)

# Output layout: <baseline_results>/baseline_3_rfecv/models
BASELINE_3_DIR = PROJECT_PATHS['baseline_results'] + "/baseline_3_rfecv"
MODELS_DIR = BASELINE_3_DIR + "/models"
# Creating the deepest directory also creates the experiment directory.
os.makedirs(MODELS_DIR, exist_ok=True)

print(f"Experiment directory: {BASELINE_3_DIR}")
print(f"Models directory: {MODELS_DIR}")

# Human-readable recap of the RFECV settings about to be used.
print("\nExperiment configuration:")
for _line in (
    "  Method: RFECV (Wrapper - model-guided)",
    f"  RFECV step: {RFECV_CONFIG['step']} features",
    f"  RFECV CV: {RFECV_CONFIG['cv_folds']} folds",
    f"  Min features: {RFECV_CONFIG['min_features_to_select']}",
    f"  Number of runs: {CONFIG['n_runs']}",
    "  Hardware: CPU-based computation",
    "  Parallel processing: DISABLED (n_jobs=1, nthread=1 for stability)",
):
    print(_line)

# ==========================================
# 4. TRAINING LOOP - 10 INDEPENDENT RFECV RUNS
# ==========================================

print("\n[STEP 4/7] Running 10 independent RFECV experiments...")
print("-" * 50)
print("Note: Each run may take 10-15 minutes due to cross-validation")

# Results record for the whole experiment; per-run entries, statistics,
# feature stability and the best model are filled in by later steps.
training_results = {
    'experiment_info': dict(
        name='Baseline 3: XGBoost with RFECV (CPU)',
        description='Wrapper method - recursive feature elimination with CV, CPU implementation',
        timestamp=datetime.now().isoformat(),
        n_runs=CONFIG['n_runs'],
        gpu_available=False,
        compute_backend='CPU',
        notes='n_jobs=1, nthread=1, tree_method=hist for stability and reproducibility',
    ),
    'rfecv_configuration': dict(
        step=RFECV_CONFIG['step'],
        cv_folds=RFECV_CONFIG['cv_folds'],
        min_features_to_select=RFECV_CONFIG['min_features_to_select'],
        scoring=RFECV_CONFIG['scoring'],
        n_jobs=1,
    ),
    'configuration': dict(
        xgboost_rfecv_config=XGBOOST_RFECV_CONFIG_CPU,
        xgboost_unified_config=XGBOOST_UNIFIED_CONFIG_CPU,
    ),
    'training_results': {'runs': [], 'statistics': {}},
    'feature_stability': {},
    'best_model': {},
}

start_time_total = time.time()
# These three lists grow together, one entry per successful run.
validation_aucs = []
all_selected_features = []
all_optimal_n = []

for run_id in range(CONFIG['n_runs']):
    _run_seed = CONFIG['random_state'] + run_id

    print(f"\nRun {run_id + 1}/{CONFIG['n_runs']} (seed={_run_seed})")
    print("-" * 40)

    start_time_run = time.time()

    # A failure anywhere in a run is reported and the run is skipped, so the
    # remaining runs still execute.
    try:
        # ---- Phase 1: RFECV feature selection ----
        print("  Phase 1: RFECV feature selection...")

        # Lightweight estimator seeded for this run.
        rfecv_estimator_config = {**XGBOOST_RFECV_CONFIG_CPU, 'random_state': _run_seed}
        rfecv_estimator = xgb.XGBClassifier(**rfecv_estimator_config)

        # n_jobs=1 keeps the cross-validation sequential.
        rfecv = RFECV(
            estimator=rfecv_estimator,
            step=RFECV_CONFIG['step'],
            cv=RFECV_CONFIG['cv_folds'],
            scoring=RFECV_CONFIG['scoring'],
            min_features_to_select=RFECV_CONFIG['min_features_to_select'],
            n_jobs=1,
            verbose=0,
        )

        # Selection is fitted on the training split only.
        rfecv_start = time.time()
        print("    Starting RFECV (this may take several minutes)...")
        rfecv.fit(X_train, y_train)
        rfecv_time = time.time() - rfecv_start

        # Pull the fitted attributes out once.
        optimal_n_features = rfecv.n_features_
        selected_mask = rfecv.support_
        feature_ranking = rfecv.ranking_
        cv_scores = rfecv.cv_results_['mean_test_score']
        # Number of entries in the CV score trajectory.
        convergence_iteration = len(cv_scores)

        selected_features = X_train.columns[selected_mask].tolist()

        _best_cv_score = np.max(cv_scores)
        _cv_trajectory = [float(_score) for _score in cv_scores]
        _rfecv_seconds = round(rfecv_time, 2)

        print(f"    RFECV completed in {rfecv_time:.1f}s ({rfecv_time/60:.1f} min)")
        print(f"    Optimal features: {optimal_n_features}/{X_train.shape[1]}")
        print(f"    Best CV AUC: {_best_cv_score:.4f}")
        print(f"    Convergence iterations: {convergence_iteration}")

        # ---- Phase 2: final model on the selected features ----
        print("  Phase 2: Training final model...")

        X_train_selected = rfecv.transform(X_train)
        X_val_selected = rfecv.transform(X_val)

        # Full-size model, same seed as the selection estimator.
        final_model_config = {**XGBOOST_UNIFIED_CONFIG_CPU, 'random_state': _run_seed}
        final_model = xgb.XGBClassifier(**final_model_config)
        final_model.fit(X_train_selected, y_train, verbose=False)

        # Score on the validation split.
        y_val_pred = final_model.predict(X_val_selected)
        y_val_proba = final_model.predict_proba(X_val_selected)[:, 1]
        val_metrics = calculate_metrics(y_val, y_val_pred, y_val_proba)

        # Elapsed time covers both phases of this run.
        training_time = time.time() - start_time_run

        # Per-run selection details go to their own JSON file.
        rfecv_details_path = f"{MODELS_DIR}/run_{run_id}_rfecv_details.json"
        rfecv_details = {
            'run_id': run_id,
            'optimal_n_features': int(optimal_n_features),
            'selected_features': selected_features,
            'feature_ranking': dict(zip(X_train.columns, map(int, feature_ranking))),
            'cv_scores_trajectory': _cv_trajectory,
            'convergence_iterations': convergence_iteration,
            'rfecv_time_seconds': _rfecv_seconds,
        }
        save_results(rfecv_details, rfecv_details_path)

        model_path = f"{MODELS_DIR}/run_{run_id}_model.json"
        final_model.save_model(model_path)

        # Record for the aggregated results file.
        run_result = {
            'run_id': run_id,
            'random_state': _run_seed,
            'rfecv_results': {
                'optimal_n_features': int(optimal_n_features),
                'selected_features': selected_features,
                'convergence_iterations': convergence_iteration,
                'best_cv_score': float(_best_cv_score),
                'cv_scores_trajectory': list(_cv_trajectory),
                'rfecv_time_seconds': _rfecv_seconds,
            },
            'training_time_seconds': round(training_time, 2),
            'validation_metrics': val_metrics,
            'model_path': model_path,
            'rfecv_details_path': rfecv_details_path,
        }

        training_results['training_results']['runs'].append(run_result)
        validation_aucs.append(val_metrics['roc_auc'])
        all_selected_features.append(set(selected_features))
        all_optimal_n.append(optimal_n_features)

        print(f"  Validation AUC: {val_metrics['roc_auc']:.4f}")
        print(f"  Validation Accuracy: {val_metrics['accuracy']:.4f}")
        print(f"  Total time: {training_time:.2f}s ({training_time/60:.1f} min)")
        print(f"  Model saved: {model_path}")
        print(f"  Run {run_id + 1} completed successfully")

    except Exception as e:
        # Best effort: report and move on to the next seed.
        print(f"  ERROR in Run {run_id + 1}: {e!s}")
        print("  Skipping this run and continuing...")

total_training_time = time.time() - start_time_total

print(f"\nTotal training time: {total_training_time:.2f}s ({total_training_time/60:.2f} min)")

# ==========================================
# 5. STATISTICAL SUMMARY
# ==========================================

print("\n[STEP 5/7] Computing statistical summary...")
print("-" * 50)

# Nothing to summarise if every run was skipped by the training loop.
if len(validation_aucs) == 0:
    print("ERROR: No successful runs completed")
    raise RuntimeError("All RFECV runs failed")

validation_aucs_array = np.array(validation_aucs)
optimal_n_array = np.array(all_optimal_n)

# `validation_aucs` only holds successful runs, so the argmax is a position in
# the list of stored runs, not a run id: the two differ as soon as one run was
# skipped. Keep both, and take the id from the run record itself.
best_run_index = int(np.argmax(validation_aucs_array))
best_run_result = training_results['training_results']['runs'][best_run_index]
best_run_id = int(best_run_result['run_id'])

# ddof=1 gives the sample standard deviation; NumPy returns NaN for it when
# only a single run succeeded.
statistics = {
    'mean_val_auc': float(np.mean(validation_aucs_array)),
    'std_val_auc': float(np.std(validation_aucs_array, ddof=1)),
    'min_val_auc': float(np.min(validation_aucs_array)),
    'max_val_auc': float(np.max(validation_aucs_array)),
    'median_val_auc': float(np.median(validation_aucs_array)),
    'best_run_id': best_run_id,
    'best_run_index': best_run_index,
    'mean_optimal_features': float(np.mean(optimal_n_array)),
    'std_optimal_features': float(np.std(optimal_n_array, ddof=1)),
    'total_training_time_seconds': round(total_training_time, 2),
    'successful_runs': len(validation_aucs)
}

training_results['training_results']['statistics'] = statistics

print("Validation AUC statistics across successful runs:")
print(f"  Mean:   {statistics['mean_val_auc']:.4f}")
print(f"  Std:    {statistics['std_val_auc']:.4f}")
print(f"  Min:    {statistics['min_val_auc']:.4f}")
print(f"  Max:    {statistics['max_val_auc']:.4f}")
print(f"  Median: {statistics['median_val_auc']:.4f}")

print("\nFeature selection statistics:")
print(f"  Mean features selected: {statistics['mean_optimal_features']:.1f}")
print(f"  Std features: {statistics['std_optimal_features']:.1f}")

print(f"\nBest run: Run {statistics['best_run_id']} "
      f"(AUC = {statistics['max_val_auc']:.4f})")

# ==========================================
# 6. FEATURE STABILITY ANALYSIS
# ==========================================

print("\n[STEP 6/7] Analyzing feature stability...")
print("-" * 50)

n_feature_sets = len(all_selected_features)

# Count in how many successful runs each feature was selected.
feature_frequency = {}
for feature_set in all_selected_features:
    for feature in feature_set:
        feature_frequency[feature] = feature_frequency.get(feature, 0) + 1

# Most frequent first. Ties are broken by feature name: the counts are built
# by iterating sets, whose order is not stable between sessions, so without
# the tie-break the reported ranking could change from one execution to the
# next.
sorted_features = sorted(
    feature_frequency.items(),
    key=lambda item: (-item[1], str(item[0])),
)
# Re-key in ranked order so the saved mapping is reproducible as well.
feature_frequency = dict(sorted_features)

print(f"Feature stability analysis (across {n_feature_sets} runs):")
print(f"  Core features (100% frequency):")
core_features = [f for f, freq in sorted_features if freq == n_feature_sets]
for feature in core_features:
    print(f"    - {feature}")

if len(core_features) == 0:
    print("    None (RFECV selections vary across runs)")
    print(f"  Most frequent features:")
    for feature, freq in sorted_features[:10]:
        print(f"    - {feature}: {freq}/{n_feature_sets} runs ({100*freq/n_feature_sets:.0f}%)")

training_results['feature_stability'] = {
    'feature_frequency': feature_frequency,
    'core_features': core_features,
    'sorted_by_frequency': [{'feature': f, 'frequency': freq} for f, freq in sorted_features]
}

# ==========================================
# 7. IDENTIFY AND SAVE BEST MODEL
# ==========================================

print("\n[STEP 7/7] Saving best model and results...")
print("-" * 50)

# Round-trip the winning run's model through disk so the experiment directory
# holds a standalone copy under a fixed file name.
best_model = xgb.XGBClassifier()
best_model.load_model(best_run_result['model_path'])

best_model_path = f"{BASELINE_3_DIR}/baseline_3_best_model.json"
best_model.save_model(best_model_path)

# `run_id` is the id the run was trained under (it matches the per-run file
# names); `run_index` is its position in training_results['runs'].
training_results['best_model'] = {
    'run_id': best_run_id,
    'run_index': best_run_index,
    'random_state': best_run_result['random_state'],
    'validation_auc': best_run_result['validation_metrics']['roc_auc'],
    'validation_metrics': best_run_result['validation_metrics'],
    'optimal_n_features': best_run_result['rfecv_results']['optimal_n_features'],
    'selected_features': best_run_result['rfecv_results']['selected_features'],
    'model_path': best_model_path
}

print(f"Best model: Run {best_run_id}")
print(f"  Validation AUC: {best_run_result['validation_metrics']['roc_auc']:.4f}")
print(f"  Validation Accuracy: {best_run_result['validation_metrics']['accuracy']:.4f}")
print(f"  Features selected: {best_run_result['rfecv_results']['optimal_n_features']}")
print(f"  Saved to: {best_model_path}")

# Save training results
results_path = f"{BASELINE_3_DIR}/baseline_3_training_results_CPU.json"
save_results(training_results, results_path)
print(f"\nTraining results saved: {results_path}")

# Generate summary statistics file
summary = {
    'experiment': 'Baseline 3: XGBoost with RFECV (CPU)',
    'feature_selection': {
        'method': 'Wrapper (Recursive Feature Elimination with CV)',
        'original_features': X_train.shape[1],
        'mean_features_selected': statistics['mean_optimal_features'],
        'std_features_selected': statistics['std_optimal_features'],
        'reduction_percent': round((1 - statistics['mean_optimal_features']/X_train.shape[1]) * 100, 1)
    },
    'n_runs': CONFIG['n_runs'],
    'successful_runs': statistics['successful_runs'],
    'compute_backend': 'CPU',
    'validation_auc': {
        'mean': statistics['mean_val_auc'],
        'std': statistics['std_val_auc'],
        'range': [statistics['min_val_auc'], statistics['max_val_auc']]
    },
    'best_run': {
        'run_id': best_run_id,
        'validation_auc': statistics['max_val_auc'],
        'features_selected': best_run_result['rfecv_results']['optimal_n_features']
    },
    'training_time_total_minutes': round(total_training_time / 60, 2)
}

summary_path = f"{BASELINE_3_DIR}/training_summary_CPU.json"
save_results(summary, summary_path)
print(f"Training summary saved: {summary_path}")

# ==========================================
# COMPLETION SUMMARY
# ==========================================

# Final console recap of the Baseline 3 experiment; nothing is computed or
# saved here beyond formatting values produced by the earlier steps.
_banner = "=" * 80
_mean_selected = statistics['mean_optimal_features']
_std_selected = statistics['std_optimal_features']
_n_feature_sets = len(all_selected_features)

print("\n" + _banner)
print("BASELINE 3 TRAINING COMPLETED SUCCESSFULLY (CPU)")
print(_banner)

print("\nEXPERIMENT SUMMARY:")
print("  Method: RFECV (Wrapper - model-guided)")
print(f"  Feature selection: {X_train.shape[1]} -> {_mean_selected:.1f} ± {_std_selected:.1f} features")
print(f"  Reduction: {(1 - _mean_selected/X_train.shape[1]) * 100:.1f}%")
print(f"  Training runs: {statistics['successful_runs']}/{CONFIG['n_runs']} successful")
print("  Compute backend: CPU (tree_method='hist', n_jobs=1)")
print(f"  Total training time: {total_training_time/60:.2f} minutes")

print("\nVALIDATION PERFORMANCE:")
print(f"  Mean AUC: {statistics['mean_val_auc']:.4f} ± {statistics['std_val_auc']:.4f}")
print(f"  Best AUC: {statistics['max_val_auc']:.4f} (Run {best_run_id})")
print(f"  AUC range: [{statistics['min_val_auc']:.4f}, {statistics['max_val_auc']:.4f}]")

# Ten most frequently selected features with their selection rate.
print("\nMOST STABLE FEATURES:")
for feature, freq in sorted_features[:10]:
    print(f"  - {feature}: {freq}/{_n_feature_sets} runs ({100*freq/_n_feature_sets:.0f}%)")

print("\nOUTPUTS GENERATED:")
print(f"  Training results: {results_path}")
print(f"  Best model: {best_model_path}")
print(f"  All models: {MODELS_DIR}/ ({statistics['successful_runs']} models)")
print(f"  RFECV details: {MODELS_DIR}/*_rfecv_details.json")

print("\nNEXT STEPS:")
for _line in (
    "  Cell 7: Baseline 3 Testing - Test set evaluation",
    "  Cell 8: Comprehensive comparison across all methods",
):
    print(_line)

print("\n" + _banner)
print("READY FOR TESTING PHASE")
print(_banner)

BASELINE 3 TRAINING: XGBOOST WITH RFECV (CPU)

[STEP 1/7] Verifying Cell 1 completion...
--------------------------------------------------
VERIFIED: Training data (1270 samples x 25 features)
VERIFIED: Validation data (318 samples)
VERIFIED: RFECV config loaded
VERIFIED: Number of runs = 10

[STEP 2/7] Setting up CPU-compatible XGBoost configurations...
--------------------------------------------------
CPU-compatible RFECV estimator configuration:
  tree_method: hist
  predictor: cpu_predictor
  n_estimators: 150 (lightweight)
  nthread: 1 (sequential)

CPU-compatible final model configuration:
  tree_method: hist
  predictor: cpu_predictor
  n_estimators: 500 (full)

[STEP 3/7] Setting up Baseline 3 experiment...
--------------------------------------------------
Experiment directory: /content/drive/MyDrive/ASD_GWO_XGBoost_Project/03_Results/output_notebook_02/cell_1_setup/baseline_3_rfecv
Models directory: /content/drive/MyDrive/ASD_GWO_XGBoost_Project/03_Results/output_notebook_02