In [1]:
# Standard library imports
import os
import sys
import time
from datetime import datetime
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor, as_completed
from multiprocessing import cpu_count

# Disable GPU (CUDA drivers not available in dev container)
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Suppress TensorFlow warnings

# Set environment variable to limit thread usage per worker
os.environ['OMP_NUM_THREADS'] = '2'

# Third party imports
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline

# Add models directory to path for custom transformers
models_path = Path('../models').resolve()
sys.path.insert(0, str(models_path))

# Import custom transformers from models directory
from logistic_regression_transformers import IDColumnDropper, IQRClipper, ConstantFeatureRemover

# Import ensemble modules from functions package
from functions import ensemble_database
from functions.ensemble_hill_climbing import (
    generate_random_pipeline,
    calculate_ensemble_diversity,
    quick_optimize_pipeline,
    adaptive_simulated_annealing_acceptance,
    update_temperature,
    compute_pipeline_hash,
    log_iteration
)
from functions.ensemble_stage2_model import (
    build_stage2_dnn,
    train_stage2_dnn,
    save_checkpoint,
    load_checkpoint,
    evaluate_ensemble
)

# Keep TensorFlow's Python logger quiet (errors only)
tf.get_logger().setLevel('ERROR')

# Detect the CPU count (used later to size the worker pool) and report
# the runtime environment in a single print call
n_cpus = cpu_count()
print(
    f"TensorFlow version: {tf.__version__}",
    f"Available CPUs: {n_cpus}",
    "GPU disabled: CUDA drivers not available in dev container",
    sep="\n",
)

2025-12-07 02:54:38.952523: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765076078.974403  122710 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765076078.981230  122710 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


TensorFlow version: 2.18.0
Available CPUs: 24
GPU disabled: CUDA drivers not available in dev container


### Configuration

In [2]:
# Notebook-wide configuration: seeds, parallelism, hill-climbing and stage 2
# hyperparameters, output paths, and the SQLite log used by the training loop.

# Random state for reproducibility (only for fixed data splits)
RANDOM_STATE = 315

# Parallel training configuration
BATCH_SIZE = 10  # Train this many candidates in parallel
# One worker per candidate in a batch, capped at the number of detected CPUs
N_WORKERS = min(BATCH_SIZE, n_cpus)

# Hill climbing configuration
MAX_ITERATIONS = 500
PLATEAU_ITERATIONS = 100  # Stop if no improvement for this many iterations
BASE_TEMPERATURE = 0.01
TEMPERATURE_DECAY = 0.995

# Stage 2 DNN configuration
STAGE2_BATCH_SIZE_MODELS = 10  # Retrain DNN every N accepted models
STAGE2_EPOCHS = 100
STAGE2_BATCH_SIZE = 128
STAGE2_PATIENCE = 10

# Checkpoint configuration
RESUME_FROM_CHECKPOINT = False  # Set to True to resume from saved checkpoint

# Paths
DATA_DIR = Path('../data')
MODELS_DIR = Path('../models')
ENSEMBLE_DIR = MODELS_DIR / 'ensemble_stage1_models'
CHECKPOINT_PATH = MODELS_DIR / 'ensemble_checkpoint.pkl'

# Create directories (parents=True also creates MODELS_DIR if it is missing)
DATA_DIR.mkdir(parents=True, exist_ok=True)
ENSEMBLE_DIR.mkdir(parents=True, exist_ok=True)

# Initialize the SQLite database used for logging.
# Only wipe it for a fresh run: when resuming from a checkpoint the existing
# log belongs to the run being resumed and must be preserved.
if RESUME_FROM_CHECKPOINT:
    print("Resuming from checkpoint: keeping existing database...")
else:
    print("Resetting database for fresh training run...")
    ensemble_database.reset_database()
# NOTE(review): assumes init_database() only creates missing tables and does
# not drop existing rows - confirm in functions/ensemble_database.py.
ensemble_database.init_database()

print(f"\nConfiguration:")
print(f"  Random state: {RANDOM_STATE} (for fixed data splits only)")
print(f"  Parallel batch size: {BATCH_SIZE} candidates")
print(f"  Parallel workers: {N_WORKERS}")
print(f"  Max iterations: {MAX_ITERATIONS}")
print(f"  Plateau threshold: {PLATEAU_ITERATIONS}")
print(f"  Stage 2 batch size: {STAGE2_BATCH_SIZE_MODELS} models")
print(f"  Resume from checkpoint: {RESUME_FROM_CHECKPOINT}")
print(f"  Ensemble directory: {ENSEMBLE_DIR}")
print(f"  Database: {ensemble_database.DB_PATH}")

Resetting database for fresh training run...
Database initialized at: /workspaces/diabetes-prediction/data/ensemble_training.db
Database initialized at: /workspaces/diabetes-prediction/data/ensemble_training.db

Configuration:
  Random state: 315 (for fixed data splits only)
  Parallel batch size: 10 candidates
  Parallel workers: 10
  Max iterations: 500
  Plateau threshold: 100
  Stage 2 batch size: 10 models
  Resume from checkpoint: False
  Ensemble directory: ../models/ensemble_stage1_models
  Database: /workspaces/diabetes-prediction/data/ensemble_training.db


### Data Loading

In [3]:
# Name of the binary target column
label = 'diagnosed_diabetes'

# Read the training CSV and discard exact duplicate rows
train_df_path = 'https://gperdrizet.github.io/FSA_devops/assets/data/unit3/diabetes_prediction_train.csv'
train_df = pd.read_csv(train_df_path)
train_df = train_df.drop_duplicates()

print(f"Training data shape: {train_df.shape}")
print("Class distribution:")
print(train_df[label].value_counts(normalize=True))

# Feature groups (from logistic regression notebook); each group is routed
# to its own branch of the preprocessor built in the next cell
numerical_features = [
    # demographics and lifestyle
    'age',
    'alcohol_consumption_per_week',
    'physical_activity_minutes_per_week',
    'diet_score',
    'sleep_hours_per_day',
    'screen_time_hours_per_day',
    # body measurements and vitals
    'bmi',
    'waist_to_hip_ratio',
    'systolic_bp',
    'diastolic_bp',
    'heart_rate',
    # blood lipids
    'cholesterol_total',
    'hdl_cholesterol',
    'ldl_cholesterol',
    'triglycerides',
    # medical history columns
    'family_history_diabetes',
    'hypertension_history',
    'cardiovascular_history',
]

# Ordered categoricals, with their category order listed lowest to highest
ordinal_features = ['education_level', 'income_level']
education_categories = [['No formal', 'Highschool', 'Graduate', 'Postgraduate']]
income_categories = [['Low', 'Lower-Middle', 'Middle', 'Upper-Middle', 'High']]

# Unordered categoricals
nominal_features = ['gender', 'ethnicity', 'smoking_status', 'employment_status']

Training data shape: (700000, 26)
Class distribution:
diagnosed_diabetes
1.0    0.623296
0.0    0.376704
Name: proportion, dtype: float64


### Create Base Preprocessor

This preprocessor will be shared across all stage 1 models for consistent encoding.

In [4]:
# Numerical branch: clip with IQRClipper (multiplier 2.0), then standardize
numerical_pipeline = Pipeline(steps=[
    ('clipper', IQRClipper(iqr_multiplier=2.0)),
    ('scaler', StandardScaler()),
])

# Ordinal branch: integer-encode using the explicit category orders;
# categories not in those lists are encoded as -1
ordinal_encoder = OrdinalEncoder(
    categories=[*education_categories, *income_categories],
    handle_unknown='use_encoded_value',
    unknown_value=-1,
)

# Nominal branch: dense one-hot encoding with the first level dropped
onehot_encoder = OneHotEncoder(
    drop='first',
    sparse_output=False,
    handle_unknown='ignore',
)

# Assemble the three branches into the shared base preprocessor
base_preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_pipeline, numerical_features),
    ('ord', ordinal_encoder, ordinal_features),
    ('nom', onehot_encoder, nominal_features),
])

print("Base preprocessor created")
for group_name, group_columns in (
    ("Numerical", numerical_features),
    ("Ordinal", ordinal_features),
    ("Nominal", nominal_features),
):
    print(f"  {group_name} features: {len(group_columns)}")

Base preprocessor created
  Numerical features: 18
  Ordinal features: 2
  Nominal features: 4


## Initialize or Resume Ensemble

### Option 1: Initialize Founder Model (if starting fresh)

In [5]:
# Fresh-start path: build the fixed data splits, then train, score, save and
# log the first ("founder") stage 1 model.
if not RESUME_FROM_CHECKPOINT:
    print("=" * 80, "INITIALIZING FOUNDER MODEL", "=" * 80, sep="\n")

    # The ensemble starts empty; the founder is appended once it is trained
    ensemble_models = []

    # Separate predictors from the target column
    X_full = train_df.drop(columns=[label])
    y_full = train_df[label]

    # FIXED three-way split (seeded with RANDOM_STATE):
    #   - training pool (60%): stage 1 models train on random samples of it
    #   - stage 1 validation (20%): scores stage 1 models, trains stage 2
    #   - stage 2 validation (20%): scores the stage 2 model (held out)

    # Step 1: carve off 40% of the data for the two validation sets
    X_train_pool, X_val_combined, y_train_pool, y_val_combined = train_test_split(
        X_full, y_full,
        test_size=0.4,
        stratify=y_full,
        random_state=RANDOM_STATE,
    )

    # Step 2: halve that 40% into the stage 1 and stage 2 validation sets
    X_val_s1, X_val_s2, y_val_s1, y_val_s2 = train_test_split(
        X_val_combined, y_val_combined,
        test_size=0.5,
        stratify=y_val_combined,
        random_state=RANDOM_STATE,
    )

    print("\nFixed data split:")
    print("-" * 80)
    print(f"  Training pool: {len(X_train_pool):,} samples (60%)")
    print(f"  Stage 1 validation: {len(X_val_s1):,} samples (20%) - for stage 1 eval & stage 2 training")
    print(f"  Stage 2 validation: {len(X_val_s2):,} samples (20%) - for stage 2 eval (HELD OUT)")

    # Seeded draw of the founder's training-set size, between 10,000 and 50,000
    rng = np.random.RandomState(RANDOM_STATE)
    founder_sample_size = rng.randint(10000, 50001)

    # Stratified sample of that size from the training pool; no random_state
    # is passed, so the rows chosen differ from run to run
    X_train, _, y_train, _ = train_test_split(
        X_train_pool, y_train_pool,
        train_size=founder_sample_size,
        stratify=y_train_pool,
    )

    print("\nTraining founder model")
    print("-" * 80)
    print(f"  Training samples: {len(X_train):,}")

    # Draw the founder's pipeline configuration (iteration 0)
    pipeline, metadata = generate_random_pipeline(
        iteration=0,
        random_state=RANDOM_STATE,
        base_preprocessor=base_preprocessor,
    )

    print("  Pipeline config:")
    print(f"    Classifier: {metadata['classifier_type']}")
    print(f"    Transformers: {metadata['transformers_used']}")
    print(f"    Use PCA: {metadata['use_pca']}")

    # Plain fit on the sample: no cross-validation, no hyperparameter search
    print("  Training pipeline...")
    start_time = time.time()
    fitted_pipeline = pipeline.fit(X_train, y_train)
    training_time = time.time() - start_time
    print(f"  Training complete ({training_time:.1f}s)")

    # Score the FIXED stage 1 validation set; pipelines without
    # predict_proba fall back to decision_function
    if not hasattr(fitted_pipeline, 'predict_proba'):
        val_pred_s1 = fitted_pipeline.decision_function(X_val_s1)
    else:
        val_pred_s1 = fitted_pipeline.predict_proba(X_val_s1)[:, 1]

    val_auc_s1 = roc_auc_score(y_val_s1, val_pred_s1)
    print(f"  Stage 1 validation AUC: {val_auc_s1:.6f}")

    # Register the founder in the ensemble and persist it to disk
    ensemble_models.append(fitted_pipeline)
    model_path = ENSEMBLE_DIR / 'founder_model.joblib'
    joblib.dump(fitted_pipeline, model_path)

    # Record the founder as accepted iteration 0 in the log
    pipeline_hash = compute_pipeline_hash(fitted_pipeline, metadata)
    ensemble_id = "founder"
    log_iteration(
        iteration=0,
        accepted=True,
        rejection_reason='founder',
        pipeline_hash=pipeline_hash,
        stage1_val_auc=val_auc_s1,
        stage2_val_auc=val_auc_s1,  # no stage 2 model yet, so reuse the stage 1 score
        ensemble_size=1,
        diversity_score=0.0,
        temperature=BASE_TEMPERATURE,
        metadata=metadata,
        ensemble_id=ensemble_id,
    )

    print("\n" + "=" * 80)
    print("FOUNDER MODEL COMPLETE")
    print("=" * 80)
    print(f"Stage 1 validation AUC: {val_auc_s1:.6f}")

INITIALIZING FOUNDER MODEL

Fixed data split:
--------------------------------------------------------------------------------
  Training pool: 420,000 samples (60%)
  Stage 1 validation: 140,000 samples (20%) - for stage 1 eval & stage 2 training
  Stage 2 validation: 140,000 samples (20%) - for stage 2 eval (HELD OUT)

Training founder model
--------------------------------------------------------------------------------
  Training samples: 39,283
  Pipeline config:
    Classifier: extra_trees
    Transformers: ['ratio', 'log']
    Use PCA: True
  Training pipeline...
  Training complete (1.1s)
  Stage 1 validation AUC: 0.537828

FOUNDER MODEL COMPLETE
Stage 1 validation AUC: 0.537828


### Initialize Ensemble (if starting fresh)

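Stage 2 training is batch-based: candidate ensembles are scored with a simple mean of stage 1 predictions until the ensemble reaches 10 models. The stage 2 DNN is then trained, and retrained each time the ensemble grows by another 10 accepted models, carrying over weights from the previous network (transfer learning).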


In [6]:
# Fresh-start path: score the founder on the stage 2 validation set and set
# up the state variables consumed by the hill climbing loop.
if not RESUME_FROM_CHECKPOINT:
    print("\n" + "=" * 80)
    print("INITIALIZING ENSEMBLE")
    print("=" * 80)

    # The stage 2 DNN does not exist until the first batch of models is accepted
    stage2_model = None

    # With a single model there is nothing to aggregate: score the founder
    # directly, using decision_function when predict_proba is unavailable
    if not hasattr(fitted_pipeline, 'predict_proba'):
        val_pred_s2 = fitted_pipeline.decision_function(X_val_s2)
    else:
        val_pred_s2 = fitted_pipeline.predict_proba(X_val_s2)[:, 1]

    founder_auc = roc_auc_score(y_val_s2, val_pred_s2)

    print("\nFounder model performance:")
    print(f"  Single model ROC-AUC: {founder_auc:.6f}")
    print(f"  Stage 2 DNN will be trained after {STAGE2_BATCH_SIZE_MODELS} accepted models")

    # The founder's score is the baseline every candidate ensemble must beat
    best_ensemble_score = founder_auc

    # Hill climbing state
    start_iteration = 1  # the founder occupied iteration 0
    temperature = BASE_TEMPERATURE
    acceptance_history = []



INITIALIZING ENSEMBLE

Founder model performance:
  Single model ROC-AUC: 0.539989
  Stage 2 DNN will be trained after 10 accepted models


## Hill Climbing Loop

Iteratively add diverse models with simulated annealing acceptance.

## Helper Functions for Parallel Training

In [7]:
def train_single_candidate(args):
    """Build, fit and score one candidate pipeline (runs in a worker process).

    Parameters
    ----------
    args : tuple
        Packed as ``(iteration, X_train, y_train, X_val_s1, y_val_s1,
        base_preprocessor, random_state)``. A single tuple is used so the job
        can be submitted to an executor as one argument.

    Returns
    -------
    dict
        Keys, in order:
        - iteration: iteration number taken from ``args``
        - fitted_pipeline: the pipeline after ``fit``
        - metadata: configuration returned by ``generate_random_pipeline``
        - val_auc_s1: ROC-AUC on the stage 1 validation data
        - pipeline_hash: value returned by ``compute_pipeline_hash``
        - training_time: wall-clock seconds for the whole call (pipeline
          generation, fitting, scoring and hashing)
    """
    (iteration, X_train, y_train, X_val_s1, y_val_s1,
     base_preprocessor, random_state) = args

    clock_start = time.time()

    # Draw the candidate's configuration, then fit it on the supplied sample
    candidate, metadata = generate_random_pipeline(
        iteration=iteration,
        random_state=random_state,
        base_preprocessor=base_preprocessor
    )
    fitted_pipeline = candidate.fit(X_train, y_train)

    # Positive-class scores; fall back to decision_function when the
    # pipeline does not expose predict_proba
    if not hasattr(fitted_pipeline, 'predict_proba'):
        val_scores = fitted_pipeline.decision_function(X_val_s1)
    else:
        val_scores = fitted_pipeline.predict_proba(X_val_s1)[:, 1]

    val_auc_s1 = roc_auc_score(y_val_s1, val_scores)
    pipeline_hash = compute_pipeline_hash(fitted_pipeline, metadata)

    # The timer is stopped last so it covers everything above
    return {
        'iteration': iteration,
        'fitted_pipeline': fitted_pipeline,
        'metadata': metadata,
        'val_auc_s1': val_auc_s1,
        'pipeline_hash': pipeline_hash,
        'training_time': time.time() - clock_start,
    }

print("Helper functions defined")

Helper functions defined


In [None]:
# Hill climbing driver: train candidate pipelines in parallel batches, then
# accept or reject each candidate sequentially with simulated annealing,
# (re)training the stage 2 DNN every STAGE2_BATCH_SIZE_MODELS ensemble members.


def _positive_scores(model, X):
    """Return the model's positive-class scores for X.

    Uses ``predict_proba(X)[:, 1]`` when the model exposes it and falls back
    to ``decision_function(X)`` otherwise.
    """
    if hasattr(model, 'predict_proba'):
        return model.predict_proba(X)[:, 1]
    return model.decision_function(X)


print(f"\n{'=' * 80}")
print("STARTING PARALLEL HILL CLIMBING LOOP")
print(f"{'=' * 80}")
print(f"Batch size: {BATCH_SIZE} candidates trained in parallel")
print(f"Workers: {N_WORKERS} parallel processes")

iterations_since_improvement = 0
iteration = start_iteration

# Cache each accepted model's scores on both fixed validation sets so they are
# computed once per model instead of once per candidate evaluation. Both lists
# stay index-aligned with ensemble_models.
ensemble_preds_s1 = [_positive_scores(model, X_val_s1) for model in ensemble_models]
ensemble_preds_s2 = [_positive_scores(model, X_val_s2) for model in ensemble_models]

while iteration < MAX_ITERATIONS and iterations_since_improvement < PLATEAU_ITERATIONS:
    print(f"\n{'=' * 80}")
    print(f"BATCH Starting at iteration {iteration + 1}")
    print(f"{'=' * 80}")
    print(f"Current ensemble size: {len(ensemble_models)}")
    print(f"Best score: {best_ensemble_score:.6f}")
    print(f"Temperature: {temperature:.6f}")
    print(f"Iterations since improvement: {iterations_since_improvement}/{PLATEAU_ITERATIONS}")

    # Prepare batch of training jobs
    batch_jobs = []
    for i in range(BATCH_SIZE):
        current_iter = iteration + i
        if current_iter >= MAX_ITERATIONS:
            break

        # Random sample size for this iteration (seeded per iteration)
        rng = np.random.RandomState(RANDOM_STATE + current_iter)
        iteration_sample_size = rng.randint(10000, 50001)

        # Sample from training pool (no random_state = different sample each time)
        X_train, _, y_train, _ = train_test_split(
            X_train_pool,
            y_train_pool,
            train_size=iteration_sample_size,
            stratify=y_train_pool
        )

        batch_jobs.append((
            current_iter,
            X_train,
            y_train,
            X_val_s1,
            y_val_s1,
            base_preprocessor,
            RANDOM_STATE + current_iter
        ))

    print(f"\nTraining {len(batch_jobs)} candidates in parallel...")
    batch_start_time = time.time()

    # Train candidates in parallel; a failed job is reported and skipped
    trained_candidates = []
    with ProcessPoolExecutor(max_workers=N_WORKERS) as executor:
        futures = {executor.submit(train_single_candidate, job): job for job in batch_jobs}

        for future in as_completed(futures):
            try:
                result = future.result()
                trained_candidates.append(result)
                print(f"  ✓ Iteration {result['iteration'] + 1}: {result['metadata']['classifier_type']} "
                      f"AUC={result['val_auc_s1']:.6f} ({result['training_time']:.1f}s)")
            except Exception as e:
                job = futures[future]
                print(f"  ✗ Iteration {job[0] + 1} failed: {e}")

    batch_time = time.time() - batch_start_time
    if trained_candidates:
        print(f"\nBatch training complete ({batch_time:.1f}s, {batch_time/len(trained_candidates):.1f}s per model)")
    else:
        # Every job failed: avoid dividing by zero in the per-model timing
        print(f"\nBatch training complete ({batch_time:.1f}s, no candidates trained successfully)")

    # Sort by iteration number for consistent processing
    trained_candidates.sort(key=lambda x: x['iteration'])

    # Process each trained candidate sequentially for acceptance/rejection
    for result in trained_candidates:
        current_iter = result['iteration']
        fitted_pipeline = result['fitted_pipeline']
        metadata = result['metadata']
        val_auc_s1 = result['val_auc_s1']
        pipeline_hash = result['pipeline_hash']

        print(f"\n{'-' * 80}")
        print(f"Processing Iteration {current_iter + 1}")
        print(f"  Classifier: {metadata['classifier_type']}")
        print(f"  Transformers: {', '.join(metadata['transformers_used']) if metadata['transformers_used'] else 'None'}")
        print(f"  Stage 1 validation AUC: {val_auc_s1:.6f}")

        # Add to candidate pool and evaluate ensemble
        print(f"  Evaluating ensemble with candidate...")
        candidate_ensemble = ensemble_models + [fitted_pipeline]

        # Score the candidate once per validation set; scores of the already
        # accepted models come from the caches
        candidate_pred_s1 = _positive_scores(fitted_pipeline, X_val_s1)
        candidate_pred_s2 = _positive_scores(fitted_pipeline, X_val_s2)
        candidate_preds_s2 = ensemble_preds_s2 + [candidate_pred_s2]

        # Determine how many models have been trained with the current DNN
        if stage2_model is None:
            n_dnn_trained = 0
        else:
            n_dnn_trained = stage2_model.input_shape[1]

        # Calculate number of "new" models (accepted but not yet in DNN)
        n_new_models = len(candidate_ensemble) - n_dnn_trained

        # Hybrid scoring: DNN for old models + simple mean for new models
        if n_dnn_trained == 0:
            # No DNN yet - use simple mean for all
            ensemble_pred = np.mean(candidate_preds_s2, axis=0)
            candidate_score = roc_auc_score(y_val_s2, ensemble_pred)
            aggregation_method = "simple mean (all)"

        elif n_new_models == 0:
            # All models are in the DNN - use DNN only
            candidate_score = evaluate_ensemble(
                stage1_models=candidate_ensemble,
                stage2_model=stage2_model,
                X=X_val_s2,
                y=y_val_s2
            )
            aggregation_method = "DNN (all)"

        else:
            # Hybrid: DNN for first n_dnn_trained models + simple mean for new models
            # Stage 1 predictions of the models the DNN was trained on
            dnn_stage1_preds = np.column_stack(ensemble_preds_s2[:n_dnn_trained])

            # Get DNN output (these are the weighted predictions)
            dnn_output = stage2_model.predict(dnn_stage1_preds, verbose=0).flatten()

            # Simple mean over the models not yet in the DNN (including candidate)
            new_mean_pred = np.mean(candidate_preds_s2[n_dnn_trained:], axis=0)

            # Combine: weighted average
            # DNN output represents the aggregated prediction for n_dnn_trained models
            # New mean represents the average prediction for n_new_models models
            combined_pred = (dnn_output * n_dnn_trained + new_mean_pred * n_new_models) / len(candidate_ensemble)
            candidate_score = roc_auc_score(y_val_s2, combined_pred)
            aggregation_method = f"hybrid (DNN×{n_dnn_trained} + mean×{n_new_models})"

        print(f"  Candidate ensemble AUC ({aggregation_method}): {candidate_score:.6f}")

        # Calculate diversity on stage 1 validation set
        all_predictions = np.column_stack(ensemble_preds_s1 + [candidate_pred_s1])
        diversity_score = calculate_ensemble_diversity(all_predictions)
        print(f"  Diversity score: {diversity_score:.6f}")

        # Simulated annealing acceptance
        accept, reason = adaptive_simulated_annealing_acceptance(
            current_score=best_ensemble_score,
            candidate_score=candidate_score,
            temperature=temperature,
            random_state=RANDOM_STATE + current_iter
        )

        print(f"  Decision: {'✓ ACCEPT' if accept else '✗ REJECT'} ({reason})")

        # Log iteration
        ensemble_id = f"iter_{current_iter}"
        log_iteration(
            iteration=current_iter,
            accepted=accept,
            rejection_reason=reason,
            pipeline_hash=pipeline_hash,
            stage1_val_auc=val_auc_s1,
            stage2_val_auc=candidate_score,
            ensemble_size=len(candidate_ensemble) if accept else len(ensemble_models),
            diversity_score=diversity_score,
            temperature=temperature,
            metadata=metadata,
            ensemble_id=ensemble_id
        )

        # Update ensemble if accepted
        if accept:
            ensemble_models.append(fitted_pipeline)
            # Keep the score caches aligned with ensemble_models
            ensemble_preds_s1.append(candidate_pred_s1)
            ensemble_preds_s2.append(candidate_pred_s2)

            # Save model
            model_path = ENSEMBLE_DIR / f'model_{current_iter}.joblib'
            joblib.dump(fitted_pipeline, model_path)

            # Check if we should train/retrain stage 2 DNN
            if len(ensemble_models) % STAGE2_BATCH_SIZE_MODELS == 0:
                print(f"\n{'=' * 80}")
                print(f"BATCH COMPLETE: Training stage 2 DNN on {len(ensemble_models)} models")
                print(f"{'=' * 80}")

                # Stage 2 inputs: one column of stage 1 validation scores per model
                X_stage2_train_full = np.column_stack(ensemble_preds_s1)
                y_stage2_train_full = y_val_s1.values

                # Sample for training (unseeded draw without replacement)
                sample_size = min(50000, len(X_stage2_train_full))
                sample_indices = np.random.choice(len(X_stage2_train_full), size=sample_size, replace=False)
                X_stage2_sample = X_stage2_train_full[sample_indices]
                y_stage2_sample = y_stage2_train_full[sample_indices]

                # Train/val split (80/20 of the sampled rows)
                split_idx = int(len(X_stage2_sample) * 0.8)
                X_train_s2 = X_stage2_sample[:split_idx]
                y_train_s2 = y_stage2_sample[:split_idx]
                X_val_s2_internal = X_stage2_sample[split_idx:]
                y_val_s2_internal = y_stage2_sample[split_idx:]

                if stage2_model is None:
                    # First DNN training
                    print(f"\n  Building initial stage 2 DNN...")
                    stage2_model = build_stage2_dnn(
                        n_models=len(ensemble_models),
                        n_layers=1,
                        units_per_layer=32,
                        dropout=0.2,
                        batch_norm=False,
                        activation='relu',
                        learning_rate=0.001
                    )
                else:
                    # Transfer learning: build new DNN with more inputs, copy weights where possible
                    print(f"\n  Transfer learning: expanding DNN from {stage2_model.input_shape[1]} to {len(ensemble_models)} inputs...")

                    # Save old weights
                    old_weights = stage2_model.get_weights()

                    # Build new model
                    new_model = build_stage2_dnn(
                        n_models=len(ensemble_models),
                        n_layers=1,
                        units_per_layer=32,
                        dropout=0.2,
                        batch_norm=False,
                        activation='relu',
                        learning_rate=0.001
                    )

                    # Transfer weights: copy input layer weights for existing models, initialize new ones randomly
                    new_weights = new_model.get_weights()
                    # Input layer weights: shape (n_inputs, 32), bias: shape (32,)
                    # Copy existing input weights for old models, new model inputs already randomly initialized
                    old_n_models = old_weights[0].shape[0]
                    new_weights[0][:old_n_models, :] = old_weights[0]  # Copy old input weights
                    new_weights[1] = old_weights[1]  # Copy input bias

                    # For subsequent layers, only copy if architectures match exactly
                    # Since we're using 1 hidden layer with 32 units consistently, we can copy
                    # But we need to verify the shapes match before copying
                    if len(old_weights) > 2 and len(new_weights) > 2:
                        # Hidden layer weights and biases (layers after input)
                        for i in range(2, len(old_weights)):
                            if old_weights[i].shape == new_weights[i].shape:
                                new_weights[i] = old_weights[i]

                    new_model.set_weights(new_weights)
                    stage2_model = new_model

                print(f"\n  Training stage 2 DNN...")
                print(f"    Training samples: {len(X_train_s2):,}")
                print(f"    Validation samples: {len(X_val_s2_internal):,}")

                ensemble_id = f"batch_{len(ensemble_models)}"
                stage2_model, history = train_stage2_dnn(
                    model=stage2_model,
                    X_train=X_train_s2,
                    y_train=y_train_s2,
                    X_val=X_val_s2_internal,
                    y_val=y_val_s2_internal,
                    epochs=STAGE2_EPOCHS,
                    batch_size=STAGE2_BATCH_SIZE,
                    patience=STAGE2_PATIENCE,
                    log_path=ensemble_id,
                    iteration=current_iter
                )

                # Evaluate on held out stage 2 validation
                final_score = evaluate_ensemble(
                    stage1_models=ensemble_models,
                    stage2_model=stage2_model,
                    X=X_val_s2,
                    y=y_val_s2
                )

                print(f"\n  Stage 2 DNN trained!")
                print(f"  DNN ensemble AUC: {final_score:.6f}")
                print(f"{'=' * 80}\n")

            # Check if this is the best score
            if candidate_score > best_ensemble_score:
                print(f"  🎉 New best score: {candidate_score:.6f} (Δ={candidate_score - best_ensemble_score:.6f})")
                best_ensemble_score = candidate_score
                iterations_since_improvement = 0
            else:
                iterations_since_improvement += 1
        else:
            iterations_since_improvement += 1

        # Update temperature
        # NOTE(review): only the current decision is passed; the notebook-level
        # `acceptance_history` list is not updated in this cell - confirm this
        # is what update_temperature expects.
        temperature = update_temperature(
            iteration=current_iter,
            acceptance_history=[accept],  # Single acceptance for this iteration
            current_temperature=temperature,
            base_temperature=BASE_TEMPERATURE,
            decay_rate=TEMPERATURE_DECAY
        )

    # Move to next batch. Advance by the number of jobs submitted, not the
    # number that succeeded: otherwise a failed job would make the next batch
    # re-run iteration numbers that already ran (same seeds, same model file
    # names), and a fully failed batch would never advance at all.
    iteration += len(batch_jobs)

    # Check termination conditions
    if iterations_since_improvement >= PLATEAU_ITERATIONS:
        print(f"\n{'=' * 80}")
        print(f"TERMINATING: No improvement for {PLATEAU_ITERATIONS} iterations")
        print(f"{'=' * 80}")
        break

print(f"\n{'=' * 80}")
print("HILL CLIMBING COMPLETE")
print(f"{'=' * 80}")
print(f"Final ensemble size: {len(ensemble_models)}")
print(f"Best ensemble AUC: {best_ensemble_score:.6f}")
print(f"Total iterations: {iteration}")

# Calculate acceptance rate from database; close the connection even if the
# query raises
conn = ensemble_database.sqlite3.connect(ensemble_database.DB_PATH)
try:
    acceptance_stats = conn.execute("SELECT COUNT(*) as total, SUM(accepted) as accepted FROM ensemble_log").fetchone()
finally:
    conn.close()
# SUM() yields NULL (None) when no non-NULL values exist, so default it to 0
total_logged, total_accepted = acceptance_stats
acceptance_rate = (total_accepted or 0) / total_logged if total_logged > 0 else 0.0
print(f"Acceptance rate: {acceptance_rate:.1%}")


STARTING PARALLEL HILL CLIMBING LOOP
Batch size: 10 candidates trained in parallel
Workers: 10 parallel processes

BATCH Starting at iteration 2
Current ensemble size: 1
Best score: 0.539989
Temperature: 0.010000
Iterations since improvement: 0/100

Training 10 candidates in parallel...
  ✓ Iteration 3: gradient_boosting AUC=0.568942 (1.3s)
  ✓ Iteration 2: gradient_boosting AUC=0.641222 (1.5s)
  ✓ Iteration 6: gradient_boosting AUC=0.564536 (1.0s)
  ✓ Iteration 5: gradient_boosting AUC=0.609992 (1.2s)
  ✓ Iteration 4: gradient_boosting AUC=0.594499 (1.5s)
  ✓ Iteration 9: gradient_boosting AUC=0.634028 (1.0s)
  ✓ Iteration 7: gradient_boosting AUC=0.630017 (1.5s)
  ✓ Iteration 10: gradient_boosting AUC=0.626263 (1.1s)
  ✓ Iteration 8: gradient_boosting AUC=0.579121 (1.5s)
  ✓ Iteration 11: gradient_boosting AUC=0.599180 (1.1s)

Batch training complete (3.3s, 0.3s per model)

--------------------------------------------------------------------------------
Processin

2025-12-07 02:56:43.745957: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)



  Stage 2 DNN trained!
  DNN ensemble AUC: 0.653950

  🎉 New best score: 0.651176 (Δ=0.000183)

--------------------------------------------------------------------------------
Processing Iteration 13
  Classifier: extra_trees
  Transformers: binning, log, difference
  Stage 1 validation AUC: 0.542726
  Evaluating ensemble with candidate...


InvalidArgumentError: Graph execution error:

Detected at node sequential_1/dense_1/Relu defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/home/vscode/.local/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/home/vscode/.local/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/home/vscode/.local/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 758, in start

  File "/home/vscode/.local/lib/python3.12/site-packages/tornado/platform/asyncio.py", line 211, in start

  File "/usr/local/lib/python3.12/asyncio/base_events.py", line 645, in run_forever

  File "/usr/local/lib/python3.12/asyncio/base_events.py", line 1999, in _run_once

  File "/usr/local/lib/python3.12/asyncio/events.py", line 88, in _run

  File "/home/vscode/.local/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 614, in shell_main

  File "/home/vscode/.local/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 471, in dispatch_shell

  File "/home/vscode/.local/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 366, in execute_request

  File "/home/vscode/.local/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 827, in execute_request

  File "/home/vscode/.local/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 458, in do_execute

  File "/home/vscode/.local/lib/python3.12/site-packages/ipykernel/zmqshell.py", line 663, in run_cell

  File "/home/vscode/.local/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3123, in run_cell

  File "/home/vscode/.local/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3178, in _run_cell

  File "/home/vscode/.local/lib/python3.12/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/home/vscode/.local/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3400, in run_cell_async

  File "/home/vscode/.local/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3641, in run_ast_nodes

  File "/home/vscode/.local/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3701, in run_code

  File "/tmp/ipykernel_122710/3483921516.py", line 106, in <module>

  File "/workspaces/diabetes-prediction/notebooks/functions/ensemble_stage2_model.py", line 342, in evaluate_ensemble

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 588, in predict

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 282, in one_step_on_data_distributed

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 125, in wrapper

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 271, in one_step_on_data

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 110, in predict_step

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/layers/layer.py", line 941, in __call__

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/ops/operation.py", line 59, in __call__

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/models/sequential.py", line 220, in call

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/models/functional.py", line 183, in call

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/ops/function.py", line 206, in _run_through_graph

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/models/functional.py", line 644, in call

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/layers/layer.py", line 941, in __call__

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/ops/operation.py", line 59, in __call__

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/layers/core/dense.py", line 191, in call

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/activations/activations.py", line 47, in relu

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/activations/activations.py", line 101, in static_call

  File "/home/vscode/.local/lib/python3.12/site-packages/keras/src/backend/tensorflow/nn.py", line 15, in relu

Matrix size-incompatible: In[0]: [32,11], In[1]: [10,32]
	 [[{{node sequential_1/dense_1/Relu}}]] [Op:__inference_one_step_on_data_distributed_156524]

## Save Final Checkpoint and Metadata

In [None]:
# Persist the finished run twice: once as a checkpoint via save_checkpoint(),
# and once as a small JSON summary written into MODELS_DIR.
import json

# The ensemble size is reported in both artifacts, so compute it once.
final_ensemble_size = len(ensemble_models)

# Save final checkpoint
checkpoint_metadata = dict(
    total_iterations=iteration,
    final_ensemble_size=final_ensemble_size,
    acceptance_rate=acceptance_rate,
    best_score=best_ensemble_score,
    parallel_batch_size=BATCH_SIZE,
    n_workers=N_WORKERS,
)
save_checkpoint(
    checkpoint_path=CHECKPOINT_PATH,
    ensemble_models=ensemble_models,
    stage2_model=stage2_model,
    iteration=iteration - 1,  # Last completed iteration
    temperature=temperature,
    best_score=best_ensemble_score,
    acceptance_history=[],  # Not tracking per-iteration history in parallel mode
    metadata=checkpoint_metadata,
)

# Save ensemble metadata as JSON
metadata_path = MODELS_DIR / 'ensemble_metadata.json'
ensemble_summary = dict(
    ensemble_size=final_ensemble_size,
    total_iterations=iteration,
    best_score=best_ensemble_score,
    acceptance_rate=acceptance_rate,
    training_completed=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    random_state=RANDOM_STATE,
    parallel_batch_size=BATCH_SIZE,
    n_workers=N_WORKERS,
)
with open(metadata_path, 'w') as f:
    json.dump(ensemble_summary, f, indent=2)

print(f"\nFinal checkpoint saved: {CHECKPOINT_PATH}")
print(f"Metadata saved: {metadata_path}")

## Summary Statistics

In [None]:
# Recap of the finished run: headline metrics, the parallel configuration,
# and the location of every artifact named in this notebook's save step.
banner = '=' * 80

summary_lines = [
    f"\n{banner}",
    "ENSEMBLE TRAINING SUMMARY",
    f"{banner}",
    f"\nFinal Statistics:",
    f"  Ensemble size: {len(ensemble_models)}",
    f"  Best validation AUC: {best_ensemble_score:.6f}",
    f"  Total iterations: {iteration}",
    f"  Acceptance rate: {acceptance_rate:.1%}",
    f"  Parallel configuration:",
    f"    Batch size: {BATCH_SIZE} candidates",
    f"    Workers: {N_WORKERS} processes",
    f"\nFiles created:",
    f"  SQLite database: {ensemble_database.DB_PATH}",
    f"  Ensemble models: {ENSEMBLE_DIR}",
    f"  Checkpoint: {CHECKPOINT_PATH}",
    f"  Metadata: {metadata_path}",
    f"\n{banner}",
]

# One print() per entry so the emitted text matches a line-by-line report.
for summary_line in summary_lines:
    print(summary_line)