In [None]:
# Standard library imports
import os
import sys
import time
from datetime import datetime
from pathlib import Path

# Disable GPU (CUDA drivers not available in dev container)
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Suppress TensorFlow warnings

# Set environment variable to limit thread usage
os.environ['OMP_NUM_THREADS'] = '8'

# Third party imports
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline

# Add models directory to path for custom transformers
models_path = Path('../models').resolve()
sys.path.insert(0, str(models_path))

# Import custom transformers from models directory
from logistic_regression_transformers import IDColumnDropper, IQRClipper, ConstantFeatureRemover

# Import ensemble modules from functions package
from functions import ensemble_database
from functions.ensemble_hill_climbing import (
    generate_random_pipeline,
    calculate_ensemble_diversity,
    quick_optimize_pipeline,
    adaptive_simulated_annealing_acceptance,
    update_temperature,
    compute_pipeline_hash,
    log_iteration
)
from functions.ensemble_stage2_model import (
    build_stage2_dnn,
    train_stage2_dnn,
    save_checkpoint,
    load_checkpoint,
    evaluate_ensemble
)

# Configure TensorFlow: only ERROR-level messages from the TF Python logger
tf.get_logger().setLevel('ERROR')

# Report the runtime environment for this session (one line per fact)
print("\n".join([
    f"TensorFlow version: {tf.__version__}",
    "Running on: CPU (24 cores)",
    "GPU disabled: CUDA drivers not available in dev container",
]))

2025-12-07 01:05:08.365231: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765069508.387138  104338 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765069508.393957  104338 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


TensorFlow version: 2.18.0
Running on: CPU (24 cores)
GPU disabled: CUDA drivers not available in dev container


### Configuration

In [None]:
# Random state for reproducibility (only for fixed data splits)
RANDOM_STATE = 315

# Hill climbing configuration
MAX_ITERATIONS = 500
PLATEAU_ITERATIONS = 100  # Stop if no improvement for this many iterations
BASE_TEMPERATURE = 0.01   # Initial simulated-annealing temperature
TEMPERATURE_DECAY = 0.995  # Passed to update_temperature as decay_rate

# Stage 2 DNN configuration
STAGE2_BATCH_SIZE_MODELS = 10  # Retrain DNN every N accepted models
STAGE2_EPOCHS = 100
STAGE2_BATCH_SIZE = 128
STAGE2_PATIENCE = 10

# Checkpoint configuration
RESUME_FROM_CHECKPOINT = False  # Set to True to resume from saved checkpoint

# Paths
DATA_DIR = Path('../data')
MODELS_DIR = Path('../models')
ENSEMBLE_DIR = MODELS_DIR / 'ensemble_stage1_models'
CHECKPOINT_PATH = MODELS_DIR / 'ensemble_checkpoint.pkl'

# Create directories (ENSEMBLE_DIR lives under MODELS_DIR, so parents=True creates both)
DATA_DIR.mkdir(parents=True, exist_ok=True)
ENSEMBLE_DIR.mkdir(parents=True, exist_ok=True)

# Initialize the SQLite database used for logging.
# Only wipe it for a fresh run: resetting while resuming would destroy the
# iteration log of the run that is being continued.
if RESUME_FROM_CHECKPOINT:
    print("Resuming from checkpoint: keeping existing database...")
else:
    print("Resetting database for fresh training run...")
    ensemble_database.reset_database()
# NOTE(review): assumes init_database() is safe to call on an existing database - confirm
ensemble_database.init_database()

print(f"\nConfiguration:")
print(f"  Random state: {RANDOM_STATE} (for fixed data splits only)")
print(f"  Max iterations: {MAX_ITERATIONS}")
print(f"  Plateau threshold: {PLATEAU_ITERATIONS}")
print(f"  Stage 2 batch size: {STAGE2_BATCH_SIZE_MODELS} models")
print(f"  Resume from checkpoint: {RESUME_FROM_CHECKPOINT}")
print(f"  Ensemble directory: {ENSEMBLE_DIR}")
print(f"  Database: {ensemble_database.DB_PATH}")

Database initialized at: /workspaces/diabetes-prediction/data/ensemble_training.db
Configuration:
  Random state: 315
  Number of founder models: 5
  Training sample size: 50,000
  Max iterations: 500
  Plateau threshold: 100
  Founder optimization iterations: 5
  Stage 2 batch size: 10 models
  Resume from checkpoint: False
  Ensemble directory: ../models/ensemble_stage1_models
  Database: /workspaces/diabetes-prediction/data/ensemble_training.db


### Data Loading

In [3]:
# Target column and location of the training CSV
label = 'diagnosed_diabetes'
train_df_path = 'https://gperdrizet.github.io/FSA_devops/assets/data/unit3/diabetes_prediction_train.csv'

# Read the data and discard exact duplicate rows
train_df = pd.read_csv(train_df_path)
train_df = train_df.drop_duplicates()

print(f"Training data shape: {train_df.shape}")
print("Class distribution:")
print(train_df[label].value_counts(normalize=True))

# Feature groups (taken from the logistic regression notebook)
numerical_features = [
    'age',
    'alcohol_consumption_per_week',
    'physical_activity_minutes_per_week',
    'diet_score',
    'sleep_hours_per_day',
    'screen_time_hours_per_day',
    'bmi',
    'waist_to_hip_ratio',
    'systolic_bp',
    'diastolic_bp',
    'heart_rate',
    'cholesterol_total',
    'hdl_cholesterol',
    'ldl_cholesterol',
    'triglycerides',
    'family_history_diabetes',
    'hypertension_history',
    'cardiovascular_history',
]

# Ordered categoricals; each category list is wrapped in an outer list so the
# two can be concatenated into the `categories` argument of OrdinalEncoder
ordinal_features = ['education_level', 'income_level']
education_categories = [
    ['No formal', 'Highschool', 'Graduate', 'Postgraduate'],
]
income_categories = [
    ['Low', 'Lower-Middle', 'Middle', 'Upper-Middle', 'High'],
]

# Unordered categoricals
nominal_features = ['gender', 'ethnicity', 'smoking_status', 'employment_status']

Training data shape: (700000, 26)
Class distribution:
diagnosed_diabetes
1.0    0.623296
0.0    0.376704
Name: proportion, dtype: float64


### Create Base Preprocessor

This preprocessor will be shared across all stage 1 models for consistent encoding.

In [4]:
# Numerical branch: clip outliers by IQR, then standardize
numerical_pipeline = Pipeline(steps=[
    ('clipper', IQRClipper(iqr_multiplier=2.0)),
    ('scaler', StandardScaler()),
])

# Ordinal branch: one ordered category list per ordinal feature
# (education first, income second, matching `ordinal_features`);
# values outside those lists are encoded as -1
ordinal_encoder = OrdinalEncoder(
    categories=[*education_categories, *income_categories],
    handle_unknown='use_encoded_value',
    unknown_value=-1,
)

# Nominal branch: dense one-hot encoding with the first level dropped
onehot_encoder = OneHotEncoder(
    drop='first',
    sparse_output=False,
    handle_unknown='ignore',
)

# Combine the three branches into the base preprocessor
base_preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_pipeline, numerical_features),
    ('ord', ordinal_encoder, ordinal_features),
    ('nom', onehot_encoder, nominal_features),
])

print("Base preprocessor created")
print(f"  Numerical features: {len(numerical_features)}")
print(f"  Ordinal features: {len(ordinal_features)}")
print(f"  Nominal features: {len(nominal_features)}")

Base preprocessor created
  Numerical features: 18
  Ordinal features: 2
  Nominal features: 4


## Initialize or Resume Ensemble

### Option 1: Initialize Founder Model (if starting fresh)

In [None]:
# ---------------------------------------------------------------------------
# Fixed data splits
#
# These are created unconditionally: the hill climbing loop reads
# X_train_pool / X_val_s1 / X_val_s2 whether or not the run is resumed, and
# because the splits are seeded with RANDOM_STATE they are identical on every
# run.
# ---------------------------------------------------------------------------

# Prepare features and labels
X_full = train_df.drop(columns=[label])
y_full = train_df[label]

# Create FIXED three-way split:
# 1. Training pool (60%) - for training stage 1 models (random samples from this)
# 2. Stage 1 validation (20%) - for evaluating stage 1 models and training stage 2
# 3. Stage 2 validation (20%) - for evaluating stage 2 model (held out)

# First split: training pool vs validation
X_train_pool, X_val_combined, y_train_pool, y_val_combined = train_test_split(
    X_full,
    y_full,
    test_size=0.4,  # 40% for validation (will split into 2x 20%)
    random_state=RANDOM_STATE,  # Fixed split
    stratify=y_full
)

# Second split: stage 1 validation vs stage 2 validation
X_val_s1, X_val_s2, y_val_s1, y_val_s2 = train_test_split(
    X_val_combined,
    y_val_combined,
    test_size=0.5,  # Split 40% into 2x 20%
    random_state=RANDOM_STATE,  # Fixed split
    stratify=y_val_combined
)

print(f"\nFixed data split:")
print("-" * 80)
print(f"  Training pool: {len(X_train_pool):,} samples (60%)")
print(f"  Stage 1 validation: {len(X_val_s1):,} samples (20%) - for stage 1 eval & stage 2 training")
print(f"  Stage 2 validation: {len(X_val_s2):,} samples (20%) - for stage 2 eval (HELD OUT)")

# ---------------------------------------------------------------------------
# Founder model (fresh runs only)
# ---------------------------------------------------------------------------
if not RESUME_FROM_CHECKPOINT:
    print("=" * 80)
    print("INITIALIZING FOUNDER MODEL")
    print("=" * 80)

    # Stage 1 models accepted into the ensemble; the founder is the first entry
    ensemble_models = []

    # Random sample size for founder training (seeded, so the size is reproducible)
    rng = np.random.RandomState(RANDOM_STATE)
    founder_sample_size = rng.randint(10000, 50001)

    # Sample from training pool (no random_state = different sample each time)
    X_train, _, y_train, _ = train_test_split(
        X_train_pool,
        y_train_pool,
        train_size=founder_sample_size,
        stratify=y_train_pool
    )

    print(f"\nTraining founder model")
    print("-" * 80)
    print(f"  Training samples: {len(X_train):,}")

    # Generate random pipeline for founder
    pipeline, metadata = generate_random_pipeline(
        iteration=0,
        random_state=RANDOM_STATE,
        base_preprocessor=base_preprocessor
    )

    print(f"  Pipeline config:")
    print(f"    Classifier: {metadata['classifier_type']}")
    print(f"    Transformers: {metadata['transformers_used']}")
    print(f"    Use PCA: {metadata['use_pca']}")

    # Train on training sample
    print(f"  Training pipeline...")
    start_time = time.time()

    # Just fit the model - no CV, no hyperparameter optimization
    fitted_pipeline = pipeline.fit(X_train, y_train)

    training_time = time.time() - start_time
    print(f"  Training complete ({training_time:.1f}s)")

    # Generate predictions on FIXED stage 1 validation set.
    # Pipelines without predict_proba fall back to decision_function scores.
    if hasattr(fitted_pipeline, 'predict_proba'):
        val_pred_s1 = fitted_pipeline.predict_proba(X_val_s1)[:, 1]
    else:
        val_pred_s1 = fitted_pipeline.decision_function(X_val_s1)

    # Calculate stage 1 validation AUC
    val_auc_s1 = roc_auc_score(y_val_s1, val_pred_s1)

    print(f"  Stage 1 validation AUC: {val_auc_s1:.6f}")

    # Save founder model (in memory and on disk)
    ensemble_models.append(fitted_pipeline)
    model_path = ENSEMBLE_DIR / 'founder_model.joblib'
    joblib.dump(fitted_pipeline, model_path)

    # Log founder as iteration 0
    pipeline_hash = compute_pipeline_hash(fitted_pipeline, metadata)
    ensemble_id = "founder"
    log_iteration(
        iteration=0,
        accepted=True,
        rejection_reason='founder',
        pipeline_hash=pipeline_hash,
        stage1_val_auc=val_auc_s1,
        stage2_val_auc=val_auc_s1,  # No stage 2 yet, use stage 1 score
        ensemble_size=1,
        diversity_score=0.0,
        temperature=BASE_TEMPERATURE,
        metadata=metadata,
        ensemble_id=ensemble_id
    )

    print(f"\n{'=' * 80}")
    print("FOUNDER MODEL COMPLETE")
    print(f"{'=' * 80}")
    print(f"Stage 1 validation AUC: {val_auc_s1:.6f}")

INITIALIZING FOUNDER MODEL

Fixed data split:
--------------------------------------------------------------------------------
  Training pool: 420,000 samples (60%)
  Stage 1 validation: 140,000 samples (20%) - for stage 1 eval & stage 2 training
  Stage 2 validation: 140,000 samples (20%) - for stage 2 eval (HELD OUT)

Training founder model
--------------------------------------------------------------------------------
  Training samples: 39,283
  Pipeline config:
    Classifier: extra_trees
    Transformers: ['sqrt', 'ratio', 'reciprocal']
    Use PCA: True
  Training pipeline...
  Training complete (3.6s)
  Stage 1 validation ROC-AUC: 0.522364

FOUNDER MODEL COMPLETE
Stage 1 validation ROC-AUC: 0.522364


### Initialize Ensemble (if starting fresh)

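Aggregation is batch-based: the ensemble is scored with a simple mean of the stage 1 predictions until the first 10 models have been accepted; after that, the stage 2 DNN is retrained every 10 accepted models (`STAGE2_BATCH_SIZE_MODELS`), reusing the previous weights via transfer learning.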


In [6]:
if not RESUME_FROM_CHECKPOINT:
    print("\n" + "=" * 80)
    print("INITIALIZING ENSEMBLE")
    print("=" * 80)

    # The stage 2 DNN does not exist yet; it is trained once the first batch
    # of models has been accepted
    stage2_model = None

    # Score the founder on its own against the stage 2 validation set,
    # preferring positive-class probabilities over raw decision scores
    val_pred_s2 = (
        fitted_pipeline.predict_proba(X_val_s2)[:, 1]
        if hasattr(fitted_pipeline, 'predict_proba')
        else fitted_pipeline.decision_function(X_val_s2)
    )
    founder_auc = roc_auc_score(y_val_s2, val_pred_s2)

    print("\nFounder model performance:")
    print(f"  Single model ROC-AUC: {founder_auc:.6f}")
    print(f"  Stage 2 DNN will be trained after {STAGE2_BATCH_SIZE_MODELS} accepted models")

    # The founder's score is the baseline candidates have to beat
    best_ensemble_score = founder_auc

    # Hill climbing state: the founder occupied iteration 0, so start at 1
    start_iteration = 1
    temperature = BASE_TEMPERATURE
    acceptance_history = []



INITIALIZING ENSEMBLE

Founder model performance:
  Single model ROC-AUC: 0.523996
  Stage 2 DNN will be trained after 10 accepted models


## Hill Climbing Loop

Iteratively add diverse models with simulated annealing acceptance.

In [None]:
def _positive_class_scores(model, X):
    """Return one score per row of X from a fitted stage 1 model.

    Uses the positive-class column of ``predict_proba`` when the model exposes
    it, otherwise falls back to ``decision_function``.
    """
    if hasattr(model, 'predict_proba'):
        return model.predict_proba(X)[:, 1]
    return model.decision_function(X)


print(f"\n{'=' * 80}")
print("STARTING HILL CLIMBING LOOP")
print(f"{'=' * 80}")

iterations_since_improvement = 0
consecutive_rejections = 0

# Keep `iteration` defined for the summary below (and for later cells) even if
# the loop body never runs, e.g. when start_iteration >= MAX_ITERATIONS.
iteration = start_iteration - 1

# Cache the validation predictions of every accepted model. A fitted model's
# predictions on the FIXED validation sets do not change between iterations, so
# each model only needs to be scored once instead of once per iteration.
# (Assumes fitted pipelines predict deterministically.)
ensemble_preds_s1 = [_positive_class_scores(m, X_val_s1) for m in ensemble_models]
ensemble_preds_s2 = [_positive_class_scores(m, X_val_s2) for m in ensemble_models]

for iteration in range(start_iteration, MAX_ITERATIONS):
    print(f"\n{'=' * 80}")
    print(f"Iteration {iteration + 1}/{MAX_ITERATIONS}")
    print(f"{'=' * 80}")
    print(f"Current ensemble size: {len(ensemble_models)}")
    print(f"Best score: {best_ensemble_score:.6f}")
    print(f"Temperature: {temperature:.6f}")
    print(f"Iterations since improvement: {iterations_since_improvement}/{PLATEAU_ITERATIONS}")

    # Random sample size for this iteration (seeded per iteration)
    rng = np.random.RandomState(RANDOM_STATE + iteration)
    iteration_sample_size = rng.randint(10000, 50001)

    # Sample from training pool (no random_state = different sample each time)
    # Both validation sets remain FIXED
    X_train, _, y_train, _ = train_test_split(
        X_train_pool,
        y_train_pool,
        train_size=iteration_sample_size,
        stratify=y_train_pool
    )

    print(f"  Training samples: {len(X_train):,}")
    print(f"  Stage 1 validation: {len(X_val_s1):,} (FIXED)")
    print(f"  Stage 2 validation: {len(X_val_s2):,} (HELD OUT)")

    # Generate random pipeline
    pipeline, metadata = generate_random_pipeline(
        iteration=iteration,
        random_state=RANDOM_STATE + iteration,
        base_preprocessor=base_preprocessor
    )

    print(f"\nPipeline configuration:")
    print(f"  Classifier: {metadata['classifier_type']}")
    print(f"  Transformers: {', '.join(metadata['transformers_used']) if metadata['transformers_used'] else 'None'}")

    # Train pipeline (no cross-validation, no hyperparameter optimization)
    print(f"\nTraining pipeline...")
    fitted_pipeline = pipeline.fit(X_train, y_train)

    # Evaluate on FIXED stage 1 validation set
    val_pred_s1 = _positive_class_scores(fitted_pipeline, X_val_s1)
    val_auc_s1 = roc_auc_score(y_val_s1, val_pred_s1)
    print(f"  Stage 1 validation AUC: {val_auc_s1:.6f}")

    # Add to candidate pool and evaluate ensemble
    print(f"\nEvaluating ensemble with candidate...")
    candidate_ensemble = ensemble_models + [fitted_pipeline]

    # Candidate's stage 2 validation predictions; computed only when needed
    candidate_pred_s2 = None

    # Determine aggregation method
    if stage2_model is None or len(candidate_ensemble) < STAGE2_BATCH_SIZE_MODELS:
        # Use simple mean for evaluation (cached member predictions + candidate)
        candidate_pred_s2 = _positive_class_scores(fitted_pipeline, X_val_s2)
        ensemble_pred = np.mean(ensemble_preds_s2 + [candidate_pred_s2], axis=0)
        candidate_score = roc_auc_score(y_val_s2, ensemble_pred)
        aggregation_method = "simple mean"
    else:
        # Use trained DNN for evaluation.
        # NOTE(review): the DNN is built with len(ensemble_models) inputs at
        # training time, while candidate_ensemble holds one model more. This
        # relies on evaluate_ensemble coping with that difference - confirm.
        candidate_score = evaluate_ensemble(
            stage1_models=candidate_ensemble,
            stage2_model=stage2_model,
            X=X_val_s2,
            y=y_val_s2
        )
        aggregation_method = "DNN weighted"

    print(f"  Candidate ensemble AUC ({aggregation_method}): {candidate_score:.6f}")

    # Calculate diversity on stage 1 validation set
    # (one column per model, candidate last; reuses val_pred_s1 from above)
    all_predictions = np.column_stack(ensemble_preds_s1 + [val_pred_s1])
    diversity_score = calculate_ensemble_diversity(all_predictions)
    print(f"  Diversity score: {diversity_score:.6f}")

    # Simulated annealing acceptance
    accept, reason = adaptive_simulated_annealing_acceptance(
        current_score=best_ensemble_score,
        candidate_score=candidate_score,
        temperature=temperature,
        random_state=RANDOM_STATE + iteration
    )

    print(f"\nDecision: {'✓ ACCEPT' if accept else '✗ REJECT'}")
    print(f"  Reason: {reason}")

    # Log iteration
    pipeline_hash = compute_pipeline_hash(fitted_pipeline, metadata)
    ensemble_id = f"iter_{iteration}"
    log_iteration(
        iteration=iteration,
        accepted=accept,
        rejection_reason=reason,
        pipeline_hash=pipeline_hash,
        stage1_val_auc=val_auc_s1,
        stage2_val_auc=candidate_score,
        ensemble_size=len(candidate_ensemble) if accept else len(ensemble_models),
        diversity_score=diversity_score,
        temperature=temperature,
        metadata=metadata,
        ensemble_id=ensemble_id
    )

    # Update ensemble if accepted
    if accept:
        ensemble_models.append(fitted_pipeline)
        acceptance_history.append(True)
        consecutive_rejections = 0

        # Keep the prediction caches aligned with ensemble_models
        ensemble_preds_s1.append(val_pred_s1)
        if candidate_pred_s2 is None:
            candidate_pred_s2 = _positive_class_scores(fitted_pipeline, X_val_s2)
        ensemble_preds_s2.append(candidate_pred_s2)

        # Save model
        model_path = ENSEMBLE_DIR / f'model_{iteration}.joblib'
        joblib.dump(fitted_pipeline, model_path)

        # Check if we should train/retrain stage 2 DNN
        if len(ensemble_models) % STAGE2_BATCH_SIZE_MODELS == 0:
            print(f"\n{'=' * 80}")
            print(f"BATCH COMPLETE: Training stage 2 DNN on {len(ensemble_models)} models")
            print(f"{'=' * 80}")

            # Stage 2 inputs: one column of stage 1 validation predictions per model
            X_stage2_train_full = np.column_stack(ensemble_preds_s1)
            y_stage2_train_full = y_val_s1.values

            # Sample for training (unseeded: a different subsample on each retrain)
            sample_size = min(50000, len(X_stage2_train_full))
            sample_indices = np.random.choice(len(X_stage2_train_full), size=sample_size, replace=False)
            X_stage2_sample = X_stage2_train_full[sample_indices]
            y_stage2_sample = y_stage2_train_full[sample_indices]

            # Train/val split (80/20; the sample is already in random order)
            split_idx = int(len(X_stage2_sample) * 0.8)
            X_train_s2 = X_stage2_sample[:split_idx]
            y_train_s2 = y_stage2_sample[:split_idx]
            X_val_s2_internal = X_stage2_sample[split_idx:]
            y_val_s2_internal = y_stage2_sample[split_idx:]

            if stage2_model is None:
                # First DNN training
                print(f"\n  Building initial stage 2 DNN...")
                stage2_model = build_stage2_dnn(
                    n_models=len(ensemble_models),
                    n_layers=1,
                    units_per_layer=32,
                    dropout=0.2,
                    batch_norm=False,
                    activation='relu',
                    learning_rate=0.001
                )
            else:
                # Transfer learning: build new DNN with more inputs, copy weights where possible
                print(f"\n  Transfer learning: expanding DNN from {stage2_model.input_shape[1]} to {len(ensemble_models)} inputs...")

                # Save old weights
                old_weights = stage2_model.get_weights()

                # Build new model
                new_model = build_stage2_dnn(
                    n_models=len(ensemble_models),
                    n_layers=1,
                    units_per_layer=32,
                    dropout=0.2,
                    batch_norm=False,
                    activation='relu',
                    learning_rate=0.001
                )

                # Transfer weights: copy input layer weights for existing models, initialize new ones randomly
                new_weights = new_model.get_weights()
                # Input layer weights: shape (n_inputs, 32), bias: shape (32,)
                # Copy existing weights, new model weights already randomly initialized
                old_n_models = old_weights[0].shape[0]
                new_weights[0][:old_n_models, :] = old_weights[0]  # Copy old input weights
                # Bias and other layers can be copied directly
                # (their shapes do not depend on the number of inputs)
                for i in range(1, len(old_weights)):
                    new_weights[i] = old_weights[i]

                new_model.set_weights(new_weights)
                stage2_model = new_model

            print(f"\n  Training stage 2 DNN...")
            print(f"    Training samples: {len(X_train_s2):,}")
            print(f"    Validation samples: {len(X_val_s2_internal):,}")

            ensemble_id = f"batch_{len(ensemble_models)}"
            stage2_model, history = train_stage2_dnn(
                model=stage2_model,
                X_train=X_train_s2,
                y_train=y_train_s2,
                X_val=X_val_s2_internal,
                y_val=y_val_s2_internal,
                epochs=STAGE2_EPOCHS,
                batch_size=STAGE2_BATCH_SIZE,
                patience=STAGE2_PATIENCE,
                log_path=ensemble_id,
                iteration=iteration
            )

            # Evaluate on held out stage 2 validation
            final_score = evaluate_ensemble(
                stage1_models=ensemble_models,
                stage2_model=stage2_model,
                X=X_val_s2,
                y=y_val_s2
            )

            print(f"\n  Stage 2 DNN trained!")
            print(f"  DNN ensemble AUC: {final_score:.6f}")
            print(f"{'=' * 80}\n")

        # Check if this is the best score
        if candidate_score > best_ensemble_score:
            print(f"  🎉 New best score: {candidate_score:.6f} (Δ={candidate_score - best_ensemble_score:.6f})")
            best_ensemble_score = candidate_score
            iterations_since_improvement = 0
        else:
            iterations_since_improvement += 1
    else:
        acceptance_history.append(False)
        consecutive_rejections += 1
        iterations_since_improvement += 1

    # Update temperature
    temperature = update_temperature(
        iteration=iteration,
        acceptance_history=acceptance_history,
        current_temperature=temperature,
        base_temperature=BASE_TEMPERATURE,
        decay_rate=TEMPERATURE_DECAY
    )

    # Check termination conditions
    if iterations_since_improvement >= PLATEAU_ITERATIONS:
        print(f"\n{'=' * 80}")
        print(f"TERMINATING: No improvement for {PLATEAU_ITERATIONS} iterations")
        print(f"{'=' * 80}")
        break

# Guard against an empty history (loop body never ran)
acceptance_rate = sum(acceptance_history) / len(acceptance_history) if acceptance_history else 0.0

print(f"\n{'=' * 80}")
print("HILL CLIMBING COMPLETE")
print(f"{'=' * 80}")
print(f"Final ensemble size: {len(ensemble_models)}")
print(f"Best ensemble AUC: {best_ensemble_score:.6f}")
print(f"Total iterations: {iteration + 1}")
print(f"Acceptance rate: {acceptance_rate:.1%}")


STARTING HILL CLIMBING LOOP

Iteration 2/500
Current ensemble size: 1
Best score: 0.523996
Temperature: 0.010000
Iterations since improvement: 0/100
  Training samples: 27,213
  Stage 1 validation: 140,000 (FIXED)
  Stage 2 validation: 140,000 (HELD OUT)

Pipeline configuration:
  Classifier: mlp
  Transformers: reciprocal, kde, binning

Training pipeline...
  Training samples: 27,213
  Stage 1 validation: 140,000 (FIXED)
  Stage 2 validation: 140,000 (HELD OUT)

Pipeline configuration:
  Classifier: mlp
  Transformers: reciprocal, kde, binning

Training pipeline...




  Stage 1 validation ROC-AUC: 0.502696

Evaluating ensemble with candidate...
  Candidate ensemble ROC-AUC (simple mean): 0.511511


## Save Final Checkpoint and Metadata

In [None]:
# Imported here so this cell stays runnable on its own
import json

# Run statistics shared by the checkpoint and the JSON metadata.
# The acceptance rate falls back to 0.0 when no candidate was ever evaluated,
# so an empty history cannot raise ZeroDivisionError.
total_iterations = iteration + 1
acceptance_rate = (
    sum(acceptance_history) / len(acceptance_history) if acceptance_history else 0.0
)

# Save final checkpoint
save_checkpoint(
    checkpoint_path=CHECKPOINT_PATH,
    ensemble_models=ensemble_models,
    stage2_model=stage2_model,
    iteration=iteration,
    temperature=temperature,
    best_score=best_ensemble_score,
    acceptance_history=acceptance_history,
    metadata={
        'total_iterations': total_iterations,
        'final_ensemble_size': len(ensemble_models),
        'acceptance_rate': acceptance_rate,
        'best_score': best_ensemble_score
    }
)

# Save ensemble metadata as JSON (explicit encoding keeps the file platform-independent)
metadata_path = MODELS_DIR / 'ensemble_metadata.json'
with open(metadata_path, 'w', encoding='utf-8') as f:
    json.dump({
        'ensemble_size': len(ensemble_models),
        'total_iterations': total_iterations,
        'best_score': best_ensemble_score,
        'acceptance_rate': acceptance_rate,
        'training_completed': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'random_state': RANDOM_STATE
    }, f, indent=2)

print(f"\nFinal checkpoint saved: {CHECKPOINT_PATH}")
print(f"Metadata saved: {metadata_path}")

## Summary Statistics

In [None]:
# Acceptance statistics; an empty history (no candidate evaluated) reports 0.0%
# instead of raising ZeroDivisionError
n_evaluated = len(acceptance_history)
n_accepted = sum(acceptance_history)
acceptance_rate = n_accepted / n_evaluated if n_evaluated else 0.0

print(f"\n{'=' * 80}")
print("ENSEMBLE TRAINING SUMMARY")
print(f"{'=' * 80}")
print(f"\nFinal Statistics:")
print(f"  Ensemble size: {len(ensemble_models)}")
print(f"  Best validation AUC: {best_ensemble_score:.6f}")
print(f"  Total iterations: {iteration + 1}")
print(f"  Accepted models: {n_accepted}")
print(f"  Rejected models: {n_evaluated - n_accepted}")
print(f"  Acceptance rate: {acceptance_rate:.1%}")
print(f"\nFiles created:")
print(f"  SQLite database: {ensemble_database.DB_PATH}")
print(f"  Ensemble models: {ENSEMBLE_DIR}")
print(f"  Checkpoint: {CHECKPOINT_PATH}")
print(f"  Metadata: {metadata_path}")
print(f"\n{'=' * 80}")