In [None]:
# Standard library imports
import json
import os
import sys
import time
from datetime import datetime
from pathlib import Path

# Disable GPU (CUDA drivers not available in dev container)
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Suppress TensorFlow warnings

# Set environment variable to limit thread usage
os.environ['OMP_NUM_THREADS'] = '8'

# Third party imports (kept below the os.environ settings so they take effect before TensorFlow loads)
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.compose import ColumnTransformer
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder

# Add models directory to path for custom transformers
models_path = Path('../models').resolve()
sys.path.insert(0, str(models_path))

# Import custom transformers
from logistic_regression_transformers import IDColumnDropper, IQRClipper

# Import utility functions from notebooks/functions
from functions.ensemble_transformers import *
from functions.ensemble_hill_climbing import (
    generate_random_pipeline,
    calculate_ensemble_diversity,
    quick_optimize_pipeline,
    adaptive_simulated_annealing_acceptance,
    update_temperature,
    compute_pipeline_hash,
    log_iteration
)
from functions.ensemble_stage2_model import (
    build_stage2_dnn,
    train_stage2_dnn,
    save_checkpoint,
    load_checkpoint
)
from functions import ensemble_database

# Configure TensorFlow: only surface errors from the TF Python logger
tf.get_logger().setLevel('ERROR')
print(f"TensorFlow version: {tf.__version__}")
# Report the core count of the machine actually running the notebook instead of a fixed number
print(f"Running on: CPU ({os.cpu_count()} cores)")
print("GPU disabled: CUDA drivers not available in dev container")

2025-12-06 15:39:44.177993: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-12-06 15:39:44.182253: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-12-06 15:39:44.192806: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765035584.214168   59200 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765035584.220827   59200 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


ModuleNotFoundError: No module named 'ensemble_transformers'

### Configuration

In [None]:
# Random state for reproducibility
RANDOM_STATE = 315

# Seed Python's `random`, NumPy and TensorFlow together. The helpers below receive
# RANDOM_STATE explicitly, but the stage 2 DNN (weight initialisation, batch shuffling)
# draws from TensorFlow's global generator, which is otherwise unseeded.
tf.keras.utils.set_random_seed(RANDOM_STATE)

# K-fold configuration
N_FOLDS = 5

# Hill climbing configuration
MAX_ITERATIONS = 500
PLATEAU_ITERATIONS = 100  # Stop if no improvement for this many iterations
BASE_TEMPERATURE = 0.01
TEMPERATURE_DECAY = 0.995

# Stage 1 optimization (settings for the per-iteration candidate search)
QUICK_OPTIMIZE_ITERATIONS = 10
QUICK_OPTIMIZE_CV = 3
QUICK_OPTIMIZE_JOBS = 8

# Founder ensemble optimization (more thorough)
FOUNDER_OPTIMIZE_ITERATIONS = 20
FOUNDER_OPTIMIZE_CV = 3

# Stage 2 DNN configuration
# NOTE(review): these three constants are not read by any cell visible in this notebook;
# the initial stage 2 training cell passes its own literal values.
STAGE2_EPOCHS = 100
STAGE2_BATCH_SIZE = 128
STAGE2_PATIENCE = 10

# Paths (relative to the notebook's working directory)
DATA_DIR = Path('../data')
MODELS_DIR = Path('../models')
ENSEMBLE_DIR = MODELS_DIR / 'ensemble_stage1_models'
CHECKPOINT_PATH = MODELS_DIR / 'ensemble_checkpoint.pkl'

# Create directories (ENSEMBLE_DIR lives under MODELS_DIR, so parents=True creates both)
DATA_DIR.mkdir(parents=True, exist_ok=True)
ENSEMBLE_DIR.mkdir(parents=True, exist_ok=True)

# Initialize SQLite database for logging
ensemble_database.init_database()

print("Configuration:")
print(f"  Random state: {RANDOM_STATE}")
print(f"  K-folds: {N_FOLDS}")
print(f"  Max iterations: {MAX_ITERATIONS}")
print(f"  Plateau threshold: {PLATEAU_ITERATIONS}")
print(f"  Ensemble directory: {ENSEMBLE_DIR}")
print(f"  Database: {ensemble_database.DB_PATH}")

### Data Loading

In [None]:
# Name of the target column; declared first so every later reference goes through it
label = 'diagnosed_diabetes'

# Read the training set from the course site and discard exact duplicate rows
train_df_path = 'https://gperdrizet.github.io/FSA_devops/assets/data/unit3/diabetes_prediction_train.csv'
train_df = pd.read_csv(train_df_path).drop_duplicates()

print(f"Training data shape: {train_df.shape}")
print("Class distribution:")
print(train_df[label].value_counts(normalize=True))

# Feature groups (from logistic regression notebook); each group is routed to its own
# branch of the preprocessor built further down.
numerical_features = [
    'age',
    'alcohol_consumption_per_week',
    'physical_activity_minutes_per_week',
    'diet_score',
    'sleep_hours_per_day',
    'screen_time_hours_per_day',
    'bmi',
    'waist_to_hip_ratio',
    'systolic_bp',
    'diastolic_bp',
    'heart_rate',
    'cholesterol_total',
    'hdl_cholesterol',
    'ldl_cholesterol',
    'triglycerides',
    'family_history_diabetes',
    'hypertension_history',
    'cardiovascular_history',
]

# Ordered categoricals: each category list is written lowest to highest and wrapped in an
# outer list so the two can be concatenated into one `categories` argument.
ordinal_features = ['education_level', 'income_level']
education_categories = [['No formal', 'Highschool', 'Graduate', 'Postgraduate']]
income_categories = [['Low', 'Lower-Middle', 'Middle', 'Upper-Middle', 'High']]

# Unordered categoricals
nominal_features = ['gender', 'ethnicity', 'smoking_status', 'employment_status']

### Create K-Fold Splits

In [None]:
# Stratified, shuffled splitter seeded with the notebook-wide random state
skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=RANDOM_STATE)

# Materialise the (train positions, validation positions) pair of every fold once;
# later cells index this list by fold number and apply the positions with .iloc.
fold_indices = list(skf.split(train_df, train_df[label]))

print(f"Created {N_FOLDS} stratified folds:")
for fold_number, (fold_train_rows, fold_val_rows) in enumerate(fold_indices):
    print(f"  Fold {fold_number}: {len(fold_train_rows)} train, {len(fold_val_rows)} val")

### Create Base Preprocessor

This preprocessor will be shared across all stage 1 models for consistent encoding.

In [None]:
# Numeric branch: project-local IQRClipper followed by standardisation.
# NOTE(review): IQRClipper is defined outside this notebook; presumably it clips values
# to a range derived from the IQR scaled by `iqr_multiplier` -- confirm.
numerical_pipeline = Pipeline(steps=[
    ('clipper', IQRClipper(iqr_multiplier=2.0)),
    ('scaler', StandardScaler()),
])

# Ordinal branch: one category list per ordinal column, in the same order as
# `ordinal_features`; values outside the lists are encoded as -1.
ordinal_encoder = OrdinalEncoder(
    categories=[*education_categories, *income_categories],
    handle_unknown='use_encoded_value',
    unknown_value=-1,
)

# Nominal branch: dense one-hot encoding with the first level of each column dropped
onehot_encoder = OneHotEncoder(
    drop='first',
    sparse_output=False,
    handle_unknown='ignore',
)

# Route each feature group to its branch
base_preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_pipeline, numerical_features),
        ('ord', ordinal_encoder, ordinal_features),
        ('nom', onehot_encoder, nominal_features),
    ]
)

print("Base preprocessor created")
for feature_kind, feature_names in (
    ('Numerical', numerical_features),
    ('Ordinal', ordinal_features),
    ('Nominal', nominal_features),
):
    print(f"  {feature_kind} features: {len(feature_names)}")

## Check for Existing Checkpoint

In [None]:
# Decide whether this run continues a previous one. Every later cell branches on
# RESUME_FROM_CHECKPOINT, so it has to be defined here, before any of them run.
RESUME_FROM_CHECKPOINT = CHECKPOINT_PATH.exists()

if RESUME_FROM_CHECKPOINT:
    print(f"Checkpoint found: {CHECKPOINT_PATH}")

    # NOTE(review): load_checkpoint is defined outside this notebook. The unpacking below
    # assumes it takes the checkpoint path and returns a mapping keyed by the same names
    # that save_checkpoint receives in the final cell -- confirm against
    # functions/ensemble_stage2_model.py before relying on resume.
    checkpoint = load_checkpoint(CHECKPOINT_PATH)

    # Restore every variable the hill climbing loop reads
    ensemble_models = checkpoint['ensemble_models']
    stage2_model = checkpoint['stage2_model']
    temperature = checkpoint['temperature']
    current_fold = checkpoint['current_fold']
    best_ensemble_score = checkpoint['best_score']
    acceptance_history = checkpoint['acceptance_history']

    # The final cell stores the last iteration that ran, so continue with the one after it
    start_iteration = checkpoint['iteration'] + 1

    print(f"  Resuming at iteration: {start_iteration}")
    print(f"  Ensemble size: {len(ensemble_models)}")
    print(f"  Best score so far: {best_ensemble_score:.6f}")
else:
    print(f"No checkpoint at {CHECKPOINT_PATH}; starting a fresh run")

## Initialize or Resume Ensemble

### Option 1: Initialize Founder Ensemble (if starting fresh)

In [None]:
# Build the founder ensemble: one randomly configured stage 1 pipeline per fold, tuned on
# that fold's training split and scored on its held-out validation split.
if not RESUME_FROM_CHECKPOINT:
    print("=" * 80)
    print(f"INITIALIZING FOUNDER ENSEMBLE ({N_FOLDS} models, one per fold)")
    print("=" * 80)

    ensemble_models = []
    fold_predictions = [None] * N_FOLDS  # validation scores of each fold's founder
    fold_labels = [None] * N_FOLDS       # matching ground-truth labels
    founder_scores = []

    # Train one founder model per fold
    for fold in range(N_FOLDS):
        print(f"\nTraining founder model {fold + 1}/{N_FOLDS} (Fold {fold})")
        print("-" * 80)

        # Get fold data. The label column is removed from the feature frames so it can
        # never reach a model as an input.
        train_idx, val_idx = fold_indices[fold]
        fold_train_df = train_df.iloc[train_idx]
        fold_val_df = train_df.iloc[val_idx]
        X_train = fold_train_df.drop(columns=[label])
        y_train = fold_train_df[label]
        X_val = fold_val_df.drop(columns=[label])
        y_val = fold_val_df[label]

        # Generate random pipeline for founder
        pipeline, metadata = generate_random_pipeline(
            iteration=fold,
            random_state=RANDOM_STATE + fold,
            base_preprocessor=base_preprocessor
        )

        print("  Pipeline config:")
        print(f"    Classifier: {metadata['classifier_type']}")
        print(f"    Transformers: {metadata['transformers_used']}")
        print(f"    Use PCA: {metadata['use_pca']}")

        # Optimize on this fold's training split only, so the validation rows scored
        # below stay unseen by the model.
        # NOTE(review): keyword names follow the other two quick_optimize_pipeline calls
        # in this notebook (X_train / y_train / cv_folds) -- confirm against the helper.
        print(f"  Optimizing on fold {fold} training split...")
        start_time = time.time()

        optimized_pipeline, cv_score = quick_optimize_pipeline(
            pipeline=pipeline,
            X_train=X_train,
            y_train=y_train,
            random_state=RANDOM_STATE + fold,
            n_iter=FOUNDER_OPTIMIZE_ITERATIONS,
            cv_folds=FOUNDER_OPTIMIZE_CV,
            n_jobs=QUICK_OPTIMIZE_JOBS
        )

        optimization_time = time.time() - start_time
        print(f"  Optimization complete ({optimization_time:.1f}s)")
        print(f"  CV ROC-AUC: {cv_score:.6f}")

        # Generate predictions on validation fold: positive-class probability when the
        # model exposes predict_proba, raw decision scores otherwise.
        if hasattr(optimized_pipeline, 'predict_proba'):
            val_pred = optimized_pipeline.predict_proba(X_val)[:, 1]
        else:
            val_pred = optimized_pipeline.decision_function(X_val)

        fold_predictions[fold] = val_pred
        fold_labels[fold] = y_val.values

        # Calculate validation AUC
        val_auc = roc_auc_score(y_val, val_pred)
        founder_scores.append(val_auc)

        print(f"  Validation ROC-AUC: {val_auc:.6f}")

        # Save founder model
        ensemble_models.append(optimized_pipeline)
        model_path = ENSEMBLE_DIR / f'founder_model_{fold}.joblib'
        joblib.dump(optimized_pipeline, model_path)

        # Log founder in the same form the hill climbing loop uses: no log file path,
        # an ensemble_id instead.
        pipeline_hash = compute_pipeline_hash(optimized_pipeline, metadata)
        ensemble_id = f"founder_{fold}"
        log_iteration(
            iteration=fold,
            fold=fold,
            accepted=True,
            rejection_reason='founder',
            pipeline_hash=pipeline_hash,
            stage1_cv_score=cv_score,
            stage1_val_auc=val_auc,
            stage2_val_auc=0.0,  # Will update after stage 2 training
            ensemble_size=fold + 1,
            diversity_score=0.0,
            temperature=BASE_TEMPERATURE,
            metadata=metadata,
            ensemble_id=ensemble_id
        )

    print(f"\n{'=' * 80}")
    print("FOUNDER ENSEMBLE COMPLETE")
    print(f"{'=' * 80}")
    print(f"Mean validation ROC-AUC: {np.mean(founder_scores):.6f} ± {np.std(founder_scores):.6f}")
    print(f"Per-fold scores: {[f'{s:.6f}' for s in founder_scores]}")

### Train Initial Stage 2 DNN (if starting fresh)

In [None]:
# Train the first stage 2 DNN on the founder predictions, one fold after another, and
# initialise the state the hill climbing loop starts from.
if not RESUME_FROM_CHECKPOINT:
    print(f"\n{'=' * 80}")
    print("TRAINING INITIAL STAGE 2 DNN")
    print(f"{'=' * 80}")

    # For initial stage 2, use simple architecture
    # NOTE(review): the network is built with n_models=len(ensemble_models), while the
    # input assembled below has a single column per fold. Whether that is compatible
    # depends on build_stage2_dnn, which is not visible here -- confirm.
    stage2_model = build_stage2_dnn(
        n_models=len(ensemble_models),
        n_layers=1,
        units_per_layer=64,
        dropout=0.3,
        batch_norm=True,
        activation='relu',
        learning_rate=0.001
    )

    print("Initial stage 2 DNN architecture:")
    stage2_model.summary()

    # Train on each fold and evaluate. The same model object is passed back in on every
    # pass, so training continues from the previous fold's result.
    fold_stage2_scores = []

    for fold in range(N_FOLDS):
        print(f"\nTraining stage 2 on fold {fold}...")

        # Get predictions for this fold as a single-column 2-D array
        X_stage2 = fold_predictions[fold].reshape(-1, 1)
        y_stage2 = fold_labels[fold]

        # Use a simple train/val split for initial training: first 80% of the rows in
        # their existing order for training, the remaining 20% for validation.
        split_idx = int(len(X_stage2) * 0.8)
        X_train_s2 = X_stage2[:split_idx]
        y_train_s2 = y_stage2[:split_idx]
        X_val_s2 = X_stage2[split_idx:]
        y_val_s2 = y_stage2[split_idx:]

        # Train (pass ensemble_id as log_path parameter for logging)
        # NOTE(review): epochs/batch_size/patience are literals here and differ from the
        # STAGE2_* constants in the configuration cell -- confirm which set is intended.
        ensemble_id = f"founder_stage2_fold{fold}"
        stage2_model, history = train_stage2_dnn(
            model=stage2_model,
            X_train=X_train_s2,
            y_train=y_train_s2,
            X_val=X_val_s2,
            y_val=y_val_s2,
            epochs=50,
            batch_size=32,
            patience=10,
            log_path=ensemble_id,  # Pass ensemble_id as string
            iteration=fold,
            fold=fold
        )

        # Evaluate on full fold (this includes the 80% the model was just trained on)
        fold_pred_s2 = stage2_model.predict(X_stage2, verbose=0).flatten()
        fold_auc_s2 = roc_auc_score(y_stage2, fold_pred_s2)
        fold_stage2_scores.append(fold_auc_s2)

        print(f"  Fold {fold} stage 2 ROC-AUC: {fold_auc_s2:.6f}")

    # Set initial best score
    best_ensemble_score = np.mean(fold_stage2_scores)

    print("\nInitial ensemble performance:")
    print(f"  Mean stage 2 ROC-AUC: {best_ensemble_score:.6f} ± {np.std(fold_stage2_scores):.6f}")

    # Initialize hill climbing variables; the founders occupied iterations 0..N_FOLDS-1
    start_iteration = N_FOLDS
    current_fold = 0
    temperature = BASE_TEMPERATURE
    acceptance_history = []

## Hill Climbing Loop

Iteratively add diverse models with simulated annealing acceptance.

In [None]:
def _positive_class_scores(model, X):
    """Score every row of X with a fitted stage 1 model.

    Returns column 1 of ``predict_proba`` when the model provides that method and the
    output of ``decision_function`` otherwise.
    """
    if hasattr(model, 'predict_proba'):
        return model.predict_proba(X)[:, 1]
    return model.decision_function(X)


print(f"\n{'=' * 80}")
print("STARTING HILL CLIMBING LOOP")
print(f"{'=' * 80}")

iterations_since_improvement = 0
consecutive_rejections = 0

# Give `iteration` a value up front so the report below and the checkpoint cell still
# work when the range is empty (for example when resuming an already finished run).
iteration = start_iteration - 1

for iteration in range(start_iteration, MAX_ITERATIONS):
    print(f"\n{'=' * 80}")
    print(f"Iteration {iteration + 1}/{MAX_ITERATIONS}")
    print(f"{'=' * 80}")
    print(f"Current ensemble size: {len(ensemble_models)}")
    print(f"Best score: {best_ensemble_score:.6f}")
    print(f"Temperature: {temperature:.6f}")
    print(f"Iterations since improvement: {iterations_since_improvement}/{PLATEAU_ITERATIONS}")

    # Get current fold (folds are visited round-robin)
    current_fold = iteration % N_FOLDS
    print(f"Using fold {current_fold} for validation")

    # Slice this fold's data straight from the stored split positions, with the label
    # column removed from the feature frames.
    train_idx, val_idx = fold_indices[current_fold]
    fold_train_df = train_df.iloc[train_idx]
    fold_val_df = train_df.iloc[val_idx]
    X_train_fold = fold_train_df.drop(columns=[label])
    y_train_fold = fold_train_df[label]
    X_val_fold = fold_val_df.drop(columns=[label])
    y_val_fold = fold_val_df[label]

    # Generate random pipeline
    pipeline, metadata = generate_random_pipeline(
        iteration=iteration,
        random_state=RANDOM_STATE + iteration,
        base_preprocessor=base_preprocessor
    )

    print("\nPipeline configuration:")
    print(f"  Row sample: {metadata['row_sample_pct']:.1%}")
    print(f"  Col sample: {metadata['col_sample_pct']:.1%}")
    print(f"  Classifier: {metadata['classifier_type']}")
    print(f"  Transformers: {', '.join(metadata['transformers_used']) if metadata['transformers_used'] else 'None'}")

    # Quick optimize on the current fold's training split
    print("\nOptimizing pipeline...")
    optimized_pipeline, cv_score = quick_optimize_pipeline(
        pipeline=pipeline,
        X_train=X_train_fold,
        y_train=y_train_fold,
        random_state=RANDOM_STATE + iteration,
        n_iter=QUICK_OPTIMIZE_ITERATIONS,
        cv_folds=QUICK_OPTIMIZE_CV,
        n_jobs=QUICK_OPTIMIZE_JOBS
    )

    print(f"  Stage 1 CV ROC-AUC: {cv_score:.6f}")

    # Evaluate on validation fold
    val_pred = _positive_class_scores(optimized_pipeline, X_val_fold)
    val_auc = roc_auc_score(y_val_fold, val_pred)
    print(f"  Stage 1 validation ROC-AUC: {val_auc:.6f}")

    # Add to candidate pool and retrain stage 2
    print("\nEvaluating ensemble with candidate...")
    candidate_ensemble = ensemble_models + [optimized_pipeline]

    # Evaluate ensemble on current fold
    # NOTE(review): evaluate_ensemble is not imported by name in this notebook;
    # presumably it arrives through the star import at the top -- confirm.
    # NOTE(review): models added on other folds were fitted on data that overlaps this
    # fold's validation rows, so this score is not a fully held-out estimate.
    candidate_score = evaluate_ensemble(
        stage1_models=candidate_ensemble,
        stage2_model=stage2_model,
        X=X_val_fold,
        y=y_val_fold
    )

    print(f"  Candidate ensemble ROC-AUC: {candidate_score:.6f}")

    # Calculate diversity from one score column per model; the candidate's column was
    # already computed above, so only the existing members are scored here.
    all_predictions = np.column_stack(
        [_positive_class_scores(model, X_val_fold) for model in ensemble_models] + [val_pred]
    )
    diversity_score = calculate_ensemble_diversity(all_predictions)
    print(f"  Diversity score: {diversity_score:.6f}")

    # Simulated annealing acceptance
    accept, reason = adaptive_simulated_annealing_acceptance(
        current_score=best_ensemble_score,
        candidate_score=candidate_score,
        temperature=temperature,
        random_state=RANDOM_STATE + iteration
    )

    print(f"\nDecision: {'✓ ACCEPT' if accept else '✗ REJECT'}")
    print(f"  Reason: {reason}")

    # Log iteration
    pipeline_hash = compute_pipeline_hash(optimized_pipeline, metadata)
    ensemble_id = f"iter_{iteration}_fold_{current_fold}"
    log_iteration(
        iteration=iteration,
        fold=current_fold,
        accepted=accept,
        rejection_reason=reason,
        pipeline_hash=pipeline_hash,
        stage1_cv_score=cv_score,
        stage1_val_auc=val_auc,
        stage2_val_auc=candidate_score,
        ensemble_size=len(candidate_ensemble) if accept else len(ensemble_models),
        diversity_score=diversity_score,
        temperature=temperature,
        metadata=metadata,
        ensemble_id=ensemble_id
    )

    # Update ensemble if accepted
    if accept:
        ensemble_models.append(optimized_pipeline)
        acceptance_history.append(True)
        consecutive_rejections = 0

        # Save model
        model_path = ENSEMBLE_DIR / f'model_{iteration}.joblib'
        joblib.dump(optimized_pipeline, model_path)

        # Check if this is the best score. A candidate can be accepted without beating
        # the best score, in which case the plateau counter keeps growing.
        if candidate_score > best_ensemble_score:
            print(f"  🎉 New best score: {candidate_score:.6f} (Δ={candidate_score - best_ensemble_score:.6f})")
            best_ensemble_score = candidate_score
            iterations_since_improvement = 0
        else:
            iterations_since_improvement += 1
    else:
        acceptance_history.append(False)
        consecutive_rejections += 1
        iterations_since_improvement += 1

    # Update temperature
    temperature = update_temperature(
        iteration=iteration,
        acceptance_history=acceptance_history,
        current_temperature=temperature,
        base_temperature=BASE_TEMPERATURE,
        decay_rate=TEMPERATURE_DECAY
    )

    # Check termination conditions
    if iterations_since_improvement >= PLATEAU_ITERATIONS:
        print(f"\n{'=' * 80}")
        print(f"TERMINATING: No improvement for {PLATEAU_ITERATIONS} iterations")
        print(f"{'=' * 80}")
        break

# An empty history (no iteration ran) reports 0% instead of dividing by zero
acceptance_rate = (
    sum(acceptance_history) / len(acceptance_history) if acceptance_history else 0.0
)

print(f"\n{'=' * 80}")
print("HILL CLIMBING COMPLETE")
print(f"{'=' * 80}")
print(f"Final ensemble size: {len(ensemble_models)}")
print(f"Best ensemble ROC-AUC: {best_ensemble_score:.6f}")
print(f"Total iterations: {iteration + 1}")
print(f"Acceptance rate: {acceptance_rate:.1%}")

## Save Final Checkpoint and Metadata

In [None]:
# Share of hill climbing candidates that were accepted, computed once for both outputs.
# An empty history (no iteration ran) is reported as 0.0 instead of dividing by zero.
acceptance_rate = (
    sum(acceptance_history) / len(acceptance_history) if acceptance_history else 0.0
)

# Save final checkpoint
save_checkpoint(
    checkpoint_path=CHECKPOINT_PATH,
    ensemble_models=ensemble_models,
    stage2_model=stage2_model,
    iteration=iteration,
    temperature=temperature,
    current_fold=current_fold,
    best_score=best_ensemble_score,
    acceptance_history=acceptance_history,
    metadata={
        'total_iterations': iteration + 1,
        'final_ensemble_size': len(ensemble_models),
        'acceptance_rate': acceptance_rate,
        'best_score': best_ensemble_score
    }
)

# Save ensemble metadata as JSON
metadata_path = MODELS_DIR / 'ensemble_metadata.json'
with open(metadata_path, 'w') as f:
    json.dump({
        'ensemble_size': len(ensemble_models),
        'total_iterations': iteration + 1,
        # Coerced to a built-in float so a NumPy scalar can never break serialisation
        'best_score': float(best_ensemble_score),
        'acceptance_rate': acceptance_rate,
        'training_completed': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'n_folds': N_FOLDS,
        'random_state': RANDOM_STATE
    }, f, indent=2)

print(f"\nFinal checkpoint saved: {CHECKPOINT_PATH}")
print(f"Metadata saved: {metadata_path}")

## Summary Statistics

In [None]:
# Final report of the run. Counts come from acceptance_history, which holds one boolean
# per hill climbing iteration (True = candidate accepted).
accepted_count = sum(acceptance_history)
rejected_count = len(acceptance_history) - accepted_count
# An empty history (no iteration ran) is reported as 0% instead of dividing by zero
acceptance_rate = accepted_count / len(acceptance_history) if acceptance_history else 0.0

print(f"\n{'=' * 80}")
print("ENSEMBLE TRAINING SUMMARY")
print(f"{'=' * 80}")
print("\nFinal Statistics:")
print(f"  Ensemble size: {len(ensemble_models)}")
print(f"  Best validation ROC-AUC: {best_ensemble_score:.6f}")
print(f"  Total iterations: {iteration + 1}")
print(f"  Accepted models: {accepted_count}")
print(f"  Rejected models: {rejected_count}")
print(f"  Acceptance rate: {acceptance_rate:.1%}")
print("\nFiles created:")
# Iterations are logged to the SQLite database initialised in the configuration cell;
# no separate training / stage 2 log file paths are defined in this notebook.
print(f"  Database: {ensemble_database.DB_PATH}")
print(f"  Ensemble models: {ENSEMBLE_DIR}")
print(f"  Checkpoint: {CHECKPOINT_PATH}")
print(f"  Metadata: {metadata_path}")
print(f"\n{'=' * 80}")