## Setup

In [1]:
# GPU setup: expose only the P100 (device index 1) to TensorFlow, then confirm it is visible.
import os
import sys

# cuDNN shared libraries that pip installed inside the project virtualenv.
venv_cuda_libs = '/mnt/arkk/kaggle/diabetes-prediction/.venv/lib/python3.12/site-packages/nvidia/cudnn/lib'

# Append to an inherited LD_LIBRARY_PATH instead of overwriting it.
# NOTE(review): the dynamic loader normally reads LD_LIBRARY_PATH at process start, so
# changing it here may only affect child processes - confirm it has the intended effect.
inherited_lib_path = os.environ.get('LD_LIBRARY_PATH')
if inherited_lib_path is None:
    os.environ['LD_LIBRARY_PATH'] = venv_cuda_libs
else:
    os.environ['LD_LIBRARY_PATH'] = f"{inherited_lib_path}:{venv_cuda_libs}"

# Both variables are set before TensorFlow is imported below.
os.environ.update({
    'CUDA_VISIBLE_DEVICES': '1',   # 0=GTX1080, 1=P100
    'TF_CPP_MIN_LOG_LEVEL': '2',
})

# Sanity check: report the TensorFlow build and the GPUs it can see.
import tensorflow as tf
print(f"TensorFlow version: {tf.__version__}")
gpus = tf.config.list_physical_devices('GPU')
print(f"GPUs available: {gpus}")
print(f"Number of GPUs: {len(gpus)}")

2025-12-08 01:43:04.305334: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765176184.329048 1144811 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765176184.336544 1144811 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


TensorFlow version: 2.18.0
GPUs available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Number of GPUs: 1


In [2]:
# Standard imports
# NOTE: `sys` is also imported in the GPU-configuration cell; it is repeated here
# so this cell does not depend on that one for the sys.path tweak below.
import sys
from pathlib import Path
from datetime import datetime

import numpy as np
import pandas as pd
from tensorflow import keras
from sklearn.metrics import roc_auc_score

# Add functions to path
# Inserts the resolved current working directory at the front of sys.path; this
# must run before the `functions` imports below (presumably the package lives in
# the notebook's working directory - confirm).
sys.path.insert(0, str(Path('.').resolve()))

# Import ensemble modules - use same optimization function as online path!
# NOTE(review): create_data_splits is imported but never called in the cells visible
# in this notebook, while later cells use X_val_s1 / y_val_s1 / X_val_s2 / y_val_s2
# without defining them - a data-preparation cell appears to be missing.
from functions.ensemble_initialization import create_data_splits
from functions.ensemble_stage2_training import optimize_and_update_config
from functions import ensemble_config

### Configuration

In [3]:
# Run-wide settings: everything a reader might want to tune lives in this cell.

# Seed and name of the target column.
RANDOM_STATE = 315
LABEL = 'diagnosed_diabetes'

# Hyperparameter search budget (GPU-optimized)
MAX_TRIALS = 30  # ~15-20 min on P100
EXECUTIONS_PER_TRIAL = 3  # Statistical confidence

# SQLite database path for loading ensemble state
DB_PATH = '../data/ensemble_training.db'

# Echo the settings so they are recorded alongside the results.
config_summary = [
    "Configuration:",
    f"  Max trials: {MAX_TRIALS}",
    f"  Executions per trial: {EXECUTIONS_PER_TRIAL}",
    f"  Database: {DB_PATH}",
    "  Estimated time: ~15-30 minutes on P100 GPU",
]
print("\n".join(config_summary))

Configuration:
  Max trials: 30
  Executions per trial: 3
  Database: ../data/ensemble_training.db
  Estimated time: ~15-30 minutes on P100 GPU


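## Data Preparation

> **TODO:** This section has no code cell. The cells below require `X_val_s1`, `y_val_s1`, `X_val_s2`, and `y_val_s2`, so the validation splits must be created here first (presumably with `create_data_splits`, which is imported above but never called); otherwise the next cell fails with `NameError`.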

## Load Stage 1 Models from a Prior Ensemble Run

In [None]:
# Load a fixed number of Stage 1 models from a prior ensemble run and report
# each model's ROC AUC on the Stage 1 validation split.
import joblib
from glob import glob

# Directory holding the serialized Stage 1 models of the prior run.
models_path = "../models/run_20251208_045148/ensemble_stage1_models"
# Number of models to load (the first N in sorted filename order).
N_STAGE1_MODELS = 5

print(f"Loading models from: {models_path}\n")

# Fail before any model is unpickled if the validation split is missing.
# Previously this surfaced as a bare NameError in the middle of the loading loop.
missing_splits = [name for name in ('X_val_s1', 'y_val_s1') if name not in globals()]
if missing_splits:
    raise NameError(
        f"Validation split variable(s) not defined: {', '.join(missing_splits)}. "
        "Run the Data Preparation step before loading Stage 1 models."
    )

# Find all model files (the 'model_*' pattern presumably excludes the founder model -
# confirm against the run's file naming).
model_files = sorted(glob(f"{models_path}/model_*.joblib"))
if not model_files:
    # An empty match would otherwise yield an empty ensemble and a confusing failure later.
    raise FileNotFoundError(
        f"No files matching 'model_*.joblib' found in {models_path}"
    )

selected_files = model_files[:N_STAGE1_MODELS]
stage1_models = []

for model_file in selected_files:
    model_name = Path(model_file).stem
    # SECURITY: joblib.load unpickles arbitrary objects - only load files from trusted runs.
    model = joblib.load(model_file)

    # Score the positive-class probability on the validation set.
    pred = model.predict_proba(X_val_s1)[:, 1]
    auc = roc_auc_score(y_val_s1, pred)

    stage1_models.append(model)
    print(f"  {model_name}: AUC = {auc:.6f}")

print(f"\n{len(stage1_models)} Stage 1 models loaded!")
print(f"Model files: {[Path(f).stem for f in selected_files]}")

Loading models from: ../models/run_20251208_045148/ensemble_stage1_models



NameError: name 'X_val_s1' is not defined

## Run Hyperparameter Optimization

**Uses the same `optimize_and_update_config()` function as online hill climbing!**

This ensures:
- Identical search space
- Same data preparation (conservative 95/5 split)
- Same architecture builders (pyramid/funnel)
- Results directly applicable to production

In [None]:
# Announce the search, then hand off to the shared optimization routine.
banner = "=" * 80
print(banner)
print("RUNNING STAGE 2 DNN HYPERPARAMETER OPTIMIZATION")
print("Using SAME function as online hill climbing: optimize_and_update_config()")
print(banner)

# Inputs for the search: the loaded Stage 1 models, both validation splits,
# and the trial budget from the configuration cell.
search_kwargs = {
    'ensemble_models': stage1_models,
    'X_val_s1': X_val_s1,
    'y_val_s1': y_val_s1,
    'X_val_s2': X_val_s2,
    'y_val_s2': y_val_s2,
    'max_trials': MAX_TRIALS,
    'executions_per_trial': EXECUTIONS_PER_TRIAL,
}

# Per the original author's note, this call also updates
# ensemble_config.STAGE2_DNN_CONFIG in-memory (not verifiable from this notebook).
optimized_config = optimize_and_update_config(**search_kwargs)

print(f"\n{banner}")
print("OPTIMIZATION COMPLETE")
print(banner)

## Results

In [None]:
# Pretty-print the tuned configuration as JSON, framed by two horizontal rules.
import json

rule = "=" * 80
print("\nOptimized Stage 2 DNN Configuration:", rule, sep="\n")
print(json.dumps(optimized_config, indent=2), rule, sep="\n")

In [None]:
# Summarize the headline hyperparameters: per-layer width/dropout and learning rate.
layers = optimized_config['architecture']['hidden_layers']
lr = optimized_config['training']['learning_rate']

divider = "-" * 40
print("\nKey Hyperparameters:")
print(divider)
print(f"Number of layers: {len(layers)}")
print("Layer configuration:")
for position, spec in enumerate(layers, start=1):
    units, dropout = spec['units'], spec['dropout']
    print(f"  Layer {position}: {units} units, dropout={dropout:.3f}")
print(f"Learning rate: {lr:.6f}")
print(divider)

## Copy-Paste Configuration

Use this configuration in `ensemble_config.py` to apply optimized hyperparameters.

In [None]:
# Emit a ready-to-paste STAGE2_DNN_CONFIG assignment with provenance comments.
rule = "=" * 80

# The original cell referenced LATEST_RUN, which is not defined anywhere in the
# visible notebook and raised NameError. Use it when it exists; otherwise derive
# the run name from the parent directory of models_path.
if 'LATEST_RUN' in globals():
    run_label = globals()['LATEST_RUN']
else:
    run_label = Path(models_path).parent.name

print("\n" + rule)
print("COPY-PASTE CONFIGURATION FOR ensemble_config.py")
print(rule)
print()
print("# Stage 2 DNN Configuration (optimized via Keras Tuner)")
print(f"# Optimized on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"# Models used: {len(stage1_models)} (from {run_label})")
print(f"# Trials: {MAX_TRIALS} × {EXECUTIONS_PER_TRIAL} executions")
print()
# indent=4 here (versus 2 in the display cell), as in the original.
print("STAGE2_DNN_CONFIG = " + json.dumps(optimized_config, indent=4))
print()
print(rule)

## Summary

✅ **Test notebook complete!**

### What we did:

1. ✅ Loaded training data (60/20/20 split)
2. ✅ Loaded actual Stage 1 models from batch 1
3. ✅ Ran **`optimize_and_update_config()`** (same as online hill climbing)
4. ✅ Displayed optimized hyperparameters
5. ✅ Generated copy-paste config for `ensemble_config.py`

### Key points:

- **Shared code path**: Uses identical optimization function as online training
- **Real data**: Uses actual Stage 1 models from hill climbing run
- **GPU-optimized**: ~15-30 min on P100 GPU
- **Focused search**: pyramid/funnel architectures, 2-3 layers, 64-256 units
- **Conservative split**: 95% training (266k), 5% validation (14k)

### Next steps:

1. Copy the `STAGE2_DNN_CONFIG` above into `ensemble_config.py`
2. Run full hill climbing with GPU enabled
3. Architecture will be automatically optimized at batches 10, 20, 30+
4. Compare performance against baseline