In [1]:
"""
Notebook: 05_train_models.ipynb
Train and compare baseline (structured-only) vs fused (structured + embeddings) models
"""

# Cell 1: Setup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json
import joblib
from datetime import datetime

# ML libraries
import lightgbm as lgb
from sklearn.metrics import (
    roc_auc_score, average_precision_score, 
    roc_curve, precision_recall_curve,
    confusion_matrix, classification_report,
    brier_score_loss
)
from sklearn.calibration import calibration_curve, CalibratedClassifierCV
from sklearn.model_selection import cross_val_score
import shap

# MLflow for experiment tracking
import mlflow
import mlflow.lightgbm

import warnings
warnings.filterwarnings('ignore')

print("✓ Setup complete")



IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.


✓ Setup complete


In [2]:
# Cell 2: Load Configuration and Data
"""
Load all datasets
"""
base_path = Path('..')

# Load config
with open(base_path / 'configs/config.json', 'r') as f:
    config = json.load(f)

# Load structured-only features
print("Loading structured-only datasets...")
train_struct = pd.read_parquet(base_path / 'data/processed/train_features.parquet')
calib_struct = pd.read_parquet(base_path / 'data/processed/calibration_features.parquet')
test_struct = pd.read_parquet(base_path / 'data/processed/test_features.parquet')

print(f"✓ Structured features:")
print(f"   Train: {train_struct.shape}")
print(f"   Calib: {calib_struct.shape}")
print(f"   Test:  {test_struct.shape}")

# Load fused features (structured + embeddings)
print("\nLoading fused datasets...")
train_fused = pd.read_parquet(base_path / 'data/processed/train_fused.parquet')
calib_fused = pd.read_parquet(base_path / 'data/processed/calibration_fused.parquet')
test_fused = pd.read_parquet(base_path / 'data/processed/test_fused.parquet')

print(f"✓ Fused features:")
print(f"   Train: {train_fused.shape}")
print(f"   Calib: {calib_fused.shape}")
print(f"   Test:  {test_fused.shape}")



Loading structured-only datasets...
✓ Structured features:
   Train: (14, 88)
   Calib: (2, 88)
   Test:  (5, 88)

Loading fused datasets...
✓ Fused features:
   Train: (14, 1112)
   Calib: (2, 1112)
   Test:  (5, 1112)


In [None]:
# Load pre-split datasets directly (no additional splitting needed)
print("Loading pre-split datasets...")
train_struct = pd.read_parquet(processed_dir / 'train_features.parquet')
calib_struct = pd.read_parquet(processed_dir / 'calibration_features.parquet')
test_struct = pd.read_parquet(processed_dir / 'test_features.parquet')

# Remove categorical columns if they still exist
categorical_cols = ['AGE_GROUP', 'ETHNICITY_CATEGORY', 'LOS_CATEGORY', 'DISCHARGE_SEASON']
for df in [train_struct, calib_struct, test_struct]:
    df.drop([col for col in categorical_cols if col in df.columns], axis=1, inplace=True, errors='ignore')

# Extract features and labels
feature_cols = [col for col in train_struct.columns if col not in ['HADM_ID', 'SUBJECT_ID', 'READMIT_30']]
X_train = train_struct[feature_cols].values
y_train = train_struct['READMIT_30'].values
X_calib = calib_struct[feature_cols].values
y_calib = calib_struct['READMIT_30'].values
X_test = test_struct[feature_cols].values
y_test = test_struct['READMIT_30'].values

print(f"✓ Train set: {X_train.shape}, Positive rate: {y_train.mean():.2%}")
print(f"✓ Calibration set: {X_calib.shape}, Positive rate: {y_calib.mean():.2%}")
print(f"✓ Test set: {X_test.shape}, Positive rate: {y_test.mean():.2%}")


Structured features: 86
Train: (9, 86), Calib: (2, 86), Test: (3, 86)
Class distribution:
  Train: {0: 6, 1: 3}
  Calib: {1: 1, 0: 1}
  Test: {0: 2, 1: 1}


In [13]:
# Cell 4: Setup MLflow Tracking
"""
Initialize MLflow for experiment tracking
"""
mlflow.set_tracking_uri(str(base_path / 'mlruns'))
mlflow.set_experiment('TRANCE_Readmission_Prediction')

print("✓ MLflow tracking initialized")
print(f"   Tracking URI: {mlflow.get_tracking_uri()}")



✓ MLflow tracking initialized
   Tracking URI: ..\mlruns


In [14]:
# ===============================
# Cell 5: Train Baseline Model
# ===============================

import lightgbm as lgb
from sklearn.metrics import roc_auc_score, average_precision_score
import mlflow

with mlflow.start_run(run_name="Baseline_Structured_Only"):

    # Model parameters
    params = {
        'objective': 'binary',
        'metric': 'auc',
        'boosting_type': 'gbdt',
        'num_leaves': 31,
        'learning_rate': 0.05,
        'feature_fraction': 0.9,
        'bagging_fraction': 0.8,
        'bagging_freq': 5,
        'verbose': -1,
        'random_state': 42
    }

    # Log params
    mlflow.log_params(params)
    mlflow.log_param("n_features", X_train_struct.shape[1])
    mlflow.log_param("n_train", len(X_train_struct))

    # Create LightGBM datasets
    train_data = lgb.Dataset(X_train_struct, label=y_train)
    valid_data = lgb.Dataset(X_calib_struct, label=y_calib, reference=train_data)

    # Train model
    model = lgb.train(
        params,
        train_data,
        num_boost_round=1000,
        valid_sets=[train_data, valid_data],
        valid_names=['train', 'calib'],
        callbacks=[lgb.early_stopping(stopping_rounds=50), lgb.log_evaluation(period=100)]
    )

    # Predictions
    y_pred_train = model.predict(X_train_struct)
    y_pred_calib = model.predict(X_calib_struct)
    y_pred_test = model.predict(X_test_struct)

    # Metrics
    train_auc = roc_auc_score(y_train, y_pred_train)
    calib_auc = roc_auc_score(y_calib, y_pred_calib)
    test_auc = roc_auc_score(y_test, y_pred_test)

    train_auprc = average_precision_score(y_train, y_pred_train)
    calib_auprc = average_precision_score(y_calib, y_pred_calib)
    test_auprc = average_precision_score(y_test, y_pred_test)

    mlflow.log_metric("train_auc", train_auc)
    mlflow.log_metric("calib_auc", calib_auc)
    mlflow.log_metric("test_auc", test_auc)
    mlflow.log_metric("train_auprc", train_auprc)
    mlflow.log_metric("calib_auprc", calib_auprc)
    mlflow.log_metric("test_auprc", test_auprc)

    print(f"Train AUROC: {train_auc:.4f}, AUPRC: {train_auprc:.4f}")
    print(f"Calib AUROC: {calib_auc:.4f}, AUPRC: {calib_auprc:.4f}")
    print(f"Test  AUROC: {test_auc:.4f}, AUPRC: {test_auprc:.4f}")

    # Save model
    model_path = base_path / 'outputs/models/baseline_model.txt'
    model_path.parent.mkdir(parents=True, exist_ok=True)
    model.save_model(str(model_path))
    mlflow.log_artifact(str(model_path))
    print(f"Model saved to: {model_path}")


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	train's auc: 0.5	calib's auc: 0.5
Train AUROC: 0.5000, AUPRC: 0.3333
Calib AUROC: 0.5000, AUPRC: 0.5000
Test  AUROC: 0.5000, AUPRC: 0.3333
Model saved to: ..\outputs\models\baseline_model.txt


In [11]:
# Cell 6: Train Fused Model (Structured + Embeddings)
"""
Train LightGBM on fused features (structured + text embeddings)
"""
print("\n" + "="*60)
print("TRAINING FUSED MODEL (STRUCTURED + EMBEDDINGS)")
print("="*60)

with mlflow.start_run(run_name="Fused_Structured_Embeddings"):
    
    # Log parameters
    mlflow.log_param("model_type", "LightGBM")
    mlflow.log_param("features", "structured_and_embeddings")
    mlflow.log_param("n_features", len(fused_features))
    mlflow.log_param("n_embedding_dims", len(embedding_features))
    mlflow.log_param("n_train", len(X_train_fused))
    
    # Model parameters (same as baseline for fair comparison)
    params_fused = params_baseline.copy()
    
    for key, value in params_fused.items():
        mlflow.log_param(f"lgbm_{key}", value)
    
    # Create datasets
    train_data = lgb.Dataset(X_train_fused, label=y_train)
    valid_data = lgb.Dataset(X_calib_fused, label=y_calib, reference=train_data)
    
    # Train model
    print("Training fused model...")
    model_fused = lgb.train(
        params_fused,
        train_data,
        num_boost_round=1000,
        valid_sets=[train_data, valid_data],
        valid_names=['train', 'valid'],
        callbacks=[
            lgb.early_stopping(stopping_rounds=50),
            lgb.log_evaluation(period=100)
        ]
    )
    
    print(f"✓ Training complete")
    print(f"   Best iteration: {model_fused.best_iteration}")
    print(f"   Best score: {model_fused.best_score['valid']['auc']:.4f}")
    
    # Make predictions
    y_pred_fused_train = model_fused.predict(X_train_fused)
    y_pred_fused_calib = model_fused.predict(X_calib_fused)
    y_pred_fused_test = model_fused.predict(X_test_fused)
    
    # Evaluate
    train_auc_f = roc_auc_score(y_train, y_pred_fused_train)
    calib_auc_f = roc_auc_score(y_calib, y_pred_fused_calib)
    test_auc_f = roc_auc_score(y_test, y_pred_fused_test)
    
    train_auprc_f = average_precision_score(y_train, y_pred_fused_train)
    calib_auprc_f = average_precision_score(y_calib, y_pred_fused_calib)
    test_auprc_f = average_precision_score(y_test, y_pred_fused_test)
    
    # Log metrics
    mlflow.log_metric("train_auc", train_auc_f)
    mlflow.log_metric("calib_auc", calib_auc_f)
    mlflow.log_metric("test_auc", test_auc_f)
    mlflow.log_metric("train_auprc", train_auprc_f)
    mlflow.log_metric("calib_auprc", calib_auprc_f)
    mlflow.log_metric("test_auprc", test_auprc_f)
    
    print(f"\nFused Model Performance:")
    print(f"  Train AUROC: {train_auc_f:.4f} | AUPRC: {train_auprc_f:.4f}")
    print(f"  Calib AUROC: {calib_auc_f:.4f} | AUPRC: {calib_auprc_f:.4f}")
    print(f"  Test  AUROC: {test_auc_f:.4f} | AUPRC: {test_auprc_f:.4f}")
    
    # Save model
    model_path = base_path / 'outputs/models/fused_model.txt'
    model_fused.save_model(str(model_path))
    mlflow.log_artifact(str(model_path))
    
    print(f"\n✓ Model saved to: {model_path}")




TRAINING FUSED MODEL (STRUCTURED + EMBEDDINGS)
Training fused model...


ValueError: pandas dtypes must be int, float or bool.
Fields with bad pandas dtypes: AGE_GROUP: object, ETHNICITY_CATEGORY: object, DISCHARGE_SEASON: object

In [None]:
# Cell 7: Model Comparison
"""
Compare baseline vs fused model performance
"""
print("\n" + "="*60)
print("MODEL COMPARISON")
print("="*60)

# Calculate improvements
auc_improvement = test_auc_f - test_auc
auprc_improvement = test_auprc_f - test_auprc

print("\nTest Set Performance Comparison:")
print("-" * 60)
print(f"{'Metric':<20} {'Baseline':<12} {'Fused':<12} {'Improvement':<12}")
print("-" * 60)
print(f"{'AUROC':<20} {test_auc:<12.4f} {test_auc_f:<12.4f} {auc_improvement:+.4f} ({auc_improvement/test_auc*100:+.2f}%)")
print(f"{'AUPRC':<20} {test_auprc:<12.4f} {test_auprc_f:<12.4f} {auprc_improvement:+.4f} ({auprc_improvement/test_auprc*100:+.2f}%)")
print("-" * 60)

if auc_improvement > 0:
    print(f"\n✅ Embeddings provide {auc_improvement:.4f} AUROC improvement!")
else:
    print(f"\n⚠️  Embeddings don't improve performance significantly")



In [None]:
# Cell 8: ROC and PR Curves
"""
Visualize model performance curves
"""
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# ROC Curve
fpr_baseline, tpr_baseline, _ = roc_curve(y_test, y_pred_baseline_test)
fpr_fused, tpr_fused, _ = roc_curve(y_test, y_pred_fused_test)

axes[0].plot(fpr_baseline, tpr_baseline, label=f'Baseline (AUROC={test_auc:.3f})', linewidth=2)
axes[0].plot(fpr_fused, tpr_fused, label=f'Fused (AUROC={test_auc_f:.3f})', linewidth=2)
axes[0].plot([0, 1], [0, 1], 'k--', label='Random', alpha=0.3)
axes[0].set_xlabel('False Positive Rate')
axes[0].set_ylabel('True Positive Rate')
axes[0].set_title('ROC Curve - Test Set')
axes[0].legend()
axes[0].grid(alpha=0.3)

# Precision-Recall Curve
precision_baseline, recall_baseline, _ = precision_recall_curve(y_test, y_pred_baseline_test)
precision_fused, recall_fused, _ = precision_recall_curve(y_test, y_pred_fused_test)

axes[1].plot(recall_baseline, precision_baseline, label=f'Baseline (AUPRC={test_auprc:.3f})', linewidth=2)
axes[1].plot(recall_fused, precision_fused, label=f'Fused (AUPRC={test_auprc_f:.3f})', linewidth=2)
axes[1].axhline(y=y_test.mean(), color='k', linestyle='--', label='Baseline Rate', alpha=0.3)
axes[1].set_xlabel('Recall')
axes[1].set_ylabel('Precision')
axes[1].set_title('Precision-Recall Curve - Test Set')
axes[1].legend()
axes[1].grid(alpha=0.3)

plt.tight_layout()
plt.savefig(base_path / 'outputs/figures/model_performance_curves.png', dpi=300, bbox_inches='tight')
print("\n✓ Performance curves saved")
plt.show()



In [None]:
# Cell 9: Feature Importance Analysis
"""
Analyze feature importance for both models
"""
print("\n" + "="*60)
print("FEATURE IMPORTANCE ANALYSIS")
print("="*60)

# Get feature importance
importance_baseline = model_baseline.feature_importance(importance_type='gain')
importance_fused = model_fused.feature_importance(importance_type='gain')

# Create dataframes
feat_imp_baseline = pd.DataFrame({
    'feature': struct_features,
    'importance': importance_baseline
}).sort_values('importance', ascending=False)

feat_imp_fused = pd.DataFrame({
    'feature': fused_features,
    'importance': importance_fused
}).sort_values('importance', ascending=False)

# Separate structured vs embedding importance in fused model
feat_imp_fused['type'] = feat_imp_fused['feature'].apply(
    lambda x: 'Embedding' if x.startswith('emb_') else 'Structured'
)

# Calculate total importance by type
total_importance = feat_imp_fused.groupby('type')['importance'].sum()
print("\nImportance Distribution in Fused Model:")
print(f"  Structured features: {total_importance.get('Structured', 0):.0f} ({total_importance.get('Structured', 0)/feat_imp_fused['importance'].sum()*100:.1f}%)")
print(f"  Embedding features:  {total_importance.get('Embedding', 0):.0f} ({total_importance.get('Embedding', 0)/feat_imp_fused['importance'].sum()*100:.1f}%)")

# Top features
print("\nTop 15 Features (Baseline Model):")
print(feat_imp_baseline.head(15).to_string(index=False))

print("\nTop 15 Features (Fused Model):")
print(feat_imp_fused.head(15).to_string(index=False))

# Visualize
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Baseline top features
top_n = 20
feat_imp_baseline.head(top_n).plot.barh(x='feature', y='importance', ax=axes[0], legend=False)
axes[0].set_title('Top 20 Features - Baseline Model')
axes[0].set_xlabel('Importance (Gain)')
axes[0].invert_yaxis()

# Fused top features with color coding
top_fused = feat_imp_fused.head(top_n)
colors = ['steelblue' if t == 'Structured' else 'orange' for t in top_fused['type']]
axes[1].barh(range(len(top_fused)), top_fused['importance'], color=colors)
axes[1].set_yticks(range(len(top_fused)))
axes[1].set_yticklabels(top_fused['feature'])
axes[1].set_title('Top 20 Features - Fused Model')
axes[1].set_xlabel('Importance (Gain)')
axes[1].invert_yaxis()
axes[1].legend(['Structured', 'Embedding'], loc='lower right')

plt.tight_layout()
plt.savefig(base_path / 'outputs/figures/feature_importance.png', dpi=300, bbox_inches='tight')
print("\n✓ Feature importance plots saved")
plt.show()



In [None]:
# Cell 10: SHAP Analysis (Fused Model)
"""
Generate SHAP values for interpretability
"""
print("\n" + "="*60)
print("GENERATING SHAP VALUES (This may take a few minutes...)")
print("="*60)

# Sample for SHAP (use subset for speed)
n_shap_samples = min(500, len(X_test_fused))
X_shap = X_test_fused.sample(n=n_shap_samples, random_state=42)

print(f"Computing SHAP values for {n_shap_samples} samples...")

# Create SHAP explainer
explainer = shap.TreeExplainer(model_fused)
shap_values = explainer.shap_values(X_shap)

# If binary classification, shap_values might be a list
if isinstance(shap_values, list):
    shap_values = shap_values[1]  # Positive class

print("✓ SHAP values computed")

# Save SHAP values
shap_data = {
    'shap_values': shap_values,
    'data': X_shap.values,
    'feature_names': X_shap.columns.tolist(),
    'expected_value': explainer.expected_value[1] if isinstance(explainer.expected_value, list) else explainer.expected_value
}

joblib.dump(shap_data, base_path / 'outputs/models/shap_values.pkl')
print("✓ SHAP values saved")

# SHAP summary plot
plt.figure(figsize=(10, 8))
shap.summary_plot(shap_values, X_shap, max_display=20, show=False)
plt.tight_layout()
plt.savefig(base_path / 'outputs/figures/shap_summary.png', dpi=300, bbox_inches='tight')
print("✓ SHAP summary plot saved")
plt.show()



In [None]:
# Cell 11: Probability Calibration
"""
Calibrate model probabilities using isotonic regression
"""
print("\n" + "="*60)
print("PROBABILITY CALIBRATION")
print("="*60)

from sklearn.isotonic import IsotonicRegression

# Fit calibrator on calibration set
print("Fitting isotonic regression calibrator...")
calibrator = IsotonicRegression(out_of_bounds='clip')
calibrator.fit(y_pred_fused_calib, y_calib)

# Calibrate test predictions
y_pred_calibrated = calibrator.predict(y_pred_fused_test)

# Evaluate calibration
brier_uncalibrated = brier_score_loss(y_test, y_pred_fused_test)
brier_calibrated = brier_score_loss(y_test, y_pred_calibrated)

print(f"\nBrier Score:")
print(f"  Uncalibrated: {brier_uncalibrated:.4f}")
print(f"  Calibrated:   {brier_calibrated:.4f}")
print(f"  Improvement:  {brier_uncalibrated - brier_calibrated:.4f}")

# Calibration curve
fraction_of_positives_uncal, mean_predicted_value_uncal = calibration_curve(
    y_test, y_pred_fused_test, n_bins=10, strategy='uniform'
)
fraction_of_positives_cal, mean_predicted_value_cal = calibration_curve(
    y_test, y_pred_calibrated, n_bins=10, strategy='uniform'
)

# Plot
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Reliability diagram
axes[0].plot([0, 1], [0, 1], 'k--', label='Perfect calibration')
axes[0].plot(mean_predicted_value_uncal, fraction_of_positives_uncal, 
             'o-', label=f'Uncalibrated (Brier={brier_uncalibrated:.3f})')
axes[0].plot(mean_predicted_value_cal, fraction_of_positives_cal, 
             's-', label=f'Calibrated (Brier={brier_calibrated:.3f})')
axes[0].set_xlabel('Mean Predicted Probability')
axes[0].set_ylabel('Observed Frequency')
axes[0].set_title('Reliability Diagram')
axes[0].legend()
axes[0].grid(alpha=0.3)

# Distribution of predictions
axes[1].hist(y_pred_fused_test, bins=30, alpha=0.5, label='Uncalibrated', edgecolor='black')
axes[1].hist(y_pred_calibrated, bins=30, alpha=0.5, label='Calibrated', edgecolor='black')
axes[1].set_xlabel('Predicted Probability')
axes[1].set_ylabel('Count')
axes[1].set_title('Distribution of Predictions')
axes[1].legend()
axes[1].grid(alpha=0.3)

plt.tight_layout()
plt.savefig(base_path / 'outputs/figures/calibration_analysis.png', dpi=300, bbox_inches='tight')
print("\n✓ Calibration plots saved")
plt.show()

# Save calibrator
joblib.dump(calibrator, base_path / 'outputs/models/calibrator.pkl')
print("✓ Calibrator saved")



In [None]:
# Cell 12: Operating Point Analysis
"""
Determine optimal thresholds for different use cases
"""
print("\n" + "="*60)
print("OPERATING POINT ANALYSIS")
print("="*60)

# Calculate precision, recall, F1 at different thresholds
thresholds = np.linspace(0, 1, 100)
metrics_at_threshold = []

for thresh in thresholds:
    y_pred_binary = (y_pred_calibrated >= thresh).astype(int)
    
    if y_pred_binary.sum() == 0:
        continue
        
    tp = ((y_pred_binary == 1) & (y_test == 1)).sum()
    fp = ((y_pred_binary == 1) & (y_test == 0)).sum()
    fn = ((y_pred_binary == 0) & (y_test == 1)).sum()
    
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
    
    metrics_at_threshold.append({
        'threshold': thresh,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'n_predicted_positive': y_pred_binary.sum()
    })

metrics_df = pd.DataFrame(metrics_at_threshold)

# Find optimal thresholds
optimal_f1_idx = metrics_df['f1'].idxmax()
optimal_f1_threshold = metrics_df.loc[optimal_f1_idx, 'threshold']

print(f"Optimal F1 Threshold: {optimal_f1_threshold:.3f}")
print(f"  Precision: {metrics_df.loc[optimal_f1_idx, 'precision']:.3f}")
print(f"  Recall:    {metrics_df.loc[optimal_f1_idx, 'recall']:.3f}")
print(f"  F1 Score:  {metrics_df.loc[optimal_f1_idx, 'f1']:.3f}")

# Capacity-based threshold (e.g., top 20 patients per day)
# Assuming ~10 discharges per day in test set, top 20% = ~2 per day
capacity_percentile = 80  # Top 20%
capacity_threshold = np.percentile(y_pred_calibrated, capacity_percentile)

capacity_mask = y_pred_calibrated >= capacity_threshold
capacity_precision = y_test[capacity_mask].mean()
capacity_recall = y_test[capacity_mask].sum() / y_test.sum()

print(f"\nCapacity-Based Threshold (top {100-capacity_percentile}%): {capacity_threshold:.3f}")
print(f"  Precision: {capacity_precision:.3f}")
print(f"  Recall:    {capacity_recall:.3f}")
print(f"  N flagged: {capacity_mask.sum()} ({capacity_mask.mean()*100:.1f}%)")

# Plot
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Precision-Recall tradeoff
axes[0].plot(metrics_df['threshold'], metrics_df['precision'], label='Precision')
axes[0].plot(metrics_df['threshold'], metrics_df['recall'], label='Recall')
axes[0].plot(metrics_df['threshold'], metrics_df['f1'], label='F1 Score', linewidth=2)
axes[0].axvline(optimal_f1_threshold, color='red', linestyle='--', label=f'Optimal F1 ({optimal_f1_threshold:.3f})')
axes[0].set_xlabel('Threshold')
axes[0].set_ylabel('Score')
axes[0].set_title('Precision-Recall Tradeoff')
axes[0].legend()
axes[0].grid(alpha=0.3)

# Number flagged vs threshold
axes[1].plot(metrics_df['threshold'], metrics_df['n_predicted_positive'])
axes[1].axhline(len(y_test) * 0.2, color='red', linestyle='--', label='20% capacity')
axes[1].set_xlabel('Threshold')
axes[1].set_ylabel('Number Flagged as High Risk')
axes[1].set_title('Workload vs Threshold')
axes[1].legend()
axes[1].grid(alpha=0.3)

plt.tight_layout()
plt.savefig(base_path / 'outputs/figures/operating_point_analysis.png', dpi=300, bbox_inches='tight')
print("\n✓ Operating point plots saved")
plt.show()



In [None]:
# Cell 13: Save All Predictions
"""
Save predictions for dashboard and further analysis
"""
print("\n" + "="*60)
print("SAVING PREDICTIONS")
print("="*60)

# Create predictions dataframe
predictions_df = pd.DataFrame({
    'HADM_ID': test_fused['HADM_ID'],
    'SUBJECT_ID': test_fused['SUBJECT_ID'],
    'true_label': y_test,
    'pred_prob_baseline': y_pred_baseline_test,
    'pred_prob_fused_raw': y_pred_fused_test,
    'pred_prob_fused_calibrated': y_pred_calibrated,
    'pred_binary_f1_optimal': (y_pred_calibrated >= optimal_f1_threshold).astype(int),
    'pred_binary_capacity': (y_pred_calibrated >= capacity_threshold).astype(int),
    'risk_score': (y_pred_calibrated * 100).round(1)  # Convert to 0-100 scale
})

# Save
predictions_path = base_path / 'outputs/results/test_predictions.parquet'
predictions_df.to_parquet(predictions_path, index=False)

print(f"✓ Predictions saved to: {predictions_path}")
print(f"   Shape: {predictions_df.shape}")

# Also save SHAP values mapping
shap_mapping = pd.DataFrame({
    'HADM_ID': X_shap.reset_index()['HADM_ID'],
    'shap_index': range(len(X_shap))
})
shap_mapping.to_parquet(base_path / 'outputs/results/shap_mapping.parquet', index=False)



In [None]:
# Cell 14: Create Results Summary
"""
Generate comprehensive results summary
"""
results_summary = {
    'experiment_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'data': {
        'n_train': int(len(X_train_fused)),
        'n_calib': int(len(X_calib_fused)),
        'n_test': int(len(X_test_fused)),
        'readmission_rate': float(y_test.mean())
    },
    'features': {
        'n_structured': len(struct_features),
        'n_embedding': len(embedding_features),
        'n_total_fused': len(fused_features)
    },
    'baseline_model': {
        'features': 'structured_only',
        'test_auroc': float(test_auc),
        'test_auprc': float(test_auprc),
        'brier_score': float(brier_score_loss(y_test, y_pred_baseline_test))
    },
    'fused_model': {
        'features': 'structured_and_embeddings',
        'test_auroc': float(test_auc_f),
        'test_auprc': float(test_auprc_f),
        'brier_score_uncalibrated': float(brier_uncalibrated),
        'brier_score_calibrated': float(brier_calibrated)
    },
    'improvements': {
        'auroc_gain': float(auc_improvement),
        'auroc_gain_pct': float(auc_improvement / test_auc * 100),
        'auprc_gain': float(auprc_improvement),
        'auprc_gain_pct': float(auprc_improvement / test_auprc * 100)
    },
    'embedding_contribution': {
        'importance_pct': float(total_importance.get('Embedding', 0) / feat_imp_fused['importance'].sum() * 100)
    },
    'operating_points': {
        'optimal_f1_threshold': float(optimal_f1_threshold),
        'optimal_f1_precision': float(metrics_df.loc[optimal_f1_idx, 'precision']),
        'optimal_f1_recall': float(metrics_df.loc[optimal_f1_idx, 'recall']),
        'capacity_threshold': float(capacity_threshold),
        'capacity_precision': float(capacity_precision),
        'capacity_recall': float(capacity_recall)
    }
}

# Save summary
summary_path = base_path / 'outputs/results/model_results_summary.json'
with open(summary_path, 'w') as f:
    json.dump(results_summary, f, indent=2)

print(f"✓ Results summary saved to: {summary_path}")



In [None]:
# Cell 15: Final Summary
print("\n" + "="*60)
print("MODEL TRAINING COMPLETE!")
print("="*60)
print(f"\n✅ Baseline Model (Structured Only):")
print(f"   AUROC: {test_auc:.4f} | AUPRC: {test_auprc:.4f}")
print(f"\n✅ Fused Model (Structured + Embeddings):")
print(f"   AUROC: {test_auc_f:.4f} | AUPRC: {test_auprc_f:.4f}")
print(f"   Improvement: +{auc_improvement:.4f} AUROC ({auc_improvement/test_auc*100:+.2f}%)")
print(f"\n✅ Calibrated Model:")
print(f"   Brier Score: {brier_calibrated:.4f}")
print(f"\n📊 Outputs:")
print(f"   - outputs/models/baseline_model.txt")
print(f"   - outputs/models/fused_model.txt")
print(f"   - outputs/models/calibrator.pkl")
print(f"   - outputs/models/shap_values.pkl")
print(f"   - outputs/results/test_predictions.parquet")
print(f"   - outputs/figures/ (multiple visualizations)")
print(f"\n📝 Next Steps:")
print("   1. Build Streamlit dashboard (06_build_dashboard.py)")
print("   2. Implement volume forecasting")
print("   3. Create patient risk interface")