End-to-end (current focus: Readmission with XGBoost + Optuna). Structure prepared for mortality & prolonged LOS later.

In [19]:
# Environment & core imports
import os, sys, json, random, platform, importlib, datetime
from pathlib import Path
import numpy as np, pandas as pd
SEED = 42
random.seed(SEED); np.random.seed(SEED)
PROJECT_ROOT = (Path.cwd().parent if Path.cwd().name == 'notebooks' else Path.cwd())
DATA_DIR = PROJECT_ROOT / 'data'
ARTIFACTS_DIR = PROJECT_ROOT / 'artifacts'
RUNS_ROOT = PROJECT_ROOT / 'runs'
print(f"Project root: {PROJECT_ROOT}")
print(f"Data dir exists: {(DATA_DIR).exists()}")
VERSIONS = {'python': sys.version.split()[0], 'platform': platform.platform()}
for pkg in ['xgboost','optuna','shap','sklearn','pandas','numpy']:
    try:
        m = importlib.import_module(pkg if pkg != 'sklearn' else 'sklearn')
        VERSIONS[pkg] = getattr(m,'__version__','?')
    except Exception as e:
        VERSIONS[pkg] = f'NA({e})'
print('Versions:', json.dumps(VERSIONS, indent=2))

Project root: c:\Users\Almog Luz\Documents\GitHub\mlhc-final-project\project
Data dir exists: True
Versions: {
  "python": "3.13.3",
  "platform": "Windows-10-10.0.19045-SP0",
  "xgboost": "2.1.1",
  "optuna": "4.5.0",
  "shap": "0.48.0",
  "sklearn": "1.7.1",
  "pandas": "2.3.1",
  "numpy": "2.2.6"
}


### Labels
Load readmission labels (or synthesize) and report prevalence.

In [20]:
# Load or generate labels (readmission focus)
import pandas as pd, random
LABELS_PATH = None
LABEL_CANDIDATES = [DATA_DIR / 'labels.csv', PROJECT_ROOT / 'labels.csv']
for cand in LABEL_CANDIDATES:
    if cand.exists():
        LABELS_PATH = cand
        break
labels_df = None
if LABELS_PATH is not None:
    labels_df = pd.read_csv(LABELS_PATH)
else:
    cohort_path = DATA_DIR / 'initial_cohort.csv'
    if not cohort_path.exists():
        raise FileNotFoundError('initial_cohort.csv missing; cannot synthesize labels')
    subj = pd.read_csv(cohort_path)
    random.seed(SEED)
    synth = pd.Series([1 if random.random() < 0.043 else 0 for _ in range(len(subj))])
    labels_df = pd.DataFrame({'subject_id': subj['subject_id'],'hadm_id': -1,'readmission_label': synth.values})
    LABELS_PATH = '<synthetic>'
# Normalize column name
if 'readmission_label' not in labels_df.columns:
    lower_map = {c.lower(): c for c in labels_df.columns}
    for alias in ['readmission_label','readmission','readmit','readmit_30d','readmission_30d']:
        if alias in lower_map:
            if lower_map[alias] != 'readmission_label':
                labels_df.rename(columns={lower_map[alias]:'readmission_label'}, inplace=True)
            break
if 'readmission_label' not in labels_df.columns:
    raise ValueError('Could not identify readmission label column')
labels_df = labels_df.drop_duplicates('subject_id')
labels_df['readmission_label'] = labels_df['readmission_label'].astype(int)
assert labels_df['subject_id'].isna().sum()==0
prev = labels_df['readmission_label'].mean()
print(f"Labels source: {LABELS_PATH} | shape={labels_df.shape} | prevalence={prev:.4f}")

Labels source: c:\Users\Almog Luz\Documents\GitHub\mlhc-final-project\project\data\labels.csv | shape=(28473, 5) | prevalence=0.0433


### Features
Load (or regenerate) prepared feature matrix aligned to subjects.

In [21]:
# Load feature matrix (regenerate if tiny/corrupt)
import pandas as pd, json, hashlib
feature_path = ARTIFACTS_DIR / 'features_full.parquet'
regenerated = False
if feature_path.exists() and feature_path.stat().st_size < 1000:
    print('Corrupted feature parquet detected; attempting regeneration.')
    cache_dir = DATA_DIR / 'extracted_cache'
    try:
        from src.features import build_features, build_feature_provenance  # type: ignore
        def load_opt(name):
            p = cache_dir / name
            return pd.read_parquet(p) if p.exists() else None
        demo = load_opt('demographics.parquet')
        first_adm = load_opt('first_admissions.parquet')
        vitals = load_opt('vitals_48h.parquet')
        labs = load_opt('labs_48h.parquet')
        rx = load_opt('prescriptions_48h.parquet')
        proc = load_opt('procedures_48h.parquet')
        feats = build_features(first_adm, demo, vitals, labs, rx, proc)
        feats = feats.reindex(labels_df['subject_id']).fillna(0.0)
        feats.to_parquet(feature_path)
        prov = build_feature_provenance(feats)
        (ARTIFACTS_DIR / 'feature_provenance.json').write_text(json.dumps(prov, indent=2))
        (ARTIFACTS_DIR / 'feature_columns.json').write_text(json.dumps(list(feats.columns)))
        regenerated = True
        print('Regenerated features:', feats.shape)
    except Exception as e:
        print('Feature regeneration failed:', e)
if not feature_path.exists():
    raise FileNotFoundError(f'Missing {feature_path}; ensure extraction step executed.')
feature_df = pd.read_parquet(feature_path)
if 'subject_id' in feature_df.columns:
    feature_df = feature_df.set_index('subject_id')
feature_df = feature_df.reindex(labels_df['subject_id']).fillna(0.0)
print('Features loaded shape:', feature_df.shape, '| regenerated' if regenerated else '')

Features loaded shape: (28473, 1429) 


### Train/Validation/Test Split
Create 60/20/20 stratified split and compute imbalance weight.

In [22]:
# Train/valid/test split (60/20/20) + class weight factor
from sklearn.model_selection import train_test_split
readmit_y = labels_df['readmission_label'].astype(int).to_numpy()
subject_index = feature_df.index.to_numpy()
X = feature_df.values
X_tr, X_temp, y_tr, y_temp, sid_tr, sid_temp = train_test_split(
    X, readmit_y, subject_index, test_size=0.4, stratify=readmit_y, random_state=SEED)
X_val, X_te, y_val, y_te, sid_val, sid_te = train_test_split(
    X_temp, y_temp, sid_temp, test_size=0.5, stratify=y_temp, random_state=SEED)
pos_rate = y_tr.mean(); scale_pos_weight = (1-pos_rate)/max(pos_rate,1e-6)
print(f'Split -> train {X_tr.shape} valid {X_val.shape} test {X_te.shape} | pos_rate_train={pos_rate:.4f} | spw≈{scale_pos_weight:.2f}')

Split -> train (17083, 1429) valid (5695, 1429) test (5695, 1429) | pos_rate_train=0.0433 | spw≈22.12


### Metrics Helpers
Utility functions to compute threshold-dependent metrics and cost.

In [23]:
from sklearn.metrics import roc_auc_score, average_precision_score, f1_score, confusion_matrix
import numpy as np
C_FP = 1.0; C_FN = 5.0
beta = 2.0

def metrics_at(proba, y, thr):
    pred = (proba >= thr).astype(int)
    tn, fp, fn, tp = confusion_matrix(y, pred).ravel()
    cost = C_FP*fp + C_FN*fn
    f1 = f1_score(y, pred)
    prec = tp/(tp+fp+1e-9); rec = tp/(tp+fn+1e-9)
    fbeta = (1+beta**2)*prec*rec/(beta**2*prec+rec+1e-9)
    return dict(f1=f1, precision=prec, recall=rec, cost=cost, fbeta=fbeta)

### Baseline Model
Train a simple class-weighted logistic regression for reference AUC.

In [24]:
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
baseline_pipe = Pipeline([
    ("imp", SimpleImputer(strategy='median')),
    ("sc", StandardScaler(with_mean=False)),
    ("lr", LogisticRegression(max_iter=500, class_weight='balanced', solver='liblinear'))
])
baseline_pipe.fit(X_tr, y_tr)
baseline_val_proba = baseline_pipe.predict_proba(X_val)[:,1]
baseline_auc = roc_auc_score(y_val, baseline_val_proba)
print('Baseline Logistic Validation AUC:', round(baseline_auc,4))

Baseline Logistic Validation AUC: 0.5811


In [25]:
import optuna, xgboost as xgb
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner
study = optuna.create_study(direction='maximize', sampler=TPESampler(seed=SEED), pruner=MedianPruner())
print('Study created.')

[I 2025-09-13 15:48:00,048] A new study created in memory with name: no-name-41c50ae1-356b-4146-b359-b33331652ed2


Study created.


### Objective Definition
Define Optuna objective: 5-fold stratified CV with early stopping (mean validation AUC).

In [30]:
# Optuna objective: 5-fold stratified CV AUC (lightweight) using sklearn XGBClassifier
# Performance tweaks: lower max n_estimators, optional row subsampling per fold for speed.
import numpy as np
from xgboost import XGBClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
MAX_ROUNDS = 400  # reduced for speed since no early stopping
N_FOLDS = 5
SPEED_SAMPLE_MAX = 12000  # cap rows per fold training subset for speed


def objective(trial: optuna.Trial):
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 7),
        'min_child_weight': trial.suggest_float('min_child_weight', 1.0, 8.0),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'reg_lambda': trial.suggest_float('lambda', 1e-3, 5.0, log=True),
        'reg_alpha': trial.suggest_float('alpha', 1e-3, 2.0, log=True),
        'gamma': trial.suggest_float('gamma', 0.0, 4.0),
        'n_estimators': trial.suggest_int('n_estimators', 120, MAX_ROUNDS),
    }
    fold_aucs = []
    rng_local = np.random.default_rng(SEED + trial.number)
    skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)
    for fold, (tr_idx, va_idx) in enumerate(skf.split(X_tr, y_tr), 1):
        Xtr_f, Xva_f = X_tr[tr_idx], X_tr[va_idx]
        ytr_f, yva_f = y_tr[tr_idx], y_tr[va_idx]
        # Speed row subsample (stratified) if oversized
        if Xtr_f.shape[0] > SPEED_SAMPLE_MAX:
            pos_idx = np.where(ytr_f==1)[0]
            neg_idx = np.where(ytr_f==0)[0]
            # keep all positives to maintain signal if rare
            keep_pos = pos_idx
            # sample negatives to reach SPEED_SAMPLE_MAX total
            remaining = SPEED_SAMPLE_MAX - len(keep_pos)
            if remaining < len(neg_idx):
                keep_neg = rng_local.choice(neg_idx, size=remaining, replace=False)
            else:
                keep_neg = neg_idx
            keep = np.concatenate([keep_pos, keep_neg])
            rng_local.shuffle(keep)
            Xtr_f = Xtr_f[keep]
            ytr_f = ytr_f[keep]
        model = XGBClassifier(
            objective='binary:logistic', tree_method='hist',
            scale_pos_weight=scale_pos_weight, eval_metric='auc',
            verbosity=0, **params
        )
        model.fit(Xtr_f, ytr_f, verbose=False)
        proba = model.predict_proba(Xva_f)[:,1]
        fold_auc = roc_auc_score(yva_f, proba)
        fold_aucs.append(fold_auc)
    mean_auc = float(np.mean(fold_aucs))
    trial.set_user_attr('fold_aucs', fold_aucs)
    trial.set_user_attr('cv_mean_auc', mean_auc)
    return mean_auc

### Run Hyperparameter Search
Execute trials optimizing mean 5-fold CV AUC (early stopping each fold).

In [31]:
N_TRIALS = 5  # adjust upward for thorough search
study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=False)
print('Best AUC:', study.best_value)
print('Best Params:', study.best_params)

[I 2025-09-13 16:21:43,841] Trial 13 finished with value: 0.6222714796987352 and parameters: {'learning_rate': 0.010196893017132471, 'max_depth': 6, 'min_child_weight': 7.788405875023145, 'subsample': 0.7471244966734023, 'colsample_bytree': 0.9963583331053159, 'lambda': 0.14565654991219462, 'alpha': 0.11799624551738404, 'gamma': 2.585651259188504, 'n_estimators': 123}. Best is trial 4 with value: 0.6274018529118605.
[I 2025-09-13 16:22:02,304] Trial 14 finished with value: 0.6228481171309606 and parameters: {'learning_rate': 0.015529361529665643, 'max_depth': 5, 'min_child_weight': 4.1958034965567395, 'subsample': 0.7146484682094928, 'colsample_bytree': 0.9893673051130601, 'lambda': 0.19511174188897631, 'alpha': 0.0062716249528685115, 'gamma': 2.5691150354358707, 'n_estimators': 129}. Best is trial 4 with value: 0.6274018529118605.
[I 2025-09-13 16:22:02,304] Trial 14 finished with value: 0.6228481171309606 and parameters: {'learning_rate': 0.015529361529665643, 'max_depth': 5, 'min_ch

Best AUC: 0.6274018529118605
Best Params: {'learning_rate': 0.015380821666156693, 'max_depth': 3, 'min_child_weight': 7.158097238609412, 'subsample': 0.7200762468698007, 'colsample_bytree': 0.5610191174223894, 'lambda': 0.09565499215943825, 'alpha': 0.0013403002793227008, 'gamma': 4.546602010393911, 'n_estimators': 533}


### Inspect Trials
Overview of trials and (optional) optimization history plot.

In [None]:
trials_df = study.trials_dataframe()
print('Trials:', trials_df.shape)
try:
    optuna.visualization.plot_optimization_history(study)
except Exception:
    pass

### Final Model Training
Train final booster on combined train+validation using best params.

In [32]:
from xgboost import XGBClassifier
import numpy as np
# Combine train+validation for final model used for raw probability generation (before calibration)
X_tr_full = np.vstack([X_tr, X_val])
y_tr_full = np.concatenate([y_tr, y_val])
params = study.best_params.copy()
final_model = XGBClassifier(
    objective='binary:logistic', tree_method='hist',
    learning_rate=params['learning_rate'],
    n_estimators=params['n_estimators'],
    max_depth=params['max_depth'],
    min_child_weight=params['min_child_weight'],
    subsample=params['subsample'],
    colsample_bytree=params['colsample_bytree'],
    reg_lambda=params['lambda'],
    reg_alpha=params['alpha'],
    gamma=params['gamma'],
    scale_pos_weight=scale_pos_weight,
    eval_metric='auc',
    verbosity=0
)
final_model.fit(X_tr_full, y_tr_full)
print('Final XGBClassifier trained (sklearn API, DataFrame/array based).')

Final XGBClassifier trained (sklearn API, DataFrame/array based).


### Calibration & Threshold
Fit isotonic on validation; pick F1-optimal threshold on calibrated validation.

In [33]:
# Manual isotonic calibration (sklearn XGBClassifier base, no DMatrix)
from xgboost import XGBClassifier
from sklearn.isotonic import IsotonicRegression
import numpy as np

# Base model: train-only (exclude validation for calibration fairness)
params = study.best_params.copy()
base_model = XGBClassifier(
    objective='binary:logistic', tree_method='hist',
    learning_rate=params['learning_rate'],
    n_estimators=params['n_estimators'],
    max_depth=params['max_depth'],
    min_child_weight=params['min_child_weight'],
    subsample=params['subsample'],
    colsample_bytree=params['colsample_bytree'],
    reg_lambda=params['lambda'],
    reg_alpha=params['alpha'],
    gamma=params['gamma'],
    scale_pos_weight=scale_pos_weight,
    eval_metric='logloss',
    verbosity=0
)
base_model.fit(X_tr, y_tr)
val_proba_raw = base_model.predict_proba(X_val)[:,1]
iso = IsotonicRegression(out_of_bounds='clip')
iso.fit(val_proba_raw, y_val)
print('Isotonic calibration fitted on validation set.')

def predict_calibrated(X):
    return iso.transform(base_model.predict_proba(X)[:,1])

# Derive operating threshold on calibrated validation probabilities
val_cal = predict_calibrated(X_val)
ths = np.linspace(0.01,0.9,300)
threshold_info = None
for t in ths:
    m = metrics_at(val_cal, y_val, t)
    if (threshold_info is None) or (m['f1'] > threshold_info['f1']):
        threshold_info = {**m, 'threshold': float(t)}
print('Selected threshold (calibrated validation):', threshold_info)

Isotonic calibration fitted on validation set.
Selected threshold (calibrated validation): {'f1': 0.1557377049180328, 'precision': np.float64(0.11752577319563397), 'recall': np.float64(0.2307692307682965), 'cost': np.float64(1378.0), 'fbeta': np.float64(0.19348268811432212), 'threshold': 0.0665551839464883}
Selected threshold (calibrated validation): {'f1': 0.1557377049180328, 'precision': np.float64(0.11752577319563397), 'recall': np.float64(0.2307692307682965), 'cost': np.float64(1378.0), 'fbeta': np.float64(0.19348268811432212), 'threshold': 0.0665551839464883}


### Test Evaluation
Apply calibrated model + selected threshold; report core metrics.

In [34]:
# Evaluate calibrated model on test set
from sklearn.metrics import brier_score_loss
import json
cal_proba_test = predict_calibrated(X_te)
auc = roc_auc_score(y_te, cal_proba_test)
pr = average_precision_score(y_te, cal_proba_test)
brier = brier_score_loss(y_te, cal_proba_test)
thr = threshold_info['threshold']
th_metrics = metrics_at(cal_proba_test, y_te, thr)
report = {
    'auc': float(auc),
    'pr_auc': float(pr),
    'brier': float(brier),
    'threshold': float(thr),
    'f1_at_threshold': float(th_metrics['f1']),
    'precision_at_threshold': float(th_metrics['precision']),
    'recall_at_threshold': float(th_metrics['recall']),
    'cost_at_threshold': float(th_metrics['cost']),
}
print(json.dumps(report, indent=2))

{
  "auc": 0.6049995001693457,
  "pr_auc": 0.05755169256503535,
  "brier": 0.04140597358729218,
  "threshold": 0.0665551839464883,
  "f1_at_threshold": 0.08743169398907104,
  "precision_at_threshold": 0.06584362139904147,
  "recall_at_threshold": 0.13008130081247934,
  "cost_at_threshold": 1524.0
}


### SHAP Summary
Compute SHAP values on a sample for global importance.

In [35]:
# SHAP global explanation (sample subset) using base_model
import numpy as np
try:
    import shap
    X_tr_full = np.vstack([X_tr, X_val])
    sample_idx = np.random.RandomState(42).choice(X_tr_full.shape[0], size=min(400, X_tr_full.shape[0]), replace=False)
    X_sample = X_tr_full[sample_idx]
    explainer = shap.TreeExplainer(base_model)
    shap_val = explainer.shap_values(X_sample)
    mean_abs = np.abs(shap_val).mean(axis=0)
    top_order = np.argsort(-mean_abs)[:20]
    print('Top SHAP feature indices (first 10 of 20):', top_order[:10])
except Exception as e:
    print('SHAP skipped:', e)

Top SHAP feature indices (first 10 of 20): [  25  170  625    6 1169  455 1345 1014   95 1005]


### Bootstrap AUC CI
Estimate uncertainty of test ROC AUC via stratified bootstrap.

In [36]:
# Bootstrap 95% CI for test AUC
import numpy as np
from sklearn.metrics import roc_auc_score
R = 1000
rng = np.random.default_rng(42)
auc_samples = []
for _ in range(R):
    idx_pos = np.where(y_te==1)[0]
    idx_neg = np.where(y_te==0)[0]
    b_pos = rng.choice(idx_pos, size=len(idx_pos), replace=True)
    b_neg = rng.choice(idx_neg, size=len(idx_neg), replace=True)
    b_idx = np.concatenate([b_pos, b_neg])
    auc_samples.append(roc_auc_score(y_te[b_idx], cal_proba_test[b_idx]))
auc_samples = np.array(auc_samples)
ci_low, ci_high = np.percentile(auc_samples, [2.5,97.5])
print(f'AUC bootstrap mean={auc_samples.mean():.4f} 95% CI=({ci_low:.4f},{ci_high:.4f}) n={R}')

AUC bootstrap mean=0.6050 95% CI=(0.5730,0.6390) n=1000


### Cross-Validation Summary
Single consolidated cell: reuse Optuna best-trial fold AUCs + compute logistic regression 5-fold CV for uplift comparison.

In [None]:
# Consolidated CV summary: XGB folds from best Optuna trial + new logistic CV
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import roc_auc_score

# Extract XGB fold AUCs stored during tuning
best_trial = study.best_trial
xgb_fold_aucs = best_trial.user_attrs.get('fold_aucs', [])
xgb_mean = float(np.mean(xgb_fold_aucs)) if xgb_fold_aucs else float('nan')
xgb_std = float(np.std(xgb_fold_aucs)) if xgb_fold_aucs else float('nan')

# Logistic CV on same combined train+validation set for contrast
X_cv = np.vstack([X_tr, X_val])
y_cv = np.concatenate([y_tr, y_val])
log_skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
log_aucs = []
for fold,(tr_idx, va_idx) in enumerate(log_skf.split(X_cv, y_cv), 1):
    Xtr, Xva = X_cv[tr_idx], X_cv[va_idx]; ytr, yva = y_cv[tr_idx], y_cv[va_idx]
    lr_pipe = Pipeline([
        ('imp', SimpleImputer(strategy='median')),
        ('sc', StandardScaler(with_mean=False)),
        ('lr', LogisticRegression(max_iter=500, class_weight='balanced', solver='liblinear'))
    ])
    lr_pipe.fit(Xtr, ytr)
    log_aucs.append(roc_auc_score(yva, lr_pipe.predict_proba(Xva)[:,1]))
log_mean = float(np.mean(log_aucs)); log_std = float(np.std(log_aucs))

print('XGB CV AUCs (best trial):', [round(a,4) for a in xgb_fold_aucs])
print(f'XGB CV mean ± std: {xgb_mean:.4f} ± {xgb_std:.4f}')
print('Logistic CV AUCs:', [round(a,4) for a in log_aucs])
print(f'Logistic CV mean ± std: {log_mean:.4f} ± {log_std:.4f}')

# Optional uplift summary
if xgb_mean == xgb_mean and log_mean == log_mean:  # not NaN
    uplift = xgb_mean - log_mean
    print(f'Uplift (XGB - Logistic) mean AUC: {uplift:.4f}')

CV AUC Logistic: mean 0.5872 ± 0.0257
CV AUC XGB     : mean 0.6348 ± 0.0154


### Persist Artifacts
Save model, calibration objects, metrics, threshold, and metadata.

In [38]:
# Persist artifacts (sklearn XGBClassifier + calibration + metadata)
import json, joblib, hashlib, time, subprocess
OUT_DIR = ARTIFACTS_DIR
OUT_DIR.mkdir(exist_ok=True)
joblib.dump(base_model, OUT_DIR / 'model_readmission.joblib')
joblib.dump(iso, OUT_DIR / 'isotonic.joblib')
with open(OUT_DIR / 'best_params.json','w',encoding='utf-8') as f: json.dump(study.best_params, f, indent=2)
with open(OUT_DIR / 'metrics.json','w',encoding='utf-8') as f: json.dump(report, f, indent=2)
with open(OUT_DIR / 'threshold.txt','w') as f: f.write(str(report['threshold']))
try:
    git_commit = subprocess.check_output(['git','rev-parse','HEAD'], text=True).strip()
except Exception:
    git_commit = 'UNKNOWN'
feat_cols = list(feature_df.columns)
feat_sig = hashlib.sha256(('|'.join(feat_cols)).encode()).hexdigest()[:16]
meta = {
    'saved_utc': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()),
    'git_commit': git_commit,
    'n_features': len(feat_cols),
    'feature_sig_sha256_16': feat_sig,
    'prevalence_train': float(y_tr.mean()),
    'prevalence_valid': float(y_val.mean()),
    'prevalence_test': float(y_te.mean()),
    'optuna_best_value': float(study.best_value),
    'threshold_info': threshold_info,
    'calibration': 'isotonic_on_validation',
    'cv_folds_best_mean_auc': float(study.best_value)
}
with open(OUT_DIR / 'run_metadata.json','w',encoding='utf-8') as f: json.dump(meta, f, indent=2)
print('Artifacts saved ->', OUT_DIR)

Artifacts saved -> c:\Users\Almog Luz\Documents\GitHub\mlhc-final-project\project\artifacts


### Experiment Registry
Append current run metrics to CSV registry for tracking.

In [39]:
# Append metrics to experiment registry
import csv, time
REG_PATH = PROJECT_ROOT / 'experiment_registry.csv'
row = {'ts': time.time(), **report}
write_header = not REG_PATH.exists()
with open(REG_PATH,'a',newline='') as f:
    w = csv.DictWriter(f, fieldnames=row.keys())
    if write_header: w.writeheader()
    w.writerow(row)
print('Logged metrics to', REG_PATH)

Logged metrics to c:\Users\Almog Luz\Documents\GitHub\mlhc-final-project\project\experiment_registry.csv


### Single Prediction Demo
Show calibrated probability for one test instance.

In [40]:
# Single example calibrated probability demo (using base_model + iso)
raw_proba = base_model.predict_proba(X_te[:1])[:,1][0]
calib_proba = iso.transform([raw_proba])[0]
print('Single test example calibrated probability:', float(calib_proba))

Single test example calibrated probability: 0.04131355881690979
