In [None]:
# ICU Admission Prediction: Multi-Model Comparison

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, RocCurveDisplay
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from pygam import LogisticGAM, s
import matplotlib.pyplot as plt
import seaborn as sns
import shap

# Read the prepared clinical dataset from CSV
df = pd.read_csv("prepared_clinical_dataset.csv")

# Separate the outcome column (ICU_Admission) from the predictor matrix
y = df["ICU_Admission"]
X = df.drop(columns=["ICU_Admission"])

# Hold out 20% of the rows for testing, stratified on the outcome;
# the fixed seed keeps the split reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Classification reports collected per model, keyed by model name
results = dict()

# 1. Logistic Regression
# Fit with the iteration cap raised to 1000, then score on the held-out split
lr = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
results['Logistic Regression'] = classification_report(
    y_true=y_test,
    y_pred=y_pred_lr,
    output_dict=True,
)

# 2. Decision Tree
# Depth-limited tree (max depth 4) with a fixed seed, scored on the held-out split
tree = DecisionTreeClassifier(max_depth=4, random_state=42).fit(X_train, y_train)
y_pred_tree = tree.predict(X_test)
results['Decision Tree'] = classification_report(
    y_true=y_test,
    y_pred=y_pred_tree,
    output_dict=True,
)

# 3. Naive Bayes
# Gaussian Naive Bayes with default settings, scored on the held-out split
gnb = GaussianNB().fit(X_train, y_train)
y_pred_nb = gnb.predict(X_test)
results['Naive Bayes'] = classification_report(
    y_true=y_test,
    y_pred=y_pred_nb,
    output_dict=True,
)

# 4. Generalized Additive Model (GAM)
# Build one spline term per feature column so the GAM is trained on the same
# predictors as the other models. A hard-coded s(0) + s(1) + s(2) would use
# only the first three columns and would fail on fewer than three features.
gam_terms = s(0)
for feature_idx in range(1, X_train.shape[1]):
    gam_terms += s(feature_idx)
gam = LogisticGAM(gam_terms).fit(X_train.values, y_train.values)

# LogisticGAM.predict returns already-thresholded binary labels (not
# probabilities), so only an integer cast is needed for the report.
y_pred_gam = gam.predict(X_test.values)
y_pred_gam_class = y_pred_gam.astype(int)
results['GAM'] = classification_report(y_test, y_pred_gam_class, output_dict=True)

# 5. XGBoost
# Gradient-boosted trees evaluated with log-loss; the label-encoder flag is
# passed exactly as before so behaviour does not depend on this rewrite
xgb = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
)
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)
results['XGBoost'] = classification_report(
    y_true=y_test,
    y_pred=y_pred_xgb,
    output_dict=True,
)

# Evaluation summary
# Print each model's classification report as a table (one row per class/average)
for model_name, report in results.items():
    report_table = pd.DataFrame(report).transpose()
    print(f"\n=== {model_name} ===", report_table, sep="\n")

# ROC Curve for all models
# RocCurveDisplay.from_predictions opens a brand-new figure whenever no axes is
# supplied, so a single axes is created here and passed to every call; that is
# what overlays all curves on one plot and lets the title, legend and grid
# apply to the shared figure.
fig, ax = plt.subplots(figsize=(10, 6))
fitted_models = {
    'Logistic Regression': lr,
    'Decision Tree': tree,
    'Naive Bayes': gnb,
    'GAM': gam,
    'XGBoost': xgb,
}
for name, model in fitted_models.items():
    if name == 'GAM':
        # The GAM is scored on the raw NumPy array and its predict_proba
        # output is used directly as the positive-class score.
        y_score = model.predict_proba(X_test.values)
    else:
        # sklearn-style estimators return one column per class; column 1 is
        # the positive class.
        y_score = model.predict_proba(X_test)[:, 1]
    RocCurveDisplay.from_predictions(y_test, y_score, name=name, ax=ax)
ax.set_title("ROC Curves")
ax.legend()
ax.grid()
plt.show()

# Feature importance (Tree and XGBoost)
# One horizontal bar chart per tree-based model, each in its own figure
for plot_title, importances in (
    ("Decision Tree Feature Importance", tree.feature_importances_),
    ("XGBoost Feature Importance", xgb.feature_importances_),
):
    plt.figure(figsize=(10, 5))
    plt.barh(X.columns, importances)
    plt.title(plot_title)
    plt.show()

# SHAP values for XGBoost
# Explain the fitted XGBoost model on the held-out rows and draw the
# bar-style summary plot of the resulting SHAP values
explainer = shap.TreeExplainer(xgb)
shap_values = explainer.shap_values(X_test)
shap.summary_plot(
    shap_values,
    features=X_test,
    plot_type="bar",
)
