# 1. Imports & Data Loading


In [None]:
# ICU Admission Prediction: Multi-Model Comparison with SMOTE (XGBoost Only)

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, roc_auc_score, RocCurveDisplay
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
import warnings
# Silence only deprecation-style noise. A blanket filterwarnings("ignore")
# would also hide UserWarning-class messages, which libraries use to report
# real problems (for example, a keyword argument that was not recognised).
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Load the prepared dataset. The path is relative to the notebook's working
# directory.
df = pd.read_csv("prepared_clinical_dataset.csv")


# 2. Feature Selection, Preprocessing & SMOTE

In [None]:
# Predictor matrix: vital-sign aggregates (column suffixes _15m / _30m / _60m,
# presumably time windows in minutes -- confirm against the data dictionary),
# diagnosis-group indicator columns, and sex / surgery-type indicator columns.
X = df[[
    'HeartRate_mean_15m', 'BreathingRate_mean_15m', 'SpO2_mean_15m', 'SkinTemp_mean_15m', 'SysBp_mean_15m', 'DiasBp_mean_15m', 'MapBp_mean_15m',
    'HeartRate_mean_30m', 'BreathingRate_mean_30m', 'SpO2_mean_30m', 'SkinTemp_mean_30m', 'SysBp_mean_30m', 'DiasBp_mean_30m', 'MapBp_mean_30m',
    'HeartRate_min_60m', 'HeartRate_max_60m', 'HeartRate_p90_60m',
    'BreathingRate_min_60m', 'BreathingRate_max_60m', 'BreathingRate_p90_60m',
    'SpO2_min_60m', 'SpO2_max_60m', 'SpO2_p90_60m',
    'SkinTemp_min_60m', 'SkinTemp_max_60m', 'SkinTemp_p90_60m',
    'SysBp_min_60m', 'SysBp_max_60m', 'SysBp_p90_60m',
    'DiasBp_min_60m', 'DiasBp_max_60m', 'DiasBp_p90_60m',
    'MapBp_min_60m', 'MapBp_max_60m', 'MapBp_p90_60m',
    'diagnosis_group_blood_diseases', 'diagnosis_group_circulatory_system', 'diagnosis_group_congenital_abnormalities', 'diagnosis_group_digestive_system',
    'diagnosis_group_ear_diseases', 'diagnosis_group_endocrine_diseases', 'diagnosis_group_external_causes', 'diagnosis_group_eye_diseases',
    'diagnosis_group_genitourinary_diseases', 'diagnosis_group_health_status_contact', 'diagnosis_group_infectious_diseases',
    'diagnosis_group_injury_poisoning', 'diagnosis_group_mental_disorders', 'diagnosis_group_musculoskeletal_diseases',
    'diagnosis_group_neoplasms', 'diagnosis_group_nervous_system', 'diagnosis_group_perinatal_conditions',
    'diagnosis_group_pregnancy_childbirth', 'diagnosis_group_respiratory_system', 'diagnosis_group_skin_diseases',
    'diagnosis_group_symptoms_not_classified', 'SexLabel_Male', 'SurgeryLabel_Emergency', 'SurgeryLabel_Planned', 'SurgeryLabel_Unplanned'
]]
# Target column.
y = df['ICU_Admission']

# Clean column names: replace the characters [, ], < and > with underscores.
X.columns = X.columns.str.replace(r'[\[\]<>]', '_', regex=True)

# Train-test split FIRST, on the unscaled features. Splitting before any
# fitted preprocessing keeps test-set statistics out of the training pipeline.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Scale features: fit the scaler on the training partition only, then apply
# the same learned mean/std to the test partition. The original index is kept
# so the feature frames stay aligned with y_train / y_test.
scaler = StandardScaler()
X_train = pd.DataFrame(
    scaler.fit_transform(X_train_raw), columns=X.columns, index=X_train_raw.index
)
X_test = pd.DataFrame(
    scaler.transform(X_test_raw), columns=X.columns, index=X_test_raw.index
)

# Full feature matrix transformed with the train-fitted scaler. Kept so that
# any later cell referring to X_scaled continues to work.
X_scaled = pd.DataFrame(scaler.transform(X), columns=X.columns, index=X.index)

# Apply SMOTE to address class imbalance. Only the training partition is
# resampled; the test partition keeps its original class distribution.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)


# 3. Train & Evaluate XGBoost

In [None]:
# XGBoost classifier with explicit hyperparameters.
# The values were previously passed with a pipeline-style "classifier__"
# prefix (e.g. classifier__max_depth). XGBClassifier does not recognise those
# names, so the intended settings were not applied. They are passed under
# their real parameter names here.
xgb = XGBClassifier(
    # NOTE(review): use_label_encoder is only meaningful on older xgboost
    # releases; confirm against the installed version and drop it if unused.
    use_label_encoder=False,
    eval_metric='logloss',
    subsample=1.0,
    reg_lambda=5,
    reg_alpha=1,
    n_estimators=100,
    max_depth=7,
    learning_rate=0.1,
    gamma=0.5,
    colsample_bytree=1.0,
    random_state=42  # same seed as the split and SMOTE steps
)

# Fit on the SMOTE-resampled training data; predict on the untouched test set.
xgb.fit(X_train_resampled, y_train_resampled)
y_pred_xgb = xgb.predict(X_test)

# Save results, keyed by model name.
results = {}
predictions = {}
results['XGBoost'] = classification_report(y_test, y_pred_xgb, output_dict=True)
predictions['XGBoost'] = y_pred_xgb


# 4. Display Metrics: Classification Report, ROC, Confusion Matrix

In [None]:
# Classification report: transpose so each class / average is one row.
print("\n=== XGBoost ===")
print(pd.DataFrame(results['XGBoost']).T)

# ROC curve, built from the predicted probability of the second class column.
y_score = xgb.predict_proba(X_test)[:, 1]
auc = roc_auc_score(y_test, y_score)
print(f"ROC AUC: {auc:.3f}")

# Create the axes explicitly and hand them to the display. Without `ax`, the
# display opens its own figure and a separately created sized figure is
# rendered empty.
fig, ax = plt.subplots(figsize=(8, 6))
RocCurveDisplay.from_predictions(y_test, y_score, name="XGBoost", color="purple", ax=ax)
ax.set_title("ROC Curve for XGBoost")
ax.legend(loc="lower right")
ax.grid(True)
fig.tight_layout()
plt.show()

# Confusion matrix.
cm = confusion_matrix(y_test, predictions['XGBoost'])
# Unpacking into four values requires a 2x2 matrix, i.e. exactly two classes
# present across y_test and the predictions.
tn, fp, fn, tp = cm.ravel()

confusion_table = pd.DataFrame({'TN': [tn], 'FP': [fp], 'FN': [fn], 'TP': [tp]})
print(confusion_table)

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['No ICU', 'ICU'])
disp.plot()
# Title and grid are set on the axes the display created.
disp.ax_.set_title("Confusion Matrix - XGBoost")
disp.ax_.grid(False)
plt.show()


# 5. Feature Importance Plot

In [None]:
# Pair each importance score with its feature name and sort ascending.
# barh draws the first entry at the bottom, so the most important feature
# ends up at the top of the chart.
importance_by_feature = pd.Series(xgb.feature_importances_, index=X.columns).sort_values()

fig, ax = plt.subplots(figsize=(10, 12))
ax.barh(importance_by_feature.index, importance_by_feature.values)
ax.set_title("XGBoost Feature Importance")
# The score's meaning depends on the model's importance_type setting, so the
# axis label is kept generic.
ax.set_xlabel("Importance")
fig.tight_layout()
plt.show()
