In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE

# === 1. Load your dataset ===


In [None]:
# Placeholder path: point this at the real dataset CSV before running the notebook.
df = pd.read_csv(filepath_or_buffer="your_dataset.csv")


# === 2. Define feature sets (BMI is excluded) ===


In [None]:
# Column names follow the pattern "<signal>_<statistic>_<window>", so the
# lists below are generated from their parts instead of being typed out.
_bp_signals = ('SysBp', 'DiasBp', 'MapBp')
_other_signals = ('HR', 'RR', 'SpO2')
_extreme_stats = ('min', 'max', '90p')

# Mean over the 60 window for every signal; 'Temp' only appears in this group.
vitals_mean_60 = [
    f'{signal}_mean_60'
    for signal in (*_bp_signals, *_other_signals, 'Temp')
]

# Min / max / 90p columns over the 60 window. The blood-pressure columns are
# grouped by statistic, the remaining signals are grouped by signal.
vitals_extremes = (
    [f'{signal}_{stat}_60' for stat in _extreme_stats for signal in _bp_signals]
    + [f'{signal}_{stat}_60' for signal in _other_signals for stat in _extreme_stats]
)

# Means over the shorter 15 and 30 windows (no 'Temp' column here).
vitals_mean_short = [
    f'{signal}_mean_{window}'
    for window in (15, 30)
    for signal in (*_bp_signals, *_other_signals)
]

vitals_combined = [*vitals_mean_60, *vitals_extremes, *vitals_mean_short]
demographic_features = ['Age', 'ASA', 'SexLabel']
diagnosis_surgery = ['DiagnosisGroup', 'SurgeryType']

# Experiment name -> list of feature columns used for that experiment.
feature_sets = {}
feature_sets["Vitals_Mean_Only"] = vitals_mean_60
feature_sets["Vitals_Extremes_Only"] = vitals_extremes
feature_sets["Vitals_Short_Term"] = vitals_mean_short
feature_sets["Vitals_Combined"] = vitals_combined
feature_sets["Vitals_+_Demographics"] = [*vitals_mean_60, *demographic_features]
feature_sets["Vitals_+_Diag_+_Surg"] = [*vitals_mean_60, *diagnosis_surgery]
feature_sets["All_Features"] = [
    *vitals_combined,
    *demographic_features,
    *diagnosis_surgery,
]

# === 3. Define models ===


In [None]:
# Display name -> unfitted classifier instance evaluated in every experiment.
# NOTE(review): `use_label_encoder` is reported as deprecated in recent
# XGBoost releases -- confirm against the installed version before removing it.
models = dict(
    Logistic=LogisticRegression(max_iter=1000),
    RandomForest=RandomForestClassifier(n_estimators=100, random_state=42),
    XGBoost=XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
)

# === 4. Evaluation Function ===


In [None]:
def evaluate_model(X, y, model, apply_smote=False):
    """Fit ``model`` on an 80/20 hold-out split and return confusion-matrix counts.

    Parameters
    ----------
    X : feature table handed to ``train_test_split``.
    y : 1-D target labels aligned with ``X``; expected to hold exactly two
        distinct classes, because the result is unpacked into four counts.
    model : estimator exposing ``fit`` and ``predict``; it is fitted in place.
    apply_smote : when True, the training portion (never the test portion)
        is oversampled with SMOTE before fitting.

    Returns
    -------
    tuple
        ``(tn, fp, fn, tp)`` computed on the held-out 20 %.

    Raises
    ------
    ValueError
        If ``y`` does not contain exactly two classes, or if the split, SMOTE
        or the estimator reject the data.
    """
    # Take the label order from the full target so the confusion matrix is
    # always 2x2, even when the test split or the predictions contain a single
    # class. Without an explicit label list the matrix collapses to 1x1 and
    # the four-way unpacking below fails.
    labels = sorted(set(y))

    split_kwargs = dict(test_size=0.2, random_state=42)
    try:
        # Stratify so both partitions keep the class ratio of the full target;
        # this matters when the positive class is rare.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, stratify=y, **split_kwargs
        )
    except ValueError:
        # Stratification is infeasible (e.g. a class with a single sample):
        # fall back to the plain random split. A genuine input problem will
        # raise again from this second call.
        X_train, X_test, y_train, y_test = train_test_split(X, y, **split_kwargs)

    if apply_smote:
        sm = SMOTE(random_state=42)
        X_train, y_train = sm.fit_resample(X_train, y_train)

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=labels).ravel()
    return tn, fp, fn, tp

# === 5. Run evaluation for each model and feature set ===


In [None]:
def run_experiment(apply_smote=False):
    """Evaluate every model on every feature set and tabulate the outcomes.

    Reads the module-level ``df``, ``feature_sets`` and ``models`` and calls
    ``evaluate_model`` once per (feature set, model) pair.

    Parameters
    ----------
    apply_smote : forwarded to ``evaluate_model`` and stored in the ``SMOTE``
        column of every row.

    Returns
    -------
    pandas.DataFrame
        One row per (feature set, model) pair with the columns ``Model``,
        ``Feature_Set``, ``SMOTE``, ``TN``, ``FP``, ``FN``, ``TP`` and
        ``Error``. A failed pair has ``None`` counts and the exception text in
        ``Error``; a successful pair has ``Error`` set to ``None``, so the
        column is always present.

    Raises
    ------
    KeyError
        If the target column is missing from ``df``; nothing can be evaluated
        without it, so this is not recorded per row.
    """
    # The target does not depend on the feature set, so look it up once.
    y = df['ICU']  # replace with actual ICU column name

    results = []
    for feature_name, features in feature_sets.items():
        # A feature set naming a column absent from `df` must not abort the
        # whole grid; record it as a failure like any model error.
        try:
            X = df[features].copy()
            selection_error = None
        except KeyError as e:
            X = None
            selection_error = str(e)

        for model_name, model in models.items():
            record = {
                'Model': model_name,
                'Feature_Set': feature_name,
                'SMOTE': apply_smote,
                'TN': None,
                'FP': None,
                'FN': None,
                'TP': None,
                'Error': selection_error,
            }
            if selection_error is None:
                try:
                    tn, fp, fn, tp = evaluate_model(X, y, model, apply_smote)
                    record.update(TN=tn, FP=fp, FN=fn, TP=tp)
                except Exception as e:
                    # Best effort: keep the grid running and keep the reason
                    # in the results table instead of discarding it.
                    record['Error'] = str(e)
            results.append(record)
    return pd.DataFrame(results)

# === 6. Run both versions ===


In [None]:
# Run the full model x feature-set grid twice: first on the training split
# as-is, then with SMOTE oversampling applied to the training split.
results_no_smote = run_experiment(False)
results_smote = run_experiment(True)

# === 7. Combine and show summary ===


In [None]:
# Stack the two result tables into one frame with a fresh 0..n-1 index.
final_results = pd.concat(
    objs=[results_no_smote, results_smote],
    ignore_index=True,
)
print(final_results)

# Persist a copy ordered by TP, then FP (both ascending); `final_results`
# itself keeps its original row order.
# NOTE(review): ascending order lists the lowest-TP rows first -- confirm
# that this is the intended ranking.
(
    final_results
    .sort_values(by=['TP', 'FP'])
    .to_csv('results.csv', index=False)
)