# Import Libraries and Data

In [10]:
# Loading Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import metrics
from sklearn.metrics import classification_report, roc_auc_score, roc_curve
from imblearn.under_sampling import RandomUnderSampler
from sklearn.calibration import CalibrationDisplay, CalibratedClassifierCV, calibration_curve

In [11]:
# Loading Dataset
raw_data = pd.read_csv("data_new.csv")
raw_data.head()

Unnamed: 0.1,Unnamed: 0,Hospital Mortality,Age,Gender,Uncomplicated Hypertension,Complicated Hypertension,Uncomplicated Diabetes,Complicated Diabetes,Malignancy,Hematologic Disease,...,Mean WBC,Max BUN,Min BUN,Mean BUN,Max Creatinine,Min Creatinine,Mean Creatinine,Max Hemoglobin,Min Hemoglobin,Mean Hemoglobin
0,0,0,77,M,0,0,0,0,0,0,...,17.26,53.0,41.0,44.75,3.2,2.4,2.65,12.8,7.8,10.04
1,1,1,42,M,1,0,0,0,0,0,...,10.6,17.0,16.0,16.5,1.4,1.2,1.3,15.4,12.9,14.1
2,2,1,72,M,1,0,0,0,0,0,...,8.1,39.0,28.0,33.5,1.7,1.3,1.5,13.3,7.8,10.23
3,9,1,72,M,1,0,0,0,1,1,...,6.9,13.0,11.0,12.0,0.9,0.7,0.8,10.6,10.6,10.6
4,13,0,76,M,0,0,0,0,0,0,...,18.2,55.0,42.0,48.5,2.6,1.8,2.2,12.5,12.5,12.5


# Data Preparation

In [12]:
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12489 entries, 0 to 12488
Data columns (total 69 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   Unnamed: 0                             12489 non-null  int64  
 1   Hospital Mortality                     12489 non-null  int64  
 2   Age                                    12489 non-null  int64  
 3   Gender                                 12489 non-null  object 
 4   Uncomplicated Hypertension             12489 non-null  int64  
 5   Complicated Hypertension               12489 non-null  int64  
 6   Uncomplicated Diabetes                 12489 non-null  int64  
 7   Complicated Diabetes                   12489 non-null  int64  
 8   Malignancy                             12489 non-null  int64  
 9   Hematologic Disease                    12489 non-null  int64  
 10  Metastasis                             12489 non-null  int64  
 11  Pe

No missing values, all correct datatype, CHECK.

In [13]:
raw_data['Hospital Mortality'].value_counts()

0    10331
1     2158
Name: Hospital Mortality, dtype: int64

In [14]:
print(f"The data is unbalanced with {3017/(3017+15866)*100:.1f}% mortality rate.")

The data is unbalanced with 16.0% mortality rate.


In [15]:
# Convert Gender from Str to Numeric
raw_data['Gender'] = pd.get_dummies(raw_data['Gender'], drop_first = True)

In [16]:
raw_data.columns

Index(['Unnamed: 0', 'Hospital Mortality', 'Age', 'Gender',
       'Uncomplicated Hypertension', 'Complicated Hypertension',
       'Uncomplicated Diabetes', 'Complicated Diabetes', 'Malignancy',
       'Hematologic Disease', 'Metastasis', 'Peripheral Vascular Disease',
       'Hypothyroidism', 'Chronic Heart Failure', 'Stroke', 'Liver Disease',
       'SAPS II', 'SOFA', 'OASIS', 'Sepsis', 'Any Organ Failure',
       'Severe Respiratory Failure', 'Severe Coagulation Failure',
       'Severe Liver Failure', 'Severe Cardiovascular Failure',
       'Severe Central Nervous System Failure', 'Severe Renal Failure',
       'Respiratory Dysfunction', 'Cardiovascular Dysfunction',
       'Renal Dysfunction', 'Hematologic Dysfunction', 'Metabolic Dysfunction',
       'Neurologic Dysfunction', 'Max Heart Rate', 'Min Heart Rate',
       'Mean Heart Rate', 'Max MAP', 'Min MAP', 'Mean MAP',
       'Max Systolic Pressure', 'Min Systolic Pressure',
       'Mean Systolic Pressure', 'Max Diastolic Press

In [17]:
features = ['Age', 'Gender', 'Uncomplicated Hypertension',
       'Complicated Hypertension', 'Uncomplicated Diabetes',
       'Complicated Diabetes', 'Malignancy', 'Hematologic Disease',
       'Metastasis', 'Peripheral Vascular Disease', 'Hypothyroidism',
       'Chronic Heart Failure', 'Stroke', 'Liver Disease', 'SAPS II', 'SOFA',
       'OASIS', 'Sepsis', 'Any Organ Failure', 'Severe Respiratory Failure',
       'Severe Coagulation Failure', 'Severe Liver Failure',
       'Severe Cardiovascular Failure',
       'Severe Central Nervous System Failure', 'Severe Renal Failure',
       'Respiratory Dysfunction', 'Cardiovascular Dysfunction',
       'Renal Dysfunction', 'Hematologic Dysfunction', 'Metabolic Dysfunction',
       'Neurologic Dysfunction', 'Max Heart Rate', 'Min Heart Rate',
       'Mean Heart Rate', 'Max MAP', 'Min MAP', 'Mean MAP',
       'Max Systolic Pressure', 'Min Systolic Pressure',
       'Mean Systolic Pressure', 'Max Diastolic Pressure',
       'Min Diastolic Pressure', 'Mean Diastolic Pressure', 'Max Temperature',
       'Min Temperature', 'Mean Temperature', 'Max Lactate', 'Min Lactate',
       'Mean Lactate', 'Max pH', 'Min pH', 'Mean pH', 'Max Glucose',
       'Min Glucose', 'Mean Glucose', 'Max WBC', 'Min WBC', 'Mean WBC',
       'Max BUN', 'Min BUN', 'Mean BUN', 'Max Creatinine', 'Min Creatinine',
       'Mean Creatinine', 'Max Hemoglobin', 'Min Hemoglobin',
       'Mean Hemoglobin']

In [18]:
### SELECTED FEATURES data
selected_feat_os = [
    'SAPS II', 'OASIS', 'Age', 'Gender',
    'Min Heart Rate', 'Mean Heart Rate', 'Mean MAP', 'Min Systolic Pressure',
    'Mean Systolic Pressure', 'Min Diastolic Pressure', 'Mean Diastolic Pressure', 'Max Temperature',
    'Mean Temperature', 'Max Lactate', 'Min Lactate', 'Min pH',
    'Mean pH', 'Min Glucose', 'Min BUN', 'Max Hemoglobin',
    'Min Hemoglobin', 'Uncomplicated Hypertension', 'Complicated Hypertension', 'Uncomplicated Diabetes',
    'Complicated Diabetes', 'Malignancy', 'Hematologic Disease', 'Peripheral Vascular Disease',
    'Hypothyroidism', 'Chronic Heart Failure', 'Liver Disease', 'Any Organ Failure',
    'Severe Respiratory Failure', 'Severe Cardiovascular Failure', 'Severe Central Nervous System Failure', 'Severe Renal Failure',
    'Respiratory Dysfunction', 'Renal Dysfunction', 'Metabolic Dysfunction', 'Neurologic Dysfunction'
    ]