In [52]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.tree import DecisionTreeClassifier
import seaborn as sns
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import confusion_matrix

In [111]:
class confusion_matrix_metrics(object):
    def __init__(self,M):
        self.M = M
        self.TP = M[0,0]
        self.FN = M[0,1]
        self.FP = M[1,0]
        self.TN = M[1,1]
    
    def sensitivity(self):
        return self.TP/(self.TP + self.FN)
    def specificity(self):
        return self.TN/(self.TN + self.FP)
    def precision(self):
        return self.TP/(self.TP + self.FP)
    def negative_predictive_value(self):
        return self.TN/(self.TN + self.FN)
    def miss_rate(self):
        return 1 - self.sensitivity()
    def fall_out(self):
        return 1 - self.specificity()
    def false_discovery(self):
        return 1 - self.precision()
    def false_omission(self):
        return 1 - self.negative_predictive_value()
    def threat_score(self):
        return self.TP/(self.TP + self.FN + self.FP)
    def accuracy(self):
        return (self.TP + self.TN)/(self.TP + self. TN + self.FP + self.FN)
    def balanced_accuracy(self):
        return self.sensitivity()/2 + self.specificity()/2

In [124]:
df = pd.read_csv('mimic_dataset.csv')
df.drop(columns=['Unnamed: 0'],inplace=True)
fillcols = {'hospital_expire_flag':0,'age':df.age.mean(),'NumDrugs':0,'num_procedures':0,'curr_service':0,'num_serv':0,'num_transfers':0,'curr_careunit':0,\
            'avg_los':df.avg_los.mean(),'tot_los':df.tot_los.mean(),'num_unique_reads':df.num_unique_reads.mean(),\
           'total_reads':df.total_reads.mean(),'uinique_caregivers':df.uinique_caregivers.mean(),'total_icd9':df.total_icd9.mean(),'total_icu_hours':0,\
           'avg_icu_hours':0,'total_icu_stays':0,'avg_num_drug_administered':0,'max_drug_administered':0,'total_input_drugs':0,'tot_routes':0,\
           'patientweight':df.patientweight.mean(),'tot_org':0,'org_name':0,'org_itemid':0}
df.fillna(value=fillcols,inplace=True)
serv = df.curr_service.unique()
care = df.curr_careunit.unique()
org = df.org_name.unique()

def replace_stuff(s):
    new_dic = {}
    i = 1
    for j in s:
        if j != 0:
            new_dic[j] = i
            i +=1
    return new_dic
df.replace({'curr_service':replace_stuff(serv),'curr_careunit':replace_stuff(care),'org_name':replace_stuff(org)},inplace=True)
df = df.apply(np.int64)
y = df.hospital_expire_flag
X = df.drop(columns=['hospital_expire_flag'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [125]:
model = AdaBoostClassifier()
model.fit(X_train,y_train)
yhat = model.predict(X_test)
M = confusion_matrix(y_true=y_test,y_pred=yhat)
metrics = confusion_matrix_metrics(M)
metrics.sensitivity()
metrics.specificity()

0.3055822906641001

In [126]:
metrics.balanced_accuracy()

0.6444026243607476

### Class Balancing

In [127]:
df1 = df[df.hospital_expire_flag == 1]
df2 = df[df.hospital_expire_flag == 0]
df2 = df2.sample(n=len(df1))
df3 = df1.append(df2)
X = df3.drop(columns=['hospital_expire_flag'])
y = df3.hospital_expire_flag
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.33, random_state=42)

In [128]:
model = AdaBoostClassifier()
model.fit(X_train1,y_train1)
yhat1 = model.predict(X_test1)
yhat = model.predict(X_test)

In [130]:
M1 = confusion_matrix(y_true=y_test1,y_pred=yhat1)
M = confusion_matrix(y_test,yhat)
metrics = confusion_matrix_metrics(M)
metrics1 = confusion_matrix_metrics(M1)

In [138]:
M1

array([[1616,  394],
       [ 419, 1706]], dtype=int64)