In [1]:
import pandas as pd
import numpy as np
import os

from Models import probitModel, logisticModel
from util import *
from sklearn.preprocessing import *
from sklearn.impute import *

import json
import joblib

import lightgbm as lgb
from lightgbm import LGBMClassifier

In [2]:
# Folder holding the CSV inputs, relative to the notebook's working directory.
data_folder = 'data'
# Read the training table and remove the 'mortality' column so that only the
# remaining columns are kept (presumably 'mortality' is the target label).
train = (
    pd.read_csv(os.path.join(data_folder, 'mimiciv_traindata.csv'))
    .drop(columns='mortality')
)
print(train.columns)
# Last expression of the cell: rendered as a preview of the first rows.
train.head()

Index(['Fraction inspired oxygen_mean', 'Fraction inspired oxygen_min',
       'Fraction inspired oxygen_max', 'Glucose_mean', 'Glucose_min',
       'Glucose_max', 'Heart Rate_mean', 'Heart Rate_min', 'Heart Rate_max',
       'Mean blood pressure_mean', 'Mean blood pressure_min',
       'Mean blood pressure_max', 'Diastolic blood pressure_mean',
       'Diastolic blood pressure_min', 'Diastolic blood pressure_max',
       'Systolic blood pressure_mean', 'Systolic blood pressure_min',
       'Systolic blood pressure_max', 'Oxygen saturation_mean',
       'Oxygen saturation_min', 'Oxygen saturation_max',
       'Respiratory rate_mean', 'Respiratory rate_min', 'Respiratory rate_max',
       'Temperature_mean', 'Temperature_min', 'Temperature_max', 'Weight_mean',
       'Weight_min', 'Weight_max', 'pH_mean', 'pH_min', 'pH_max'],
      dtype='object')


Unnamed: 0,Fraction inspired oxygen_mean,Fraction inspired oxygen_min,Fraction inspired oxygen_max,Glucose_mean,Glucose_min,Glucose_max,Heart Rate_mean,Heart Rate_min,Heart Rate_max,Mean blood pressure_mean,...,Respiratory rate_max,Temperature_mean,Temperature_min,Temperature_max,Weight_mean,Weight_min,Weight_max,pH_mean,pH_min,pH_max
0,0.484615,0.4,0.6,155.181818,52.0,280.0,52.367347,35.0,93.0,87.096154,...,27.0,36.356349,35.2,37.1,81.419764,81.419764,81.419764,7.32875,7.19,7.4
1,0.6,0.5,1.0,127.954545,85.0,177.0,72.660377,58.0,105.0,72.315789,...,28.0,36.786325,36.388889,37.055556,93.10351,91.807021,95.0,7.377143,7.34,7.42
2,0.48125,0.4,0.8,145.625,116.0,180.0,107.26,96.0,118.0,86.462963,...,30.0,37.316239,36.611111,37.944444,67.5,67.5,67.5,7.372,7.29,7.45
3,0.4,0.4,0.4,100.142857,77.0,154.0,57.25,47.0,65.0,69.762712,...,34.0,,,,80.403091,76.838485,86.0,7.16125,6.0,7.36
4,0.566667,0.4,1.0,138.0,116.0,155.0,84.015625,64.0,114.0,70.662162,...,32.0,36.14899,34.722222,37.666667,,,,7.374444,7.27,7.42


In [3]:
# Load the test set from the same data folder as the training data.
test = pd.read_csv(os.path.join(data_folder,'mortality_testdata.csv'))
print(test.columns)
# Make sure the test data and the training data have the same columns
# (same names in the same order). On failure, report which names are missing
# on either side; both lists are empty when only the order differs.
assert np.array_equal(train.columns, test.columns), (
    "train/test column mismatch: "
    f"only in train={list(train.columns.difference(test.columns))}, "
    f"only in test={list(test.columns.difference(train.columns))}"
)
# A bare `test.head()` in the middle of a cell is evaluated and thrown away;
# only the last expression of a cell is rendered. display() shows the preview
# without having to move it past the feature computation.
display(test.head())
# Build the model inputs from the raw test table. compute_features is not
# defined in this notebook (presumably star-imported from util); `x` is the
# input every model pipeline below transforms.
x = compute_features(test)

Index(['Fraction inspired oxygen_mean', 'Fraction inspired oxygen_min',
       'Fraction inspired oxygen_max', 'Glucose_mean', 'Glucose_min',
       'Glucose_max', 'Heart Rate_mean', 'Heart Rate_min', 'Heart Rate_max',
       'Mean blood pressure_mean', 'Mean blood pressure_min',
       'Mean blood pressure_max', 'Diastolic blood pressure_mean',
       'Diastolic blood pressure_min', 'Diastolic blood pressure_max',
       'Systolic blood pressure_mean', 'Systolic blood pressure_min',
       'Systolic blood pressure_max', 'Oxygen saturation_mean',
       'Oxygen saturation_min', 'Oxygen saturation_max',
       'Respiratory rate_mean', 'Respiratory rate_min', 'Respiratory rate_max',
       'Temperature_mean', 'Temperature_min', 'Temperature_max', 'Weight_mean',
       'Weight_min', 'Weight_max', 'pH_mean', 'pH_min', 'pH_max'],
      dtype='object')


# Probit Model

In [4]:
# Restore the saved probit preprocessing pipeline and the probit model, in
# that order.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code — only load artifacts that come from a trusted source.
probit_pipeline, probit = (
    joblib.load('../others/probit_pipeline.pkl'),
    joblib.load('../others/probit.pkl'),
)
# Last expression of the cell: show the loaded pipeline.
probit_pipeline

In [5]:
# Run the test features through the probit pipeline, then predict with the
# loaded probit model.
# NOTE(review): the LightGBM cell thresholds its scores at 0.5, but no
# threshold is applied here — confirm that probit.predict already returns
# class labels rather than probabilities.
probit_x = probit_pipeline.transform(x)
probit_y = probit.predict(probit_x)
# Uncomment the line below to save the probit model's predictions as a CSV
# (single column, no index) one level above `data_folder`.
#pd.Series(probit_y).to_csv(os.path.join(data_folder,'../probit_predictions.csv'), index=False)

# Logistic Model

In [6]:
# Restore the saved logistic preprocessing pipeline and the logistic model, in
# that order.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code — only load artifacts that come from a trusted source.
logistic_pipeline, logistic = (
    joblib.load('../others/logistic_pipeline.pkl'),
    joblib.load('../others/logistic.pkl'),
)
# Last expression of the cell: show the loaded pipeline.
logistic_pipeline

In [7]:
# Run the test features through the logistic pipeline, then predict with the
# loaded logistic model.
# NOTE(review): the LightGBM cell thresholds its scores at 0.5, but no
# threshold is applied here — confirm that logistic.predict already returns
# class labels rather than probabilities.
logistic_x = logistic_pipeline.transform(x)
logistic_y = logistic.predict(logistic_x)
# Uncomment the line below to save the logistic model's predictions as a CSV
# (single column, no index) one level above `data_folder`.
#pd.Series(logistic_y).to_csv(os.path.join(data_folder,'../logistic_predictions.csv'), index=False)

# LightGBM Model

In [8]:
# Restore the saved LightGBM preprocessing pipeline (joblib pickle) and the
# booster (LightGBM model file), in that order.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code — only load artifacts that come from a trusted source.
lightgbm_pipeline, lightgbm = (
    joblib.load('../others/lightgbm_pipeline.pkl'),
    lgb.Booster(model_file='../others/lightgbm.txt'),
)
# Last expression of the cell: show the loaded pipeline.
lightgbm_pipeline

In [9]:
# Run the test features through the LightGBM pipeline and score them with the
# loaded booster.
lightgbm_x = lightgbm_pipeline.transform(x)
lightgbm_p = lightgbm.predict(lightgbm_x)
# Scores at or above 0.5 become 1, everything else 0.
# NOTE(review): assumes the booster outputs one probability per row — confirm.
lightgbm_y = np.greater_equal(lightgbm_p, 0.5).astype(int)
# Save the labels as a single column without the index; the relative path
# resolves to two directory levels above `data_folder`.
pd.Series(lightgbm_y).to_csv(
    os.path.join(data_folder, '../../predictions.csv'),
    index=False,
)