In [1]:
# Import libraries
import os
import sys

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from itertools import combinations

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

In [2]:
# Set up paths & import src functions.
# The project root is taken to be the parent of the current working directory,
# so this cell assumes the notebook is run from a direct subfolder of the
# project (TODO confirm the expected launch directory).
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
src_folder = os.path.join(project_root, 'src')
# Prepend (rather than append) so modules in src/ take precedence over any
# same-named modules already on sys.path. Must run before the imports below.
sys.path.insert(0, src_folder)
# NOTE(review): star imports hide where names come from; `from_s3`, used in
# later cells, is presumably provided by s3_storage -- confirm and consider
# importing the needed names explicitly.
from modeling import *
from s3_storage import *

In [3]:
# Import data: fetch the train/test feature and label arrays from S3,
# in the order X_train, X_test, y_train, y_test.
X_train, X_test, y_train, y_test = [
    from_s3(bucket='mimic-jamesi', filepath=array_path)
    for array_path in (
        'data/acute_kidney_failure_X_train.npy',
        'data/acute_kidney_failure_X_test.npy',
        'data/acute_kidney_failure_y_train.npy',
        'data/acute_kidney_failure_y_test.npy',
    )
]

In [4]:
def run_final_test(model, X_test, y_test, bucket='mimic-jamesi'):
    """Load a stored model from S3 and report its ROC AUC on the test set.

    Parameters
    ----------
    model : str
        Name of the stored model. It is loaded with ``from_s3`` from the
        path ``models/<model>`` inside ``bucket``.
    X_test : array-like
        Test features, passed unchanged to the model's ``predict_proba``.
    y_test : array-like
        True labels corresponding to ``X_test``.
    bucket : str, optional
        S3 bucket that holds the stored model. Defaults to
        ``'mimic-jamesi'``, the bucket this notebook has always used, so
        existing three-argument calls behave exactly as before.

    Returns
    -------
    score
        The value returned by ``roc_auc_score`` for the test set. The same
        value is also printed, prefixed with the model name.
    """
    fitted_model = from_s3(bucket=bucket, filepath='models/{}'.format(model))
    probabilities = fitted_model.predict_proba(X_test)
    # Score with the LAST column of the predict_proba output. Presumably this
    # is the positive-class probability (TODO confirm per model); indexing
    # with -1 also works when the output has only a single column.
    score = roc_auc_score(y_test, probabilities[:, -1])
    print("{} score: ".format(model), score)
    return score

## Logistic Regression

In [5]:
# Evaluate the stored logistic regression model on the held-out test set
logistic_score = run_final_test(
    model='logistic_regression',
    X_test=X_test,
    y_test=y_test,
)

logistic_regression score:  0.8473821401576503


## Decision Tree

In [6]:
# Evaluate the stored decision tree model on the held-out test set
dt_score = run_final_test(
    model='decision_tree',
    X_test=X_test,
    y_test=y_test,
)

decision_tree score:  0.8468897527264875


## Random Forest

In [7]:
# Evaluate the stored random forest model on the held-out test set
rf_score = run_final_test(
    model='random_forest',
    X_test=X_test,
    y_test=y_test,
)

random_forest score:  0.871756397797214


## LightGBM

In [8]:
# Evaluate the stored LightGBM model on the held-out test set
lightgbm_score = run_final_test(
    model='light_gbm',
    X_test=X_test,
    y_test=y_test,
)

light_gbm score:  0.8865662455458374


## Neural Network

In [9]:
# Evaluate the stored neural network model on the held-out test set
nn_score = run_final_test(
    model='neural_network',
    X_test=X_test,
    y_test=y_test,
)

Using TensorFlow backend.


neural_network score:  0.8736095454054636
