In [16]:
# Import libraries
import os
import sys

import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
import statistics
import datetime as dt

from sklearn.preprocessing import MinMaxScaler, Imputer
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

import lightgbm as lgb

import keras
from keras.models import Sequential
from keras.layers import Dense

import talos as ta

In [2]:
# Check virtual environment: should be: '/Users/James/anaconda3/envs/mimic/bin/python'
sys.executable

'/Users/James/anaconda3/envs/mimic/bin/python'

In [3]:
# Set up paths
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
src_folder = os.path.join(project_root, 'src')

src_preparation_folder = os.path.join(src_folder, 'preparation')
src_processing_folder = os.path.join(src_folder, 'processing')
src_modeling_folder = os.path.join(src_folder, 'modeling')

In [4]:
# Import src functions
sys.path.insert(0, src_preparation_folder)
from import_data import get_table
from import_data import get_patient_admissions_diagnoses
from import_data import get_admission_data
from import_data import get_chartevents
from import_data import get_labevents
from extract_codes import find_ndc_codes

sys.path.insert(0, src_processing_folder)
from stats import plot_KDE
from stats import plot_perc_bar_chart
from stats import compare_groups

sys.path.insert(0, src_modeling_folder)
from models import train_lgb

  """)


In [5]:
# RANDOM ARRAYS FOR TESTING AND BUILDING MODELS
#features = np.random.rand(100,100)
#labels = np.random.randint(0,2,100)

In [6]:
features = np.load(os.path.join(os.getcwd(), os.pardir, 'data', 'alzheimers_features.npy'))
labels = np.load(os.path.join(os.getcwd(), os.pardir, 'data', 'alzheimers_labels.npy'))

In [7]:
print(features.shape)
print(labels.shape)

(4380, 1526)
(4380,)


In [14]:
def train_keras_nn(features, labels, test_size):
    
    # Split into test and validation
    X_train, X_val, y_train, y_val = train_test_split(features, labels, test_size = test_size)
    print(len(X_train))
    print(len(X_val))
        
    # --- NEURAL NETWORK

    #Initializing Neural Network
    classifier = Sequential()

    # Adding the input layer and the first hidden layer
    classifier.add(Dense(output_dim = 250, init = 'uniform', activation = 'relu', input_dim = X_train.shape[1]))
    # Adding the second hidden layer
    classifier.add(Dense(output_dim = 250, init = 'uniform', activation = 'relu'))
    # Adding the third hidden layer
    classifier.add(Dense(output_dim = 250, init = 'uniform', activation = 'relu'))
    # Adding the fourth hidden layer
    classifier.add(Dense(output_dim = 250, init = 'uniform', activation = 'relu'))
    # Adding the fifth hidden layer
    classifier.add(Dense(output_dim = 250, init = 'uniform', activation = 'relu'))
    # Adding the sixth hidden layer
    classifier.add(Dense(output_dim = 250, init = 'uniform', activation = 'relu'))
    # Adding the seventh hidden layer
    classifier.add(Dense(output_dim = 250, init = 'uniform', activation = 'relu'))
    # Adding the eighth hidden layer
    classifier.add(Dense(output_dim = 250, init = 'uniform', activation = 'relu'))
    # Adding the output layer
    classifier.add(Dense(output_dim = 1, init = 'uniform', activation = 'sigmoid'))
    
    #Optimiser
    sgd = keras.optimizers.SGD(lr=0.1, momentum=0.0, decay=0.0, nesterov=False)

    # Compiling Neural Network
    classifier.compile(optimizer = sgd, loss = 'binary_crossentropy', metrics = ['accuracy'])

    # Fitting our model 
    classifier.fit(X_train, y_train, batch_size = 10, nb_epoch = 10)
    
    # Find train and test ROC scores
    y_predict = classifier.predict(X_val)
    val_ROC_sc = roc_auc_score(y_val, y_predict)
    
    y_predict = classifier.predict(X_train)
    train_ROC_sc = roc_auc_score(y_train, y_predict)
    
    print('Train is ' + str(train_ROC_sc))
    print('Val is ' + str(val_ROC_sc))

In [15]:
train_keras_nn(features=features, labels=labels, test_size=0.2)

3504
876


  
  app.launch_new_instance()


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train is 0.6998787263872956
Val is 0.6816083903525313


In [40]:
# first we have to make sure to input data and params into the function
def train_keras_nn_v2(x_train, y_train, x_val, y_val, params):

    # next we can build the model exactly like we would normally do it
    model = Sequential()
    model.add(Dense(10, input_dim=x_train.shape[1],
                    activation=params['activation'],
                    kernel_initializer='normal'))
    
    #model.add(dropout=(params['dropout']))
    
    # if we want to also test for number of layers and shapes, that's possible
    #hidden_layers(model, params, 1)
   
    # then we finish again with completely standard Keras way
    model.add(Dense(1, activation=params['last_activation'],
                    kernel_initializer='normal'))
    
    model.compile(loss=params['losses'],
                  # here we add a regulizer normalization function from Talos
                  optimizer=params['optimizer'],
                  lr=params['lr'],
                  metrics=['acc'])
    
    history = model.fit(x_train, y_train, 
                        validation_data=[x_val, y_val],
                        batch_size=params['batch_size'],
                        epochs=params['epochs'],
                        verbose=0)
    
    # finally we have to make sure that history object and model are returned
    return history, model

In [41]:
# then we can go ahead and set the parameter space
p = {'lr': (0.5, 5, 10),
     'first_neuron':[4, 8, 16, 32, 64],
     'hidden_layers':[0, 1, 2],
     'batch_size': (2, 30, 10),
     'epochs': [10],
     'dropout': (0, 0.5, 5),
     'weight_regulizer':[None],
     'emb_output_dims': [None],
     'shape':['brick','long_funnel'],
     'optimizer': ['Adam', 'Nadam', 'RMSprop'],
     'losses': ['logcosh', 'binary_crossentropy'],
     'activation':['relu', 'elu'],
     'last_activation': ['sigmoid']}

In [42]:
# and run the experiment
t = ta.Scan(x=features,
            y=labels,
            model=train_keras_nn_v2,
            grid_downsample=0.01, 
            params=p,
            dataset_name='first_test',
            experiment_no='1')








  0%|          | 0/1800 [00:00<?, ?it/s][A[A[A[A[A[A[A

ValueError: ('Some keys in session_kwargs are not supported at this time: %s', dict_keys(['lr']))