In [1]:
# Import libraries
import os
import sys

import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
import statistics
import datetime as dt

from sklearn.preprocessing import MinMaxScaler, Imputer
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

import lightgbm as lgb

import keras
from keras.models import Sequential
from keras.layers import Dense

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.
Using TensorFlow backend.


In [2]:
# Sanity check: display the path of the Python interpreter running this kernel,
# to confirm the notebook is using the intended virtual environment.
# Expected value: '/Users/James/anaconda3/envs/mimic/bin/python'
sys.executable

'/Users/James/anaconda3/envs/mimic/bin/python'

In [3]:
# Locate the source tree: the project root is the parent of the current
# working directory, and the project code lives under <project_root>/src.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
src_folder = os.path.join(project_root, 'src')

# One sub-folder of src per pipeline stage.
src_preparation_folder, src_processing_folder, src_modeling_folder = (
    os.path.join(src_folder, stage)
    for stage in ('preparation', 'processing', 'modeling')
)

In [4]:
# Import the project's own functions from the src folders.
# Each folder is pushed to the front of sys.path immediately before the imports
# that rely on it, so it is searched ahead of anything already on the path.

# --- preparation ---
sys.path.insert(0, src_preparation_folder)
from import_data import (
    get_table,
    get_patient_admissions_diagnoses,
    get_admission_data,
    get_chartevents,
    get_labevents,
)
from extract_codes import find_ndc_codes

# --- processing ---
sys.path.insert(0, src_processing_folder)
from stats import plot_KDE, plot_perc_bar_chart, compare_groups

# --- modeling ---
sys.path.insert(0, src_modeling_folder)
from models import train_lgb

  """)


In [5]:
# Random placeholder arrays for testing and building models (uncomment to use them)
#features = np.random.rand(100,100)
#labels = np.random.randint(0,2,100)

In [6]:
# Load the saved feature matrix and label vector from the data folder that
# sits next to the current working directory (<cwd>/../data).
features, labels = (
    np.load(os.path.join(os.getcwd(), os.pardir, 'data', filename))
    for filename in ('alzheimers_features.npy', 'alzheimers_labels.npy')
)

In [7]:
# Show the dimensions of the loaded arrays, one per line (features first, then labels).
print(features.shape, labels.shape, sep='\n')

(4380, 1526)
(4380,)


In [8]:
# Train a Light GBM through the project's train_lgb helper.
# The hyperparameters are gathered in a dict first so the call itself stays short.
lgb_params = dict(
    # objective metric and threading
    metric='auc',
    nthread=4,
    # boosting schedule: many small steps, cut short by early stopping in the call below
    n_estimators=10000,
    learning_rate=0.01,
    # tree shape
    num_leaves=5,
    # column / row sampling
    colsample_bytree=0.9497036,
    # NOTE(review): LightGBM only applies `subsample` (bagging) when
    # `subsample_freq` / `bagging_freq` is > 0 -- confirm whether train_lgb sets
    # it; otherwise this value has no effect.
    subsample=0.8715623,
    max_depth=5,
    # regularisation
    reg_alpha=1,
    reg_lambda=1,
    min_split_gain=0.0222415,
    min_child_weight=39.3259775,
    # logging verbosity flags passed through with the other parameters
    silent=-1,
    verbose=-1,
)

# Left as the final expression so the value returned by train_lgb is displayed.
train_lgb(
    features=features,
    labels=labels,
    n_folds=5,
    params=lgb_params,
    eval_metric='auc',
    early_stopping_rounds=500,
    verbose=100,
)

LGB starting
Training until validation scores don't improve for 500 rounds.
[100]	valid's auc: 0.661279	train's auc: 0.801919
[200]	valid's auc: 0.669064	train's auc: 0.826288
[300]	valid's auc: 0.676801	train's auc: 0.843078
[400]	valid's auc: 0.686376	train's auc: 0.855599
[500]	valid's auc: 0.692274	train's auc: 0.867739
[600]	valid's auc: 0.697801	train's auc: 0.876587
[700]	valid's auc: 0.700886	train's auc: 0.883467
[800]	valid's auc: 0.704267	train's auc: 0.8898
[900]	valid's auc: 0.70671	train's auc: 0.895123
[1000]	valid's auc: 0.70977	train's auc: 0.899901
[1100]	valid's auc: 0.70935	train's auc: 0.90358
[1200]	valid's auc: 0.711102	train's auc: 0.907542
[1300]	valid's auc: 0.71204	train's auc: 0.910963
[1400]	valid's auc: 0.712484	train's auc: 0.913954
[1500]	valid's auc: 0.712928	train's auc: 0.916706
[1600]	valid's auc: 0.713422	train's auc: 0.919201
[1700]	valid's auc: 0.712287	train's auc: 0.921604
[1800]	valid's auc: 0.711349	train's auc: 0.923859
[1900]	valid's auc: 0.

Unnamed: 0,fold,train,valid
0,0,0.917936,0.713718
1,1,0.841961,0.726487
2,2,0.820118,0.657731
3,3,0.901019,0.720024
4,4,0.822333,0.679873
5,overall,0.860673,0.697504
