In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Sequential
from keras.layers import Dense, Dropout

from sklearn.model_selection import cross_validate, StratifiedKFold

from imblearn.over_sampling import SMOTE

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Using TensorFlow backend.


/kaggle/input/severity-features-sigmoid/sofa_processed_sigmoid.csv
/kaggle/input/severity-features-sigmoid/saps_processed_sigmoid.csv
/kaggle/input/severity-features-sigmoid/oasis_processed_sigmoid.csv
/kaggle/input/severity-features-sigmoid/sapsii_processed_sigmoid.csv
/kaggle/input/severity-features/sapsii_processed.csv
/kaggle/input/severity-features/saps_processed.csv
/kaggle/input/severity-features/sofa_processed.csv
/kaggle/input/severity-features/oasis_processed.csv
/kaggle/input/severity-features-minmax-scaling/sofa_processed_min_max.csv
/kaggle/input/severity-features-minmax-scaling/sapsii_processed_min_max.csv
/kaggle/input/severity-features-minmax-scaling/saps_processed_min_max.csv
/kaggle/input/severity-features-minmax-scaling/oasis_processed_min_max.csv
/kaggle/input/last-icustay/last_icustay_20200614.csv
/kaggle/input/optimized-model/optimized_model.h5


In [2]:
#Load the data
last_icustay = pd.read_csv(r'/kaggle/input/last-icustay/last_icustay_20200614.csv', index_col = 'icustay_id')

#severity scores features (Z-Score)
sofa = pd.read_csv(r'/kaggle/input/severity-features/sofa_processed.csv', index_col = 'icustay_id')
oasis = pd.read_csv(r'/kaggle/input/severity-features/oasis_processed.csv', index_col = 'icustay_id')
saps = pd.read_csv(r'/kaggle/input/severity-features/saps_processed.csv', index_col = 'icustay_id')
sapsii = pd.read_csv(r'/kaggle/input/severity-features/sapsii_processed.csv', index_col = 'icustay_id')
'''
#severity scores features (Min-Max Scaling)
sofa = pd.read_csv(r'/kaggle/input/severity-features-minmax-scaling/sofa_processed_min_max.csv', index_col = 'icustay_id')
oasis = pd.read_csv(r'/kaggle/input/severity-features-minmax-scaling/oasis_processed_min_max.csv', index_col = 'icustay_id')
saps = pd.read_csv(r'/kaggle/input/severity-features-minmax-scaling/saps_processed_min_max.csv', index_col = 'icustay_id')
sapsii = pd.read_csv(r'/kaggle/input/severity-features-minmax-scaling/sapsii_processed_min_max.csv', index_col = 'icustay_id')

#severity scores features (Sigmoid Transformation)
sofa = pd.read_csv(r'/kaggle/input/severity-features-sigmoid/sofa_processed_sigmoid.csv', index_col = 'icustay_id')
oasis = pd.read_csv(r'/kaggle/input/severity-features-sigmoid/oasis_processed_sigmoid.csv', index_col = 'icustay_id')
saps = pd.read_csv(r'/kaggle/input/severity-features-sigmoid/saps_processed_sigmoid.csv', index_col = 'icustay_id')
sapsii = pd.read_csv(r'/kaggle/input/severity-features-sigmoid/sapsii_processed_sigmoid.csv', index_col = 'icustay_id')
'''
#dictionary with the severity scores dataframes
severity = {'sofa': sofa, 'oasis': oasis, 'saps': saps, 'sapsii': sapsii}

In [3]:
for key in severity:
    severity[key] = severity[key].loc[severity[key].index.isin(last_icustay.index)]
    if key == 'sofa':
        full_features = severity[key]
    else:
        full_features = pd.concat([full_features, \
                        severity[key][severity[key].columns[~severity[key].columns.isin(full_features.columns)]]], axis=1)

severity = {'full features': full_features}

In [4]:
def create_model(input_dim = 0, dropout = 0.2, optimizer = 'Adam', \
                    nodes_hidden_1 = 60, nodes_hidden_2 = 35):
    model = Sequential()

    model.add(Dense(nodes_hidden_1, activation = 'relu', input_dim = input_dim, \
                        kernel_initializer='random_uniform', bias_initializer='zeros'))
    model.add(Dropout(dropout))
    
    model.add(Dense(nodes_hidden_2, activation = 'relu', \
                        kernel_initializer='random_uniform', bias_initializer='zeros'))
    model.add(Dropout(dropout))

    model.add(Dense(2, activation = 'softmax'))
    
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [5]:
def oversampling(X, y):
    smote = SMOTE()
    X_sm, y_sm = smote.fit_sample(X, y)
    return X_sm, y_sm

In [6]:
kfolds = StratifiedKFold(n_splits=10)

In [9]:
#Scores
metrics = {'accuracy': 'test_accuracy', 'roc_auc': 'test_roc_auc',\
           'f1': 'test_f1', 'precision': 'test_precision', 'recall': 'test_recall'}

scores = {}

for key in severity:
    X_train, y_train = severity[key].drop(columns = ['clase']), severity[key]['clase']
    X_train, y_train = oversampling(X_train, y_train)
    
    params = {'input_dim': len(X_train.values[0]), 'optimizer': 'Adam', 'dropout': 0.2, \
                 'hidden_1': 60, 'hidden_2': 35}
    
    model = KerasClassifier(build_fn = create_model, input_dim = params['input_dim'], \
                            dropout = params['dropout'], optimizer = params['optimizer'], \
                            nodes_hidden_1 = params['hidden_1'], nodes_hidden_2 = params['hidden_2'], \
                            batch_size = 100, epochs = 150, verbose = 0)
    #model = KerasClassifier(build_fn = load_optimized_model, \
                                        #batch_size = 100, epochs = 150, verbose = 0)
    scores[key] = cross_validate(estimator = model, X = X_train, y = y_train, cv = kfolds, \
                                    scoring = list(metrics.keys()), n_jobs = -1)
    #scores[key] = cross_validate(estimator = optimized_model, X = X_train, y = y_train, cv = kfolds, \
                                    #scoring = list(metrics.keys()))


In [10]:
results = {}

for key in severity:
    print(key)
    print('---')
    for metric in metrics:
        print(metric)
        print('mean', scores[key][metrics[metric]].mean())
        print('std', scores[key][metrics[metric]].std())
        print('***')
        results[metric] = [scores[key][metrics[metric]].mean(), scores[key][metrics[metric]].std()]
        
#Save results in csv
results = pd.DataFrame.from_dict(results, orient='index', columns = ['mean', 'std'])
results.to_csv('results_v9_pao2bilirrubin.csv', index = True)

full features
---
accuracy
mean 0.8969693133304301
std 0.02147860846637141
***
roc_auc
mean 0.9609733885167013
std 0.01315888217355979
***
f1
mean 0.8991032460471
std 0.025136727481206333
***
precision
mean 0.876311329360492
std 0.009977747995499914
***
recall
mean 0.9245848103157813
std 0.048545414400955375
***
