In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Sequential
from keras.layers import Dense, Dropout

from sklearn.model_selection import GridSearchCV, StratifiedKFold

from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file below the Kaggle input directory, then print
# one full path per line.
input_files = (
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#Load the data
last_icustay = pd.read_csv(r'/kaggle/input/last-icustay/last_icustay_20200614.csv', index_col = 'icustay_id')

# The severity-score features exist in three differently scaled variants.
# Each entry maps a variant name to (dataset directory, file-name suffix), so
# switching variants is a one-line change instead of swapping commented-out code.
SEVERITY_VARIANTS = {
    'zscore': ('/kaggle/input/severity-features', '_processed.csv'),                           # Z-Score
    'minmax': ('/kaggle/input/severity-features-minmax-scaling', '_processed_min_max.csv'),    # Min-Max Scaling
    'sigmoid': ('/kaggle/input/severity-features-sigmoid', '_processed_sigmoid.csv'),          # Sigmoid Transformation
}
SEVERITY_SCALING = 'zscore'  # one of the SEVERITY_VARIANTS keys

# Order matters: later cells merge the tables in this order.
SEVERITY_SCORES = ('sofa', 'oasis', 'saps', 'sapsii')

severity_dir, severity_suffix = SEVERITY_VARIANTS[SEVERITY_SCALING]

#dictionary with the severity scores dataframes
severity = {
    score: pd.read_csv(severity_dir + '/' + score + severity_suffix, index_col = 'icustay_id')
    for score in SEVERITY_SCORES
}

# Keep the individual frames available under their original names.
sofa, oasis, saps, sapsii = (severity[score] for score in SEVERITY_SCORES)

In [None]:
# Restrict every severity table to the ICU stays listed in last_icustay.
severity_filtered = {
    name: frame.loc[frame.index.isin(last_icustay.index)]
    for name, frame in severity.items()
}

# Collect, table by table, only the columns that no earlier table already
# contributed (so shared columns such as the label are kept once, from the
# first table that has them). The first table is taken whole regardless of
# its key, so the merge no longer depends on a key literally named 'sofa'.
feature_blocks = []
seen_columns = []
for frame in severity_filtered.values():
    new_columns = frame.columns[~frame.columns.isin(seen_columns)]
    feature_blocks.append(frame[new_columns])
    seen_columns.extend(new_columns)

# Single column-wise concatenation instead of growing the frame inside the loop.
full_features = pd.concat(feature_blocks, axis=1)

# Downstream cells iterate over `severity`; expose the merged table as its only entry.
severity = {'full features': full_features}

In [None]:
def create_model(input_dim = 0, dropout = 0.2, optimizer = 'Adadelta', \
                    nodes_hidden_1 = 50, nodes_hidden_2 = 25):
    """Build and compile the feed-forward classifier used by the grid search.

    Architecture: two ReLU hidden layers, each followed by dropout, and a
    2-unit softmax output layer.

    Parameters
    ----------
    input_dim : number of input features fed to the first hidden layer.
    dropout : dropout rate applied after each hidden layer.
    optimizer : optimizer passed to ``model.compile``.
    nodes_hidden_1 : units in the first hidden layer.
    nodes_hidden_2 : units in the second hidden layer.

    Returns
    -------
    The compiled ``Sequential`` model (categorical cross-entropy loss,
    accuracy metric).
    """
    def hidden_layer(units, **extra):
        # Both hidden layers share activation and initializers.
        return Dense(units, activation = 'relu',
                     kernel_initializer='random_uniform', bias_initializer='zeros',
                     **extra)

    network = Sequential([
        hidden_layer(nodes_hidden_1, input_dim = input_dim),
        Dropout(dropout),
        hidden_layer(nodes_hidden_2),
        Dropout(dropout),
        Dense(2, activation = 'softmax'),
    ])

    network.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

In [None]:
def oversampling(X, y, random_state = None):
    """Balance the classes of (X, y) by oversampling with SMOTE.

    Parameters
    ----------
    X : feature matrix.
    y : class labels aligned with the rows of ``X``.
    random_state : optional seed forwarded to ``SMOTE``; ``None`` (the
        default) keeps SMOTE's own default behaviour.

    Returns
    -------
    (X_sm, y_sm) : the resampled features and labels.
    """
    smote = SMOTE(random_state = random_state)
    # `fit_resample` replaces `fit_sample`, which was deprecated and later
    # removed from imbalanced-learn.
    X_sm, y_sm = smote.fit_resample(X, y)
    return X_sm, y_sm

In [None]:
# Stratified cross-validation splitter passed as `cv` to the grid search.
N_SPLITS = 10
kfolds = StratifiedKFold(n_splits=N_SPLITS)

In [None]:
#GridSearch
# Seed for SMOTE so every grid candidate is trained on the same synthetic samples.
SMOTE_SEED = 42

# define the grid search parameters
# The `model__` prefix routes each entry to the pipeline step named 'model'.
param_grid = {
    'model__optimizer': ['Adadelta', 'Adam'],
    'model__dropout': [0.2, 0.4],
    'model__nodes_hidden_1': [40, 50, 60],
    'model__nodes_hidden_2': [25, 30, 35],
}

for key in severity:
    X_train, y_train = severity[key].drop(columns = ['clase']), severity[key]['clase']

    # Take the feature count from the frame's shape; this does not depend on
    # whether the resampler returns a DataFrame or an ndarray.
    model = KerasClassifier(build_fn = create_model, input_dim = X_train.shape[1], \
                                        batch_size = 100, epochs = 150, verbose = 0)

    # Oversample inside the cross-validation loop: the imblearn Pipeline fits
    # SMOTE on each training fold only, so no synthetic point derived from a
    # validation row leaks into training and recall is scored on real,
    # un-resampled validation data.
    pipeline = Pipeline(steps = [('smote', SMOTE(random_state = SMOTE_SEED)), ('model', model)])

    grid = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv = kfolds, \
                        scoring = 'recall', n_jobs = -1)
    grid_result = grid.fit(X_train, y_train)

    # summarize results
    print('Recall Results')
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    print('------------------------')
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))

    print('------------------------')

    #save the best estimator
    # The refit best estimator is the whole pipeline; the underlying Keras
    # network lives on its 'model' step.
    grid.best_estimator_.named_steps['model'].model.save('optimized_model.h5')