###  Created by Luis Alejandro (alejand@umich.edu)

## Cross-validation using Keras, Tensorflow and Sklearn Wrapper

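Builds a Keras MLP neural network using the TensorFlow backend and performs model selection over the following hyperparameters:

- hidden-layer architecture (number of layers and units per layer)
- L2 regularization strength
- hidden-layer activation function
- dropout rate
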
We use a [scikit-learn wrapper](https://keras.io/scikit-learn-api/) for Keras models and perform cross-validation and grid search using [GridSearchCV](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html). Since we want to apply standardization, we must use a [Pipeline](https://scikit-learn.org/stable/modules/compose.html#pipeline) so that the scaler is fit only on the training folds and the models are standardized and evaluated correctly during cross-validation.

In [1]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
import numpy as np
import pandas as pd
import time
from tensorflow.keras import regularizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
import sys
sys.path.append('../')
import utils.reports as rp

In [2]:
# Utility function to build the Keras model (params of the function are used during grid search)
def build_model(num_features, num_outputs, architecture, regularization = 0, activation='sigmoid', dropout_rate = 0):
    """Assemble and compile a fully connected Keras classifier.

    Parameters
    ----------
    num_features : int
        Number of input features; passed as ``input_dim`` to the first hidden layer.
    num_outputs : int
        Number of units in the output layer. More than one unit yields a softmax
        head trained with categorical cross-entropy; otherwise a sigmoid head
        trained with binary cross-entropy is used.
    architecture : iterable of int
        Units of each hidden layer, in order.
    regularization : float, optional
        L2 penalty factor applied to the kernel of every hidden layer.
    activation : str, optional
        Activation function used by every hidden layer.
    dropout_rate : float, optional
        Rate of the Dropout layer placed after every hidden layer.

    Returns
    -------
    Sequential
        The compiled model (Adam optimizer).
    """
    network = Sequential()
    for position, units in enumerate(architecture):
        # Only the very first hidden layer declares the input dimensionality.
        input_spec = {'input_dim': num_features} if position == 0 else {}
        network.add(Dense(units, activation=activation,
                          kernel_regularizer=regularizers.l2(regularization),
                          **input_spec))
        network.add(Dropout(rate=dropout_rate))

    # The output head and its loss depend on whether there are several outputs.
    multi_output = num_outputs > 1
    head_activation = 'softmax' if multi_output else 'sigmoid'
    loss_name = 'categorical_crossentropy' if multi_output else 'binary_crossentropy'
    network.add(Dense(num_outputs, activation=head_activation))
    network.compile(optimizer='adam', loss=loss_name)

    return network

In [3]:
# Loads the dataset from a CSV file located relative to the notebook
dataset = pd.read_csv('../../datasets/classification/bank_exiting.csv')
# Predictors: every column except the first three and the last one
predictors = dataset.iloc[:,3:-1].values
# Responses: the last column (named 'Exited' in the preview below)
responses = dataset.iloc[:,-1].values
# Last expression of the cell: shows the first rows as a quick sanity check
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
# Encoding the gender feature
# Show the distinct gender labels and one sample row before encoding
print(dataset['Gender'].unique())
print(predictors[1,:])
encoder = LabelEncoder()
# Column 2 of predictors is overwritten in place with the integer codes
# (the printed row shows this column holding the gender value).
predictors[:,2] = encoder.fit_transform(predictors[:,2]) # only 0 or 1 after this (just one column needed)
# Same sample row after encoding, to verify the replacement
print(predictors[1,:])

['Female' 'Male']
[608 'Spain' 'Female' 41 1 83807.86 1 0 1 112542.58]
[608 'Spain' 0 41 1 83807.86 1 0 1 112542.58]


In [5]:
# Encoding the country feature
# Show one sample row before and after the transformation as a sanity check.
print(predictors[1,:])
# One-hot encode column 1 (country); drop='first' removes one dummy column to avoid
# a redundant feature. The remaining columns are passed through unchanged and are
# placed after the dummy columns.
ct = ColumnTransformer([('country_category', OneHotEncoder(categories='auto', drop='first'),[1])], remainder='passthrough')
predictors = ct.fit_transform(predictors)
print(predictors[1,:])
# Keep 20% of the samples as a holdout set. The seed is fixed so that the split
# (and therefore the reported holdout metrics) is the same on every run.
X,X_holdout,y,y_holdout = train_test_split(predictors, responses, test_size = 0.2, random_state = 0)

[608 'Spain' 0 41 1 83807.86 1 0 1 112542.58]
[0.0 1.0 608 0 41 1 83807.86 1 0 1 112542.58]


In [6]:
# Performs grid search
# Wall-clock timer covering pipeline construction and the whole search
start = time.perf_counter()

# The scaler is a pipeline step so that it is fit together with the classifier
# on each training split during cross-validation.
sc = StandardScaler()
# num_features and num_outputs are passed on to build_model; epochs, batch_size
# and verbose configure training (verbose = 0 silences the per-epoch logs).
clf = KerasClassifier(build_model, num_features = X.shape[1], num_outputs = 1, batch_size = None, epochs = 100, verbose = 0)
estimators = [('normalizer', sc), ('classifier', clf)]
pipe = Pipeline(estimators)

# Candidate values for each build_model hyperparameter explored by the search
architecture_choices = [(20,6), (20,)]
regularization_choices = [0.1]
activation_choices = ['relu']
dropout_choices = [0]

# The 'classifier__' prefix routes each parameter to the 'classifier' pipeline step
hyperparams = [{
    'classifier__architecture': architecture_choices,
    'classifier__regularization': regularization_choices,
    'classifier__activation': activation_choices,
    'classifier__dropout_rate': dropout_choices
}]

# 5-fold cross-validated search scored by accuracy, using all available cores
validator = GridSearchCV(pipe, cv=5, param_grid=hyperparams, scoring='accuracy', n_jobs=-1, verbose = 0)
validator.fit(X,y)
end = time.perf_counter()
print('Elapsed time: ', end - start, ' seconds\n')
# Project helper that prints the ranked cross-validation results
rp.report_grid_search(validator.cv_results_)

Elapsed time:  80.5477089  seconds

Model with rank: 1
Mean validation score: 0.853 (std: 0.012)
Parameters: {'classifier__activation': 'relu', 'classifier__architecture': (20,), 'classifier__dropout_rate': 0, 'classifier__regularization': 0.1}

Model with rank: 2
Mean validation score: 0.815 (std: 0.016)
Parameters: {'classifier__activation': 'relu', 'classifier__architecture': (20, 6), 'classifier__dropout_rate': 0, 'classifier__regularization': 0.1}



In [7]:
# Evaluate the selected model on the holdout set, which was not used during the grid search
# predict is delegated to the best pipeline found by the search (scaler + classifier)
y_pred = validator.predict(X_holdout)
# Project helper that prints the classification metrics and confusion matrix
rp.report_classification(y_holdout,y_pred,title='Holdout')

Holdout (Metrics): 

Accuracy:  0.85
F1 Score:  0.55
Recall:  0.44
Precision:  0.73

Confusion Matrix:
 [[1517   69]
 [ 230  184]]


In [8]:
# Pull the underlying Keras network out of the 'classifier' step of the best
# pipeline found by the grid search and print its layer-by-layer summary.
mdl = validator.best_estimator_.named_steps['classifier'].model
mdl.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 20)                240       
_________________________________________________________________
dropout (Dropout)            (None, 20)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 21        
Total params: 261
Trainable params: 261
Non-trainable params: 0
_________________________________________________________________
