###  Created by Luis A. Sanchez-Perez (alejand@umich.edu).
<p><span style="color:green"><b>Copyright &#169;</b> Do not distribute or use without authorization from author.</span></p>

### Cross-validation using Tensorflow, Keras API and Sklearn Wrapper

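Builds a Keras MLP neural network using the TensorFlow backend and performs model selection over the following hyperparameters:

- hidden-layer architecture (number of hidden layers and units per layer)
- L2 regularization factor
- hidden-layer activation function
- dropout rate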

We use a [scikit-learn wrapper](https://keras.io/scikit-learn-api/) for Keras models and perform cross-validation using [RandomizedSearchCV](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html). Since we want to apply standardization, we must use a [Pipeline](https://scikit-learn.org/stable/modules/compose.html#pipeline) so that the scaler is fitted only on the training folds and the models are standardized and evaluated correctly during cross-validation.

In [1]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
import scipy
import numpy as np
import pandas as pd
import utils
import tensorflow as tf
import os
# Sets CUDA_VISIBLE_DEVICES to '-1' for this process, presumably to hide all GPUs so that
# TensorFlow runs on the CPU.
# NOTE(review): this assignment happens after `import tensorflow` above; it is usually
# recommended to set it before the import so it is guaranteed to take effect — confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Utility function to build the Keras model (params of the function are used during grid search)
def build_model(num_features, num_outputs, architecture, regularization=0, activation='sigmoid', dropout_rate=0):
    """Builds and compiles a fully-connected (MLP) Keras classifier.

    Args:
        num_features: Number of input features; passed as `input_dim` to the
            first layer added to the model.
        num_outputs: Number of units in the output layer. When greater than 1
            the output uses softmax with categorical cross-entropy; otherwise
            it uses sigmoid with binary cross-entropy.
        architecture: Iterable with the number of units of each hidden layer,
            e.g. `(20, 6)`. It may be empty, in which case the model consists
            of the output layer only.
        regularization: L2 factor applied to the kernel of every hidden layer.
        activation: Activation function of the hidden layers.
        dropout_rate: Rate of the Dropout layer added after each hidden layer.

    Returns:
        The compiled `tf.keras.models.Sequential` model (Adam optimizer).
    """
    mdl = tf.keras.models.Sequential()
    # Only the first layer added to the model declares the input size. Keeping it
    # in a kwargs dict lets the same loop handle every hidden layer and lets the
    # output layer receive it when there are no hidden layers at all.
    input_spec = {'input_dim': num_features}
    for units in architecture:
        mdl.add(tf.keras.layers.Dense(units, activation=activation,
                                      kernel_regularizer=tf.keras.regularizers.l2(regularization),
                                      **input_spec))
        mdl.add(tf.keras.layers.Dropout(rate=dropout_rate))
        input_spec = {}
    # The output activation and the loss must be chosen together.
    if num_outputs > 1:
        output_activation, loss = 'softmax', 'categorical_crossentropy'
    else:
        output_activation, loss = 'sigmoid', 'binary_crossentropy'
    mdl.add(tf.keras.layers.Dense(num_outputs, activation=output_activation, **input_spec))
    mdl.compile(optimizer='adam', loss=loss)
    return mdl

In [3]:
# Loads dataset from file.
# The location can be overridden through the BANK_EXITING_CSV environment variable so the
# notebook is not tied to one machine; the original absolute path remains the default.
DATASET_PATH = os.environ.get('BANK_EXITING_CSV', 'E:/datasets/classification/bank_exiting.csv')
dataset = pd.read_csv(DATASET_PATH)
# Predictors: every column from index 3 up to (not including) the last one.
# Responses: the last column.
predictors = dataset.iloc[:,3:-1].values
responses = dataset.iloc[:,-1].values
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
# Encoding the gender feature: the string labels in the gender column are replaced by integer codes
GENDER_COLUMN = 2
print(dataset['Gender'].unique())
print(predictors[1,:])
encoder = LabelEncoder()
gender_codes = encoder.fit_transform(predictors[:, GENDER_COLUMN])
predictors[:, GENDER_COLUMN] = gender_codes # only 0 or 1 after this (just one column needed)
print(predictors[1,:])

['Female' 'Male']
[608 'Spain' 'Female' 41 1 83807.86 1 0 1 112542.58]
[608 'Spain' 0 41 1 83807.86 1 0 1 112542.58]


In [5]:
# Encoding the country feature
print(predictors[1,:])
# One-hot encodes column 1 (dropping the first category) and passes the other columns through.
ct = ColumnTransformer([('country_category', OneHotEncoder(categories='auto', drop='first'),[1])], remainder='passthrough')
# The result is stored under a new name instead of overwriting `predictors`: re-running this
# cell would otherwise one-hot encode column 1 of the already-transformed array.
predictors_encoded = ct.fit_transform(predictors)
print(predictors_encoded[1,:])
# Stratified 80/20 split; a fixed random_state keeps the holdout set identical across runs.
X, X_holdout, y, y_holdout = train_test_split(predictors_encoded, responses, test_size=0.2,
                                              stratify=responses, random_state=42)

[608 'Spain' 0 41 1 83807.86 1 0 1 112542.58]
[0.0 1.0 608 0 41 1 83807.86 1 0 1 112542.58]


In [6]:
# Defines pipeline: feature standardization followed by the wrapped Keras classifier
sc = StandardScaler()
# Keyword arguments handed to the wrapper together with the model-building function
keras_options = dict(num_features=X.shape[1], num_outputs=1, epochs=100, verbose=0)
clf = tf.keras.wrappers.scikit_learn.KerasClassifier(build_model, **keras_options)
estimators = [
    ('normalizer', sc),
    ('classifier', clf),
]
pipe = Pipeline(steps=estimators)

In [7]:
# Defines params distributions (candidate values for each `build_model` argument being searched)
architectures = [(20,6), (20,)]
regularization_factors = [0, 0.1, 0.2]
activations = ['relu', 'sigmoid']
dropout_rates = [0, 0.1, 0.2]

# The 'classifier__' prefix routes each parameter to the 'classifier' step of the pipeline
search_space = {
    'classifier__architecture': architectures,
    'classifier__regularization': regularization_factors,
    'classifier__activation': activations,
    'classifier__dropout_rate': dropout_rates,
}
hyperparams_dist = [search_space]

In [8]:
# Performs randomized search: 4 sampled configurations, 3-fold cross-validation, scored by F1.
# A fixed random_state makes the sampled configurations the same on every run.
# NOTE(review): the network weight initialization is not seeded here, so the scores can
# still vary slightly between runs.
validator = RandomizedSearchCV(pipe, cv=3, param_distributions=hyperparams_dist,
                               scoring='f1', n_jobs=-1, verbose=0, n_iter=4,
                               random_state=42)
validator.fit(X,y)
utils.report_search(validator.cv_results_)


Model with rank: 1
Mean validation score: 0.557 (std: 0.013)
Parameters: {'classifier__regularization': 0, 'classifier__dropout_rate': 0.1, 'classifier__architecture': (20,), 'classifier__activation': 'sigmoid'}

Model with rank: 2
Mean validation score: 0.472 (std: 0.034)
Parameters: {'classifier__regularization': 0.2, 'classifier__dropout_rate': 0.1, 'classifier__architecture': (20,), 'classifier__activation': 'relu'}

Model with rank: 3
Mean validation score: 0.418 (std: 0.061)
Parameters: {'classifier__regularization': 0.2, 'classifier__dropout_rate': 0.2, 'classifier__architecture': (20,), 'classifier__activation': 'relu'}


In [9]:
# Selects best configuration after search.
# `best` is the whole pipeline (scaler + classifier) stored by the search as `best_estimator_`.
# NOTE(review): the search above does not pass `refit=`, so sklearn's default applies —
# presumably this pipeline has already been refit on all of (X, y); confirm.
best = validator.best_estimator_

In [10]:
# Retrains the model now on the available dataset (without validation/dev set). This step is optional!!!
# Why optional: `best_estimator_` is presumably already trained on all of (X, y) by the search
# itself — TODO confirm against sklearn's `refit` default.
# `y.ravel()` flattens the labels to 1-D before fitting.
best.fit(X, y.ravel())

Pipeline(memory=None,
         steps=[('normalizer',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('classifier',
                 <tensorflow.python.keras.wrappers.scikit_learn.KerasClassifier object at 0x000001E689240588>)],
         verbose=False)

In [11]:
# Training performance
# The predictions get a train-specific name so that the holdout evaluation, which assigns
# `y_pred`, does not overwrite them.
y_pred_train = best.predict(X)
utils.report_classification(y, y_pred_train, title='Train')

Train (Metrics): 

Accuracy: 0.87
F1 Score: 0.58
Recall: 0.46
Precision: 0.79

Confusion Matrix:
 [[6172  198]
 [ 880  750]]


In [12]:
# Holdout performance
# Predicts on the 20% holdout split with the selected pipeline and hands the true and
# predicted labels to the project's `utils.report_classification` helper
# (presumably the source of the metrics and confusion matrix printed below — verify in utils).
y_pred = best.predict(X_holdout)
utils.report_classification(y_holdout, y_pred, title='Holdout')

Holdout (Metrics): 

Accuracy: 0.86
F1 Score: 0.56
Recall: 0.44
Precision: 0.77

Confusion Matrix:
 [[1540   53]
 [ 227  180]]
