###  Created by Luis A. Sanchez-Perez (alejand@umich.edu).
<p><span style="color:green"><b>Copyright &#169;</b> Do not distribute or use without authorization from author.</span></p>

### Experiment
Performs model selection of the following hyperparameters applied to the bank dataset (customers leaving):
* Network architecture
* Alpha (Regularization)
* Activation function

This is perform using a [RandomizedSearchCV](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html).

In [1]:
from sklearn import datasets
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
import numpy as np
import pandas as pd
import scipy
import utils
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Loads dataset from file
dataset = pd.read_csv('E:/datasets/classification/bank_exiting.csv')
predictors = dataset.iloc[:,3:-1].values
responses = dataset.iloc[:,-1].values
# Encoding categorical data
encoder_x1 = LabelEncoder()
predictors[:,1] = encoder_x1.fit_transform(predictors[:,1]) # only 0 or 1 after this (just one column needed)
encoder_x2 = LabelEncoder()
predictors[:,2] = encoder_x2.fit_transform(predictors[:,2]) # more than two categories (use onehotencoder)
ct = ColumnTransformer([('country_category', OneHotEncoder(categories='auto'),[1])], remainder='passthrough')
predictors = ct.fit_transform(predictors)
predictors = predictors[:,1:]

In [3]:
# Splits intro training/holdout
X, X_holdout, y, y_holdout = train_test_split(predictors, responses, test_size=0.2)

In [4]:
# Creates pipeline
clf = MLPClassifier(solver='adam', max_iter=1000)
sc = StandardScaler()
estimators = [('normalizer', sc), ('network', clf)]
pipe = Pipeline(estimators)
pipe

Pipeline(memory=None,
         steps=[('normalizer',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('network',
                 MLPClassifier(activation='relu', alpha=0.0001,
                               batch_size='auto', beta_1=0.9, beta_2=0.999,
                               early_stopping=False, epsilon=1e-08,
                               hidden_layer_sizes=(100,),
                               learning_rate='constant',
                               learning_rate_init=0.001, max_fun=15000,
                               max_iter=1000, momentum=0.9, n_iter_no_change=10,
                               nesterovs_momentum=True, power_t=0.5,
                               random_state=None, shuffle=True, solver='adam',
                               tol=0.0001, validation_fraction=0.1,
                               verbose=False, warm_start=False))],
         verbose=False)

In [5]:
# Defines parameters distributions
hyperparams_dist = [{
    'network__hidden_layer_sizes': [(100, 20), (100), (20)],
    'network__alpha': scipy.stats.distributions.uniform(loc=0, scale=1), # uniform [0,10]
    'network__activation': ['logistic', 'relu']
}]
# Performs randomized search
validator = RandomizedSearchCV(pipe, cv=3, param_distributions=hyperparams_dist,
                               scoring='f1', n_jobs=-1, verbose=2, n_iter=20)
validator.fit(X,y.ravel())
# Report results
utils.report_search(validator.cv_results_)

Fitting 3 folds for each of 20 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    8.3s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:   25.8s finished



Model with rank: 1
Mean validation score: 0.575 (std: 0.007)
Parameters: {'network__activation': 'relu', 'network__alpha': 0.3447262167121936, 'network__hidden_layer_sizes': (100, 20)}

Model with rank: 2
Mean validation score: 0.563 (std: 0.006)
Parameters: {'network__activation': 'relu', 'network__alpha': 0.25126817145691427, 'network__hidden_layer_sizes': 20}

Model with rank: 3
Mean validation score: 0.559 (std: 0.014)
Parameters: {'network__activation': 'logistic', 'network__alpha': 0.02003188664619049, 'network__hidden_layer_sizes': 20}


In [6]:
# Selects best configuration after search
best = validator.best_estimator_

In [7]:
# Retrains the model now on the available dataset (without validation/dev set). This step is optional!!!
best.fit(X, y.ravel())

Pipeline(memory=None,
         steps=[('normalizer',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('network',
                 MLPClassifier(activation='relu', alpha=0.3447262167121936,
                               batch_size='auto', beta_1=0.9, beta_2=0.999,
                               early_stopping=False, epsilon=1e-08,
                               hidden_layer_sizes=(100, 20),
                               learning_rate='constant',
                               learning_rate_init=0.001, max_fun=15000,
                               max_iter=1000, momentum=0.9, n_iter_no_change=10,
                               nesterovs_momentum=True, power_t=0.5,
                               random_state=None, shuffle=True, solver='adam',
                               tol=0.0001, validation_fraction=0.1,
                               verbose=False, warm_start=False))],
         verbose=False)

In [8]:
# Training performance
y_pred = best.predict(X)
utils.report_classification(y, y_pred, title='Train')

Train (Metrics): 

Accuracy: 0.88
F1 Score: 0.63
Recall: 0.54
Precision: 0.77

Confusion Matrix:
 [[6137  265]
 [ 732  866]]


In [9]:
# Holdout performance
y_pred = best.predict(X_holdout)
utils.report_classification(y_holdout, y_pred, title='Holdout')

Holdout (Metrics): 

Accuracy: 0.86
F1 Score: 0.64
Recall: 0.56
Precision: 0.75

Confusion Matrix:
 [[1478   83]
 [ 194  245]]
