###  Created by Luis Alejandro (alejand@umich.edu)

In [1]:
from sklearn import datasets
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

import numpy as np
import pandas as pd
import time

import sys
sys.path.append('../')
import utils.reports as rp

### Experiment 1
Performs model selection of the following hyperparameters applied to the bank dataset (customers leaving):
* Network architecture
* Alpha (Regularization)

This is performed using cross-validation and a grid search with [GridSearchCV](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html).


In [2]:
# Read the bank dataset: columns 3..(last-1) are the predictors, the final
# column is the response.
dataset = pd.read_csv('../../datasets/classification/bank_exiting.csv')
predictors = dataset.iloc[:, 3:-1].values
responses = dataset.iloc[:, -1].values

# Integer-code predictor columns 1 and 2 in place, one encoder per column.
# NOTE(review): column 1 is the one that is one-hot encoded below (its
# transformer is named 'country_category'), so it is presumably the
# multi-category column and column 2 the two-category one -- confirm against
# the CSV header.
encoder_x1 = LabelEncoder()
encoder_x2 = LabelEncoder()
for col_idx, col_encoder in ((1, encoder_x1), (2, encoder_x2)):
    predictors[:, col_idx] = col_encoder.fit_transform(predictors[:, col_idx])

# One-hot encode column 1 and pass the remaining columns through untouched.
# The encoded columns come first in the output, so slicing off column 0 drops
# the first dummy column.
ct = ColumnTransformer(
    transformers=[('country_category', OneHotEncoder(categories='auto'), [1])],
    remainder='passthrough',
)
predictors = ct.fit_transform(predictors)[:, 1:]

# 80/20 train/holdout split with a fixed seed.
X, X_holdout, y, y_holdout = train_test_split(
    predictors, responses, test_size=0.2, random_state=0
)

# Feature scaling: statistics are learned from the training split only and
# then applied to both splits.
sc = StandardScaler().fit(X)
X = sc.transform(X)
X_holdout = sc.transform(X_holdout)

In [3]:
# Performs grid search over the network architecture and the L2 penalty
# (alpha), scored by 3-fold cross-validated accuracy on the training split.
# NOTE(review): X was standardized in the previous cell before the folds are
# formed, so each validation fold contributed to the scaling statistics.
# Wrapping the scaler and classifier in a Pipeline (as Experiment 3 does)
# avoids that.
start = time.perf_counter()
# One tuple entry per hidden layer. The trailing comma matters: (6) is just
# the int 6, while (6,) is a one-layer architecture, matching the notation
# used for the other experiments.
architecture_choices = [(20, 6), (6,), (20,)]
alpha_choices = [0,0.1,0.3,0.8,1,2,10]

hyperparams = [{
    'hidden_layer_sizes': architecture_choices,
    'alpha': alpha_choices
}]

# Fixed seed so weight initialization and mini-batch sampling, and therefore
# the reported ranking, are repeatable from run to run.
mdl = MLPClassifier(activation='logistic', random_state=0)
validator = GridSearchCV(mdl, cv=3, param_grid=hyperparams, scoring='accuracy', n_jobs=-1,verbose = 1)
validator.fit(X,y)
end = time.perf_counter()
print('Elapsed time: ', end - start, ' seconds\n')
rp.report_grid_search(validator.cv_results_)

Fitting 3 folds for each of 21 candidates, totalling 63 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    8.8s
[Parallel(n_jobs=-1)]: Done  63 out of  63 | elapsed:   13.0s finished


Elapsed time:  14.971740399999998  seconds

Model with rank: 1
Mean validation score: 0.849 (std: 0.004)
Parameters: {'alpha': 0, 'hidden_layer_sizes': 20}

Model with rank: 2
Mean validation score: 0.842 (std: 0.005)
Parameters: {'alpha': 0.1, 'hidden_layer_sizes': 20}

Model with rank: 3
Mean validation score: 0.838 (std: 0.009)
Parameters: {'alpha': 0, 'hidden_layer_sizes': 6}





In [4]:
# Perform evaluation in the holdout set.
# validator is the GridSearchCV fitted in the previous cell; no refit argument
# was passed, so predict() relies on scikit-learn's default of refitting the
# best configuration on all of (X, y).
# X_holdout was already scaled with the scaler fitted on the training split.
y_pred = validator.predict(X_holdout)
# Report helper from utils.reports; judging by the recorded output it prints
# accuracy, F1, recall, precision and the confusion matrix.
rp.report_classification(y_holdout,y_pred,title='Holdout')

Holdout (Metrics): 

Accuracy:  0.86
F1 Score:  0.60
Recall:  0.50
Precision:  0.74

Confusion Matrix:
 [[1526   69]
 [ 204  201]]


### Experiment 2
Performs model selection of the following hyperparameters applied to the wine dataset:
* Network architecture
* Alpha (Regularization)
* Activation function

This is performed using cross-validation and a grid search with [GridSearchCV](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html). Notice that no standardization is applied in this case.

In [5]:
# Load dataset
dataset = datasets.load_wine()
predictors = dataset.data
responses = dataset.target
# Stratified 80/20 split. The fixed seed keeps the holdout set identical
# between runs, so the reported holdout metrics are reproducible.
X,X_holdout,y,y_holdout = train_test_split(predictors, responses, test_size = 0.2, stratify=responses, random_state = 0)

In [6]:
# Performs grid search over architecture, L2 penalty (alpha) and activation
# function, scored by 3-fold cross-validated accuracy. The features are used
# as loaded: no scaling step is applied in this experiment.
start = time.perf_counter()
architecture_choices = [(100,20), (100,), (20,)]
alpha_choices = [0,0.1,0.3,0.8,1,2,10]
activation_choices = ['logistic', 'relu']

hyperparams = [{
    'hidden_layer_sizes': architecture_choices,
    'alpha': alpha_choices,
    'activation': activation_choices
}]

# Fixed seed so weight initialization and mini-batch sampling, and therefore
# the reported ranking, are repeatable from run to run.
mdl = MLPClassifier(max_iter = 200, random_state = 0)
validator = GridSearchCV(mdl, cv=3, param_grid=hyperparams, scoring='accuracy', n_jobs=-1,verbose = 1)
validator.fit(X,y)
end = time.perf_counter()
print('Elapsed time: ', end - start, ' seconds\n')
rp.report_grid_search(validator.cv_results_)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.


Fitting 3 folds for each of 42 candidates, totalling 126 fits


[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed:    0.5s


Elapsed time:  1.9279146000000011  seconds

Model with rank: 1
Mean validation score: 0.944 (std: 0.010)
Parameters: {'activation': 'logistic', 'alpha': 1, 'hidden_layer_sizes': (100,)}

Model with rank: 2
Mean validation score: 0.937 (std: 0.001)
Parameters: {'activation': 'logistic', 'alpha': 2, 'hidden_layer_sizes': (100,)}

Model with rank: 3
Mean validation score: 0.936 (std: 0.018)
Parameters: {'activation': 'relu', 'alpha': 10, 'hidden_layer_sizes': (100,)}



[Parallel(n_jobs=-1)]: Done 126 out of 126 | elapsed:    1.7s finished


In [7]:
# Perform evaluation in the holdout set.
# validator is the GridSearchCV fitted in the previous cell; X_holdout holds
# the raw wine features, consistent with the unscaled training data.
y_pred = validator.predict(X_holdout)
# avg='macro' is forwarded to the report helper; presumably it selects macro
# averaging for the multi-class F1/recall/precision -- confirm in utils.reports.
rp.report_classification(y_holdout,y_pred,avg='macro',title='Holdout')

Holdout (Metrics): 

Accuracy:  0.94
F1 Score:  0.95
Recall:  0.94
Precision:  0.96

Confusion Matrix:
 [[11  1  0]
 [ 0 14  0]
 [ 0  1  9]]


### Experiment 3
Performs model selection of the following hyperparameters applied to the wine dataset:
* Network architecture
* Alpha (Regularization)
* Activation function

This is performed using cross-validation and a grid search with [GridSearchCV](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html). Since we want to apply standardization, we must use a [Pipeline](https://scikit-learn.org/stable/modules/compose.html#pipeline) to correctly standardize and evaluate the models during the cross-validation.

In [8]:
# Load dataset
dataset = datasets.load_wine()
predictors = dataset.data
responses = dataset.target
# Stratified 80/20 split. The fixed seed keeps the holdout set identical
# between runs, so the reported holdout metrics are reproducible.
X,X_holdout,y,y_holdout = train_test_split(predictors, responses, test_size = 0.2, stratify=responses, random_state = 0)

In [9]:
# Performs grid search over architecture, L2 penalty (alpha) and activation
# function, scored by 3-fold cross-validated accuracy.
start = time.perf_counter()

# The scaler and the classifier are chained in a Pipeline so that, inside each
# cross-validation split, the scaler is fitted on the training folds only.
sc = StandardScaler()
# Fixed seed so weight initialization and mini-batch sampling, and therefore
# the reported ranking, are repeatable from run to run.
clf = MLPClassifier(max_iter = 200, random_state = 0)
estimators = [('normalizer', sc), ('classifier', clf)]
pipe = Pipeline(estimators)

architecture_choices = [(100,20), (100,), (20,)]
alpha_choices = [0,0.1,0.3,0.8,1,2,10]
activation_choices = ['logistic', 'relu']

# The 'classifier__' prefix routes each parameter to the pipeline step named
# 'classifier'.
hyperparams = [{
    'classifier__hidden_layer_sizes': architecture_choices,
    'classifier__alpha': alpha_choices,
    'classifier__activation': activation_choices
}]

validator = GridSearchCV(pipe, cv=3, param_grid=hyperparams, scoring='accuracy', n_jobs=-1,verbose = 1)
validator.fit(X,y)
end = time.perf_counter()
print('Elapsed time: ', end - start, ' seconds\n')
rp.report_grid_search(validator.cv_results_)

Fitting 3 folds for each of 42 candidates, totalling 126 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 126 out of 126 | elapsed:    2.4s finished


Elapsed time:  2.7501041000000015  seconds

Model with rank: 1
Mean validation score: 0.979 (std: 0.017)
Parameters: {'classifier__activation': 'relu', 'classifier__alpha': 0.3, 'classifier__hidden_layer_sizes': (100,)}

Model with rank: 1
Mean validation score: 0.979 (std: 0.017)
Parameters: {'classifier__activation': 'relu', 'classifier__alpha': 1, 'classifier__hidden_layer_sizes': (100,)}

Model with rank: 1
Mean validation score: 0.979 (std: 0.017)
Parameters: {'classifier__activation': 'relu', 'classifier__alpha': 2, 'classifier__hidden_layer_sizes': (100,)}





In [10]:
# Perform evaluation in the holdout set.
# validator wraps the scaler+classifier pipeline, so the raw X_holdout is
# standardized by the pipeline's own scaler before being classified.
y_pred = validator.predict(X_holdout)
# avg='macro' is forwarded to the report helper; presumably it selects macro
# averaging for the multi-class F1/recall/precision -- confirm in utils.reports.
rp.report_classification(y_holdout,y_pred,avg='macro',title='Holdout')

Holdout (Metrics): 

Accuracy:  0.97
F1 Score:  0.97
Recall:  0.97
Precision:  0.98

Confusion Matrix:
 [[12  0  0]
 [ 0 14  0]
 [ 0  1  9]]
