<a href="https://colab.research.google.com/github/ab-sa/Statistical-Machine-Learning-2/blob/main/Lecture9_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as smf
import statsmodels.api as sm
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, RepeatedKFold, cross_val_score, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.neighbors import KernelDensity
from sklearn.base import BaseEstimator, ClassifierMixin
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
from tensorflow.keras.optimizers import SGD

In [None]:
# Load the Credit data and replace the continuous Balance by a 3-level class label
# (right-closed bins): 0 = (-1, 300], 1 = (300, 700], 2 = (700, 2000]
Credit = (
    pd.read_csv('Credit.csv')
    .assign(Balance_multi=lambda raw: pd.cut(raw.Balance, bins=[-1, 300, 700, 2000], labels=[0, 1, 2]))
    # Balance is now represented by Balance_multi; ID, Limit and Rating are removed as well
    .drop(columns=['ID', 'Balance', 'Limit', 'Rating'])
)

# Quick sanity checks: shape, first rows and the class proportions of the response
print(f'Dimension of the data: {Credit.shape}')
print(Credit.head())
print(Credit['Balance_multi'].value_counts(normalize=True))

**Artificial Neural Nets (ANN)**: Fit a classification NN with Balance_multi as response:
- There are many different libraries, and many different ways, to fit a NN in Python. I use ***keras*** here, but feel free to look around for other options as well.
- Let's start by fitting a simple ANN model with only two hidden layers:
  - layer 1: 10 hidden nodes
  - layer 2: 6 hidden nodes

In [None]:
# data: one-hot encode the categorical predictors. The credit-card related features
# (Limit & Rating) were already dropped from Credit to make the classification
# problem more challenging.
X = pd.get_dummies(Credit.drop(['Balance_multi'], axis=1))
y = Credit['Balance_multi']
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# standardizing the input: the scaler is fit on the training split only and then
# applied to both splits, so no information from the test set is used
std_scale = StandardScaler().fit(x_train)
X_train_std = std_scale.transform(x_train)
X_test_std = std_scale.transform(x_test)

# encode class values as integers
encoder = LabelEncoder()
encoder.fit(y_train)
encoded_y_train = encoder.transform(y_train)
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y_train = np_utils.to_categorical(encoded_y_train)

# Network dimensions are read from the data instead of being hard-coded, so the
# cell keeps working if the set of predictors or of response levels changes
n_features = X_train_std.shape[1]   # number of columns after one-hot encoding
n_classes = dummy_y_train.shape[1]  # number of columns of the one-hot response

# building the ANN model
ANN_classifier = Sequential()
# Defining the FIRST hidden layer; input_dim declares the input layer at the same time
# relu means Rectified Linear Unit function
# input_dim: number of features in your training data
ANN_classifier.add(Dense(units=10, input_dim=n_features, activation='relu'))

# Defining the SECOND hidden layer, here we have not defined input because it is
# the second layer and it will get as input the output of the first hidden layer
ANN_classifier.add(Dense(units=6, activation='relu'))

# Defining the Output layer
# for a classification problem, the output layer is defined as:
# for a binary outcome: units=1 with activation='sigmoid' (and loss='binary_crossentropy')
# for a Multiclass (more than two classes) outcome: activation='softmax'
# and units is equal to the number of factor levels (one output node per class)
ANN_classifier.add(Dense(units=n_classes, activation='softmax'))

# optimizer: the gradient-based algorithm used to keep updating the weights (here 'adam')
# loss: the function that is minimised during training
# metrics: what is reported (in addition to the loss) to monitor the fit
ANN_classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# fitting the Neural Network on the training data
ANN_Model_1 = ANN_classifier.fit(X_train_std, dummy_y_train, batch_size=10, epochs=10, verbose=1)

Measure the accuracy of our simple ANN model:

In [None]:
# Scores on the testing data: one row per observation, one column per output node
ANN_preds = ANN_classifier.predict(X_test_std)
# Labels on the testing data: position of the largest score in each row
ANN_preds_class = np.argmax(ANN_preds, axis=-1)

# Per-class precision/recall/F1 followed by the confusion matrix
print(
    classification_report(y_test, ANN_preds_class),
    confusion_matrix(y_test, ANN_preds_class),
    sep='\n',
)

Tuning an ANN model by using GridSearchCV

In [None]:
# Function to generate Deep ANN model
def make_classification_ann(Optimizer_Trial, Neurons_Trial_1, Neurons_Trial_2, input_dim=13, n_classes=3):
    """Build and compile a two-hidden-layer classification network.

    Parameters
    ----------
    Optimizer_Trial : optimizer passed unchanged to ``compile`` (e.g. 'adam').
    Neurons_Trial_1 : number of units in the first hidden layer.
    Neurons_Trial_2 : number of units in the second hidden layer.
    input_dim : number of input features; defaults to 13, the value that was
        previously hard-coded.
    n_classes : number of output nodes (one per class); defaults to 3, the
        value that was previously hard-coded.

    Returns
    -------
    The compiled ``Sequential`` model (not yet fitted).
    """
    classifier = Sequential()
    # Hidden layers use relu; the first one also declares the input size
    classifier.add(Dense(units=Neurons_Trial_1, input_dim=input_dim, activation='relu'))
    classifier.add(Dense(units=Neurons_Trial_2, activation='relu'))
    # Softmax output paired with the categorical cross-entropy loss (one-hot response)
    classifier.add(Dense(units=n_classes, activation='softmax'))
    classifier.compile(optimizer=Optimizer_Trial, loss='categorical_crossentropy', metrics=['accuracy'])

    return classifier


# Hyper-parameter grid: 3 x 2 x 2 x 3 x 3 = 108 combinations, each fitted 5 times (cv=5)
Parameter_Trials = {'batch_size': [10, 20, 30],
                    'epochs': [10, 20],
                    'Optimizer_Trial': ['adam', 'rmsprop'],
                    'Neurons_Trial_1': [5, 10, 30],
                    'Neurons_Trial_2': [5, 10, 30]
                    }

# Creating the classifier ANN
classifierModel = KerasClassifier(make_classification_ann, verbose=0)

# Standardize the inputs, as was done for the first ANN model; the raw predictors
# are on very different scales. The scaler is fit on all rows here for simplicity;
# wrap scaler + model in a Pipeline if the scaling must be re-estimated inside each fold.
X_std = StandardScaler().fit_transform(X)

# encode class values as integers
encoder = LabelEncoder()
encoded_y = encoder.fit_transform(y)
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_y)

# Creating the Grid search space
# See the available scoring methods with sklearn.metrics.get_scorer_names()
# (sklearn.metrics.SCORERS.keys() on older scikit-learn versions)
grid_search = GridSearchCV(estimator=classifierModel, param_grid=Parameter_Trials, cv=5)

########################################

# Measuring how much time it took to find the best params
StartTime = time.time()

# Running Grid Search for different parameters
grid_search.fit(X_std, dummy_y, verbose=0)

EndTime = time.time()
print("Total Time Taken: ", round((EndTime-StartTime)/60), 'Minutes')

########################################

# printing the best parameters
print('#### Best hyperparamters ####')
grid_search.best_params_

**Artificial Neural Nets (ANN)**: Fit a regression NN with Balance as response:


In [None]:
# Function to generate Deep ANN model
def make_regression_ann(Optimizer_Trial, Neurons_Trial_1, Neurons_Trial_2, input_dim=13):
    """Build and compile a two-hidden-layer regression network.

    Parameters
    ----------
    Optimizer_Trial : optimizer passed unchanged to ``compile`` (e.g. 'adam').
    Neurons_Trial_1 : number of units in the first hidden layer.
    Neurons_Trial_2 : number of units in the second hidden layer.
    input_dim : number of input features; defaults to 13, the value that was
        previously hard-coded.

    Returns
    -------
    The compiled ``Sequential`` model (not yet fitted).
    """
    # Creating the regression ANN model
    regressor = Sequential()
    regressor.add(Dense(units=Neurons_Trial_1, input_dim=input_dim, activation='relu'))
    regressor.add(Dense(units=Neurons_Trial_2, activation='relu'))
    # A single linear output node for a continuous response
    regressor.add(Dense(units=1, activation='linear'))
    regressor.compile(optimizer=Optimizer_Trial, loss='mean_squared_error', metrics=['mse'])

    return regressor


# Hyper-parameter grid: 3 x 2 x 2 x 3 x 3 = 108 combinations, each fitted 5 times (cv=5)
Parameter_Trials = {'batch_size': [10, 20, 30],
                    'epochs': [10, 20],
                    'Optimizer_Trial': ['adam', 'rmsprop'],
                    'Neurons_Trial_1': [5, 10, 30],
                    'Neurons_Trial_2': [5, 10, 30]
                    }

# Regression response: the raw continuous Balance. That column was dropped from Credit
# when Balance_multi was created, so it is re-read from the file and aligned with the
# rows of X. (y holds the 3-level class label and must not be used as the target here.)
y_balance = pd.read_csv('Credit.csv')['Balance'].loc[X.index].to_numpy(dtype=float)

# Standardize the inputs; the raw predictors are on very different scales.
# The scaler is fit on all rows here for simplicity; wrap scaler + model in a
# Pipeline if the scaling must be re-estimated inside each fold.
X_std = StandardScaler().fit_transform(X)

# Creating the regression ANN
RegressionModel = KerasRegressor(make_regression_ann, verbose=0)

# Creating the Grid search space
# See the available scoring methods with sklearn.metrics.get_scorer_names()
# (sklearn.metrics.SCORERS.keys() on older scikit-learn versions)
grid_search_reg = GridSearchCV(estimator=RegressionModel, param_grid=Parameter_Trials, cv=5)

########################################

# Measuring how much time it took to find the best params
StartTime = time.time()

# Running Grid Search for different parameters
grid_search_reg.fit(X_std, y_balance, verbose=0)

EndTime = time.time()
print("Total Time Taken: ", round((EndTime-StartTime)/60), 'Minutes')

########################################

# printing the best parameters
print('#### Best hyperparamters ####')
grid_search_reg.best_params_

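To adjust the decay parameter with keras, you need to modify the optimizer. Note that the `decay` argument of `SGD` is a *learning-rate* decay (the step size shrinks after every update); it is not a weight-decay (shrinkage) penalty on the coefficients — for that kind of regularization, add a `kernel_regularizer` to the `Dense` layers instead. The cell below tunes the learning-rate decay: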

In [None]:
def make_regression_ann(decay_rate, Neurons_Trial_1, Neurons_Trial_2, input_dim=13):
    """Build and compile a two-hidden-layer regression network trained with SGD.

    NOTE: this definition replaces the earlier ``make_regression_ann`` that took
    an ``Optimizer_Trial`` argument; after this cell runs only this version exists.

    Parameters
    ----------
    decay_rate : value passed to the ``decay`` argument of ``SGD``
        (learning-rate decay applied over the updates).
    Neurons_Trial_1 : number of units in the first hidden layer.
    Neurons_Trial_2 : number of units in the second hidden layer.
    input_dim : number of input features; defaults to 13, the value that was
        previously hard-coded.

    Returns
    -------
    The compiled ``Sequential`` model (not yet fitted).
    """
    # you can also pass different values for momentum & lr (other inputs of SGD) to find their optimal values as well
    sgd = SGD(decay=decay_rate, nesterov=False)

    # Creating the regression ANN model
    regressor = Sequential()
    regressor.add(Dense(units=Neurons_Trial_1, input_dim=input_dim, activation='relu'))
    regressor.add(Dense(units=Neurons_Trial_2, activation='relu'))
    # A single linear output node for a continuous response
    regressor.add(Dense(units=1, activation='linear'))
    regressor.compile(optimizer=sgd, loss='mean_squared_error', metrics=['mse'])

    return regressor

# For simplicity, only the best parameters from the previous GridSearchCV are included.
# Ideally, you need to repeat the fitting process with all combinations again.
Parameter_Trials = {'batch_size': [10],
                    'epochs': [20],
                    'decay_rate': [1, 0.01, 0.001],
                    'Neurons_Trial_1': [10],
                    'Neurons_Trial_2': [30]
                    }

# Regression response: the raw continuous Balance. That column was dropped from Credit
# when Balance_multi was created, so it is re-read from the file and aligned with the
# rows of X. (y holds the 3-level class label and must not be used as the target here.)
y_balance = pd.read_csv('Credit.csv')['Balance'].loc[X.index].to_numpy(dtype=float)

# Standardize the inputs; the raw predictors are on very different scales.
# The scaler is fit on all rows here for simplicity; wrap scaler + model in a
# Pipeline if the scaling must be re-estimated inside each fold.
X_std = StandardScaler().fit_transform(X)
# NOTE(review): plain SGD may still diverge on the raw Balance scale; if the loss
# becomes NaN, consider standardizing the response or lowering the learning rate.

# Creating the regression ANN
RegressionModel = KerasRegressor(make_regression_ann, verbose=0)

# Creating the Grid search space
# See the available scoring methods with sklearn.metrics.get_scorer_names()
# (sklearn.metrics.SCORERS.keys() on older scikit-learn versions)
grid_search_reg = GridSearchCV(estimator=RegressionModel, param_grid=Parameter_Trials, cv=5)

########################################

# Measuring how much time it took to find the best params
StartTime = time.time()

# Running Grid Search for different parameters
grid_search_reg.fit(X_std, y_balance, verbose=0)

EndTime = time.time()
print("Total Time Taken: ", round((EndTime-StartTime)/60), 'Minutes')

########################################

# printing the best parameters
print('#### Best hyperparamters ####')
grid_search_reg.best_params_