<a href="https://colab.research.google.com/github/ab-sa/Statistical-Machine-Learning3/blob/main/Lecture10_NN_Lec.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, RepeatedKFold, cross_val_score, GridSearchCV
import statsmodels.formula.api as smf
import statsmodels.api as sm
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KernelDensity
from sklearn.preprocessing import LabelEncoder
from sklearn.base import BaseEstimator, ClassifierMixin
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
from tensorflow.keras.optimizers import SGD

  import pandas.util.testing as tm


In [2]:
Prostate = pd.read_csv('Prostate.csv')
Prostate.head()

Unnamed: 0,lcavol,lweight,age,lbph,svi,lcp,gleason,pgg45,lpsa
0,-0.579818,2.769459,50,-1.386294,0,-1.386294,6,0,-0.430783
1,-0.994252,3.319626,58,-1.386294,0,-1.386294,6,0,-0.162519
2,-0.510826,2.691243,74,-1.386294,0,-1.386294,7,20,-0.162519
3,-1.203973,3.282789,58,-1.386294,0,-1.386294,6,0,-0.162519
4,0.751416,3.432373,62,-1.386294,0,-1.386294,6,0,0.371564


**Artificial Neural Nets (ANN)**: Fit a regression NN with Balance as response:


In [4]:
# Function to generate Deep ANN model 
def make_regression_ann(Optimizer_Trial, Neurons_Trial_1, Neurons_Trial_2):
    
    # Creating the classifier ANN model
    regressor = Sequential()
    regressor.add(Dense(units=Neurons_Trial_1, input_dim=8, activation='relu'))
    regressor.add(Dense(units=Neurons_Trial_2, activation='relu'))
    regressor.add(Dense(units=1, activation='linear'))
    regressor.compile(optimizer=Optimizer_Trial, loss='mean_squared_error', metrics=['mse'])
            
    return regressor


Parameter_Trials = {'batch_size':[10,20,30],
                    'epochs':[10,20],
                    'Optimizer_Trial':['adam', 'rmsprop'],
                    'Neurons_Trial_1': [5,10,30],
                    'Neurons_Trial_2': [5,10,30]
                   }

# Creating the classifier ANN
RegressionModel = KerasRegressor(make_regression_ann, verbose=0)

# Creating the Grid search space
# See different scoring methods by using sklearn.metrics.SCORERS.keys()
grid_search_reg = GridSearchCV(estimator=RegressionModel, param_grid=Parameter_Trials, cv=5)

########################################
X = Prostate.drop(['lpsa'], axis=1)
y = Prostate['lpsa']

# Measuring how much time it took to find the best params
import time
StartTime=time.time()

# Running Grid Search for different paramenters
grid_search_reg.fit(X, y, verbose=0)

EndTime=time.time()
print("Total Time Taken: ", round((EndTime-StartTime)/60), 'Minutes')

########################################

# printing the best parameters
print('#### Best hyperparamters ####')
grid_search_reg.best_params_



Total Time Taken:  8 Minutes
#### Best hyperparamters ####


{'Neurons_Trial_1': 30,
 'Neurons_Trial_2': 5,
 'Optimizer_Trial': 'rmsprop',
 'batch_size': 10,
 'epochs': 20}

To adjust the decay parameter (shrinkage) with keras, you need to modify the optimizer:

In [5]:
def make_regression_ann(decay_rate, Neurons_Trial_1, Neurons_Trial_2):

    # you can also pass different values for momentum & lr (other inputs of SGD) to find their optimal values as well
    sgd = SGD(decay=decay_rate, nesterov=False)

    # Creating the classifier ANN model
    regressor = Sequential()
    regressor.add(Dense(units=Neurons_Trial_1, input_dim=8, activation='relu'))
    regressor.add(Dense(units=Neurons_Trial_2, activation='relu'))
    regressor.add(Dense(units=1, activation='linear'))
    regressor.compile(optimizer=sgd, loss='mean_squared_error', metrics=['mse'])
            
    return regressor

# for simplicity, only including the best parameters from the previouse GridSearchCV.
# Idealy, you need to repeate the fitting process with all combinations again
Parameter_Trials = {'batch_size':[5],
                    'epochs':[20],
                    'decay_rate':[1,0.01,0.001],
                    'Neurons_Trial_1': [30],
                    'Neurons_Trial_2': [5]
                   }

# Creating the classifier ANN
RegressionModel = KerasRegressor(make_regression_ann, verbose=0)

# Creating the Grid search space
# See different scoring methods by using sklearn.metrics.SCORERS.keys()
grid_search_reg = GridSearchCV(estimator=RegressionModel, param_grid=Parameter_Trials, cv=5)

########################################

# Measuring how much time it took to find the best params
import time
StartTime=time.time()

# Running Grid Search for different paramenters
grid_search_reg.fit(X, y, verbose=0)

EndTime=time.time()
print("Total Time Taken: ", round((EndTime-StartTime)/60), 'Minutes')

########################################

# printing the best parameters
print('#### Best hyperparamters ####')
grid_search_reg.best_params_



Total Time Taken:  0 Minutes
#### Best hyperparamters ####


{'Neurons_Trial_1': 30,
 'Neurons_Trial_2': 5,
 'batch_size': 5,
 'decay_rate': 0.01,
 'epochs': 20}