# Neural Network

In [1]:
# if packages not installed yet:

# import sys
# !conda install --yes --prefix {sys.prefix} numpy
# !conda install --yes --prefix {sys.prefix} pandas
# !conda install --yes --prefix {sys.prefix} tensorflow
# !conda install --yes --prefix {sys.prefix} scikit-learn
# !conda install --yes --prefix {sys.prefix} keras

In [2]:
import random
import sys
import time

import numpy as np
import pandas as pd
from numpy import loadtxt
from numpy.random import seed

from imblearn.under_sampling import NearMiss

from sklearn.base import TransformerMixin # for fit_transform method needed in custom transformer for auc score
from sklearn.compose import make_column_transformer
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV, KFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.preprocessing import LabelBinarizer # wrapped by MyLabelBinarizer

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

## Data preparation

In [3]:
# function to reset all RNG's to one seed (23 unless another value is passed)
def reset_random_seeds(seed_value=23):
    """Re-seed TensorFlow, NumPy and Python's `random` module.

    Args:
        seed_value: seed handed to all three generators. Defaults to 23,
            so existing calls without arguments behave as before.
    """
    tf.random.set_seed(seed_value) # tensorflow's seed
    np.random.seed(seed_value) # numpy's seed
    random.seed(seed_value) # python's seed

In [4]:
# read the semicolon-separated CSV into a DataFrame
data = pd.read_csv(
    r'bank-additional-full.csv',
    sep=';',
    engine='python',
)
# data = data.head(1000)  # optional: keep only the first 1000 rows
length = len(data)  # number of rows read
data.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
2,37,services,married,high.school,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
3,40,admin.,married,basic.6y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
4,56,services,married,high.school,no,no,yes,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no


In [5]:
# keep only the columns used for modelling (the target 'y' is the last entry)
cats_to_use = [
    'age', 'default', 'contact', 'month', 'previous', 'poutcome',
    'emp.var.rate', 'euribor3m', 'nr.employed', 'y',
]
data = data.loc[:, cats_to_use]

# column names kept for reference when changing the selection above:
# 'age', 'job', 'marital', 'education', 'default', 'housing', 'loan',
#       'contact', 'month', 'day_of_week', 'duration', 'campaign', 'pdays',
#       'previous', 'poutcome', 'emp.var.rate', 'cons.price.idx',
#       'cons.conf.idx', 'euribor3m', 'nr.employed', 'y'

In [6]:
# save lists of categorical and numerical variables
# (the target 'y' is listed with the categoricals, so it is encoded as 'onehotencoder__y_yes')
cat_cols = ['default', 'contact', 'month', 'poutcome', 'y']
num_cols = ['age', 'previous', 'emp.var.rate', 'euribor3m', 'nr.employed']

# create column transformer to (1) one-hot-encode the categorical variables
# and (2) standardise the numerical variables
ct = make_column_transformer(
    (OneHotEncoder(drop='first'), cat_cols), # drop first column (reference)
    (StandardScaler(), num_cols),
    # always return a dense array: with the default threshold the result becomes a
    # sparse matrix whenever the stacked output is sparse enough, and the DataFrame
    # construction below needs a dense array
    sparse_threshold=0,
)

# NOTE(review): the transformer is fitted on the full table, i.e. before the
# train/test split made further down, so the scaler statistics also include
# the rows that later form the test set.

# transform base table (pandas df -> numpy array)
base = ct.fit_transform(data)

# convert base table to p.df for ease of use (numpy array -> pandas df)
base_temp = pd.DataFrame(base, columns=ct.get_feature_names_out().tolist())
base_temp

Unnamed: 0,onehotencoder__default_unknown,onehotencoder__default_yes,onehotencoder__contact_telephone,onehotencoder__month_aug,onehotencoder__month_dec,onehotencoder__month_jul,onehotencoder__month_jun,onehotencoder__month_mar,onehotencoder__month_may,onehotencoder__month_nov,onehotencoder__month_oct,onehotencoder__month_sep,onehotencoder__poutcome_nonexistent,onehotencoder__poutcome_success,onehotencoder__y_yes,standardscaler__age,standardscaler__previous,standardscaler__emp.var.rate,standardscaler__euribor3m,standardscaler__nr.employed
0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.533034,-0.349494,0.648092,0.712460,0.331680
1,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.628993,-0.349494,0.648092,0.712460,0.331680
2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,-0.290186,-0.349494,0.648092,0.712460,0.331680
3,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,-0.002309,-0.349494,0.648092,0.712460,0.331680
4,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.533034,-0.349494,0.648092,0.712460,0.331680
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41183,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,3.164336,-0.349494,-0.752343,-1.495186,-2.815697
41184,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.573445,-0.349494,-0.752343,-1.495186,-2.815697
41185,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.533034,-0.349494,-0.752343,-1.495186,-2.815697
41186,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.381527,-0.349494,-0.752343,-1.495186,-2.815697


In [7]:
# show the column names of the transformed base table as a plain list
list(base_temp.columns)

['onehotencoder__default_unknown',
 'onehotencoder__default_yes',
 'onehotencoder__contact_telephone',
 'onehotencoder__month_aug',
 'onehotencoder__month_dec',
 'onehotencoder__month_jul',
 'onehotencoder__month_jun',
 'onehotencoder__month_mar',
 'onehotencoder__month_may',
 'onehotencoder__month_nov',
 'onehotencoder__month_oct',
 'onehotencoder__month_sep',
 'onehotencoder__poutcome_nonexistent',
 'onehotencoder__poutcome_success',
 'onehotencoder__y_yes',
 'standardscaler__age',
 'standardscaler__previous',
 'standardscaler__emp.var.rate',
 'standardscaler__euribor3m',
 'standardscaler__nr.employed']

In [8]:
# separate the base table into the target vector y and the feature matrix X,
# both converted from pandas to numpy arrays
y = base_temp['onehotencoder__y_yes'].to_numpy()
X = base_temp.drop('onehotencoder__y_yes', axis=1).to_numpy()

# store the dimensions of X (rows, feature columns) and display them
X_length, X_vars = X.shape
(X_length, X_vars)

(41188, 19)

In [9]:
# share of positive targets before undersampling
np.mean(y)

0.11265417111780131

In [10]:
# put all RNG's back to the fixed seed before resampling
reset_random_seeds()

# balance the classes to a 50/50 success ratio with NearMiss (version 1) undersampling
# NOTE(review): X and y are overwritten here and the train/test split is made
# afterwards, so the test set is drawn from the undersampled data as well.
undersample = NearMiss(version=1)
X, y = undersample.fit_resample(X, y)

# refresh the stored dimensions of X and display them
X_length, X_vars = X.shape
(X_length, X_vars)

(9280, 19)

In [11]:
# share of positive targets after undersampling
np.mean(y)

0.5

In [12]:
# hold out 20% of the observations as test set; the fixed random_state keeps the split reproducible
train_features, test_features, train_targets, test_targets = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=23,
)

## The model
First try a model with some initial hyperparameters as a 'baseline', then perform hyperparameter tuning using grid search and random search.

In [13]:
# function returns keras NN
def create_model(hiddenLayerOne=10, learnRate=0.01):
    """Build and compile a one-hidden-layer network for binary classification.

    Args:
        hiddenLayerOne: number of nodes in the hidden layer.
        learnRate: learning rate handed to the Adam optimizer.

    Returns:
        The compiled Keras model (binary cross-entropy loss, accuracy metric).
    """
    # start every build from the same RNG state
    reset_random_seeds()

    # architecture: X_vars inputs > hidden layer (hiddenLayerOne nodes, sigmoid)
    # > output layer (1 node, sigmoid); X_vars is read from the notebook's globals
    network = tf.keras.models.Sequential()
    network.add(Dense(hiddenLayerOne, input_dim=X_vars, activation='sigmoid'))
    network.add(Dense(1, activation='sigmoid'))

    # Adam with the requested learning rate; accuracy is the tracked metric
    adam = Adam(learning_rate=learnRate)
    network.compile(
        optimizer=adam,
        loss='binary_crossentropy',
        metrics=['accuracy']) # tf.keras.metrics.AUC()

    return network

When the TensorFlow model's hyperparameters are tuned using grid search, it outputs scores in a different format than scikit-learn's AUC metric expects. Therefore we need the following custom transformer:

In [14]:
# class to transform tensorflow's scoring outputs
class MyLabelBinarizer(TransformerMixin):
    """Wrapper around scikit-learn's LabelBinarizer with a transformer-style interface.

    `fit` and `transform` accept an extra `y` argument that is ignored;
    `fit_transform` is inherited from TransformerMixin. LabelBinarizer has to
    be imported from sklearn.preprocessing in the notebook's import cell.
    """

    def __init__(self, *args, **kwargs):
        """Create the wrapped LabelBinarizer, forwarding all arguments to it."""
        self.encoder = LabelBinarizer(*args, **kwargs)

    def fit(self, x, y=0):
        """Fit the wrapped encoder on `x` (`y` is ignored) and return self."""
        self.encoder.fit(x)
        return self

    def transform(self, x, y=0):
        """Return `x` transformed by the wrapped encoder (`y` is ignored)."""
        return self.encoder.transform(x)

### Hyperparameter tuning using grid search
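The grid below contains 3x3x4x4 = 144 model configurations, i.e. 1,440 fits with 10-fold cross-validation. The earlier timing note (3x3x3x3 = 81 model configurations on a dataset of 902 observations in approximately 50 minutes) has not been re-measured for the current grid.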

In [15]:
# for model timing: display the current wall-clock time before the grid search cell runs
time.ctime()

'Tue Apr  5 01:32:37 2022'

In [None]:
# wrap the keras model builder into an sklearn compatible classifier
model = KerasClassifier(build_fn=create_model, verbose=0)

# candidate values per hyperparameter
hiddenLayerOne = [3, 6, 10]
learnRate = [1e-1, 1e-2, 1e-3]
batchSize = [16, 32, 64, 128]
epochs = [10, 30, 50, 80]

# search space: the first two keys are create_model arguments,
# batch_size and epochs are passed on when fitting
grid = {
    'hiddenLayerOne': hiddenLayerOne,
    'learnRate': learnRate,
    'batch_size': batchSize,
    'epochs': epochs,
}

# 10-fold cross validation generator
cv = KFold(n_splits=10)

# exhaustive grid search with 10-fold cv, scored by ROC AUC; start tuning process
model_grid = GridSearchCV(
    estimator=model,
    param_grid=grid,
    scoring='roc_auc',
    cv=cv,
    n_jobs=1,
    verbose=5,
)
grid_res = model_grid.fit(train_features, train_targets)

# report the best mean cv score and the configuration that reached it
bestScore, bestParams = grid_res.best_score_, grid_res.best_params_
print("[INFO] best score is {:.2f} using {}".format(bestScore, bestParams))

# for model timing
time.ctime()

Fitting 10 folds for each of 144 candidates, totalling 1440 fits
Instructions for updating:
Please use `model.predict()` instead.
[CV 1/10] END batch_size=16, epochs=10, hiddenLayerOne=3, learnRate=0.1;, score=0.888 total time=   3.0s
[CV 2/10] END batch_size=16, epochs=10, hiddenLayerOne=3, learnRate=0.1;, score=0.893 total time=   2.7s
[CV 3/10] END batch_size=16, epochs=10, hiddenLayerOne=3, learnRate=0.1;, score=0.915 total time=   2.4s
[CV 4/10] END batch_size=16, epochs=10, hiddenLayerOne=3, learnRate=0.1;, score=0.889 total time=   2.7s
[CV 5/10] END batch_size=16, epochs=10, hiddenLayerOne=3, learnRate=0.1;, score=0.897 total time=   2.3s
[CV 6/10] END batch_size=16, epochs=10, hiddenLayerOne=3, learnRate=0.1;, score=0.887 total time=   2.7s
[CV 7/10] END batch_size=16, epochs=10, hiddenLayerOne=3, learnRate=0.1;, score=0.912 total time=   2.6s
[CV 8/10] END batch_size=16, epochs=10, hiddenLayerOne=3, learnRate=0.1;, score=0.894 total time=   2.4s
[CV 9/10] END batch_size=16, e

### Hyperparameter tuning using random search

In [17]:
# for model timing: display the current wall-clock time before the random search cell runs
time.ctime()

'Thu Mar 31 18:53:11 2022'

In [18]:
# create model and wrap into sklearn compatible classifier
model = KerasClassifier(build_fn=create_model, verbose=0)

# hyperparameter search space, defined inside this cell so it does not depend on
# lists left over from the grid search cell. The keys are the arguments that
# create_model / fit accept. The values follow the earlier commented-out draft,
# except that its 0 hidden nodes (an empty hidden layer) is replaced by 10,
# the default of create_model.
grid = dict(
    hiddenLayerOne=[5, 10, 18],
    learnRate=[1e-2, 1e-3, 1e-4],
    batch_size=[5, 10, 20],
    epochs=[10, 30, 80]
)

# create 10-fold cross validation generator
cv = KFold(n_splits=10)

# create random searcher with 10-fold cv and start tuning process
searcher = RandomizedSearchCV(
    estimator=model,
    n_jobs=1, # single process (n_jobs=-1 would use all cores)
    cv=cv,
    param_distributions=grid, # sample from the space above
    scoring='accuracy',
    random_state=23) # same seed as the rest of the notebook, so the sampled candidates are reproducible
searchResults = searcher.fit(train_features, train_targets)

# summarise random search info
bestScore = searchResults.best_score_
bestParams = searchResults.best_params_
print("[INFO] best score is {:.2f} using {}".format(bestScore,bestParams))

# for model timing
time.ctime()

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).
[INFO] best score is 0.84 using {'learnRate': 0.001, 'hiddenLayerOne': 18, 'epochs': 10, 'batch_size': 20}


'Thu Mar 31 18:58:28 2022'

### Baseline model:

In [14]:
# create model with the default hyperparameters of create_model
model = create_model()

# fit model on the training set
model.fit(train_features, train_targets, epochs=100, batch_size=10)

# evaluate on the held-out test set (X, y also contain the training rows,
# which would make the score optimistic) and print the accuracy in percent
_, accuracy = model.evaluate(test_features, test_targets, verbose=0)
print('Accuracy: %.2f' %(accuracy*100))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

## Notes

In [19]:
# CODE FROM BOOSTED TREES
# NOTE(review): scratch cell pasted from another notebook; its recorded output is
# "AttributeError: 'KerasClassifier' object has no attribute 'model'".
# The names `metrics`, `confusion_matrix`, `classification_report`, `roc_curve`,
# `log_loss`, `plt` and `classifier` are neither imported nor defined in the
# visible part of this notebook.
predictions = model.predict(test_features)


# Use score method to get accuracy of model
accuracy = metrics.accuracy_score(test_targets, predictions)
# the `+` below is a unary plus on the rounded value, not a string concatenation
print("Accuracy: ", + np.round(accuracy , 3))

print("Confusion Matrix:")
print(confusion_matrix(test_targets, predictions))

print("Classification Report")
print(classification_report(test_targets, predictions))
    
#Beginning the plotting of ROC-curve
# NOTE(review): probabilities come from `classifier`, while the predictions above
# come from `model` - confirm which estimator is meant
pred_prob = classifier.predict_proba(test_features)
fpr, tpr, thresh = roc_curve(test_targets, pred_prob[:,1], pos_label=1)
    
#Plot roc curves
# NOTE(review): the legend label says 'SVM' - presumably left over from the source notebook; confirm
plt.plot(fpr, tpr, linestyle='--',color='orange', label='SVM')

# title
plt.title('ROC curve')
# x label
plt.xlabel('False Positive Rate')
# y label
plt.ylabel('True Positive rate')

plt.legend(loc='best')
plt.savefig('ROC',dpi=300)
plt.show();
    
#AUC Score
auc_score = roc_auc_score(test_targets, pred_prob[:,1])
print("AUC Score: " + str(np.round(auc_score , 3)))
    
#Log-loss function
# NOTE(review): log_loss receives `predictions` rather than `pred_prob` - confirm which is intended
print("Log-Loss: " + str(np.round(log_loss(test_targets, predictions),3)))

AttributeError: 'KerasClassifier' object has no attribute 'model'