## Training the ANN with Stochastic Gradient Descent
* Randomly initialize the weights to small numbers close to zero.
* Input the first observation of your dataset into the input layer, each feature in one input node.
* Forward Propagation : weights and activation function. (Rectifier for hidden layer and Sigmoid for Output layer.)
* Compare the predicted results with actual result. Measure the generated Errors.
* Back Propagation : Update the weights based on the error. The learning rate decides the size of each update.
* Updating the weights after each observation is online (stochastic) learning; updating them after a batch of observations is batch learning.
* When the whole training set has passed through the network, one epoch is complete.

In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load the churn dataset, then separate it into a feature matrix and a target vector.
dataset = pd.read_csv('Churn_Modelling.csv')
# Positional columns 3..12 are used as model inputs; column 13 is used as the label.
feature_frame = dataset.iloc[:, 3:13]
target_column = dataset.iloc[:, 13]
X = feature_frame.values
y = target_column.values

In [3]:
# Show the raw feature matrix (still object dtype: numbers mixed with country/gender strings).
X

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

### We want to predict whether a customer shall leave the bank or not.

In [4]:
# Encoding categorical data
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
# One-hot encode the two categorical columns (positions 1 and 2 of X); every
# other column is passed through unchanged.
one_hot_step = ('oh_enc', OneHotEncoder(sparse=False), [1, 2])
ct = ColumnTransformer(transformers=[one_hot_step], remainder='passthrough')
X_encoded = ct.fit_transform(X)
# Remove encoded columns 0 and 3, i.e. one dummy column per categorical
# feature, so the remaining dummies are not perfectly collinear
# (the "dummy variable trap").
X = np.delete(X_encoded, [0, 3], axis=1)

In [5]:
# Show the feature matrix after encoding: the leading columns are now the remaining dummy columns.
X

array([[0.0, 0.0, 0.0, ..., 1, 1, 101348.88],
       [0.0, 1.0, 0.0, ..., 0, 1, 112542.58],
       [0.0, 0.0, 0.0, ..., 1, 0, 113931.57],
       ...,
       [0.0, 0.0, 0.0, ..., 0, 1, 42085.58],
       [1.0, 0.0, 1.0, ..., 1, 0, 92888.52],
       [0.0, 0.0, 0.0, ..., 1, 0, 38190.78]], dtype=object)

In [6]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set; random_state pins the shuffle so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [7]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
# The encoded matrices are object-typed, so cast them to float before scaling.
X_train = X_train.astype(float)
X_test = X_test.astype(float)
# Learn the scaling statistics from the training set only, then apply the same
# transformation to both sets so no test-set information leaks into training.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [8]:
# Show the standardized training features.
X_train

array([[-0.5698444 ,  1.74309049, -1.09168714, ...,  0.64259497,
        -1.03227043,  1.10643166],
       [ 1.75486502, -0.57369368,  0.91601335, ...,  0.64259497,
         0.9687384 , -0.74866447],
       [-0.5698444 , -0.57369368, -1.09168714, ...,  0.64259497,
        -1.03227043,  1.48533467],
       ...,
       [-0.5698444 , -0.57369368,  0.91601335, ...,  0.64259497,
        -1.03227043,  1.41231994],
       [-0.5698444 ,  1.74309049, -1.09168714, ...,  0.64259497,
         0.9687384 ,  0.84432121],
       [ 1.75486502, -0.57369368, -1.09168714, ...,  0.64259497,
        -1.03227043,  0.32472465]])

### Now let's build the ANN model!

In [9]:
# Importing the Keras libraries and packages
import keras
from keras.models import Sequential
from keras.layers import Dense

Using TensorFlow backend.


In [10]:
# Initialising the ANN.
# A neural network can be defined either as a sequence of layers or as a graph;
# the sequence-of-layers form is used here, with layers added in the following cells.
classifier = Sequential()

In [11]:
# Input layer + first hidden layer: input_dim declares the 11 input features,
# and the hidden layer has 6 ReLU units.
first_hidden_layer = Dense(
    units=6,
    kernel_initializer='uniform',
    activation='relu',
    input_dim=11,
)
classifier.add(first_hidden_layer)

In [12]:
# Second hidden layer: same width and activation as the first one.
second_hidden_layer = Dense(
    units=6,
    kernel_initializer='uniform',
    activation='relu',
)
classifier.add(second_hidden_layer)

In [13]:
# Output layer: a single sigmoid unit for the binary outcome.
# For more than two output categories, set units to the number of categories
# and use activation='softmax' instead.
output_layer = Dense(
    units=1,
    kernel_initializer='uniform',
    activation='sigmoid',
)
classifier.add(output_layer)

In [14]:
# Compile the ANN: Adam optimizer, binary cross-entropy loss (matches the single
# sigmoid output), and accuracy reported during training.
classifier.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [15]:
# Fit the ANN to the training set: mini-batches of 10 observations,
# 100 passes (epochs) over the training data.
classifier.fit(
    X_train,
    y_train,
    batch_size=10,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x11e719080>

In [16]:
# Predict on the test set and threshold the sigmoid outputs at 0.5 so that
# y_pred holds boolean class labels rather than raw scores.
y_pred = classifier.predict(X_test) > 0.5

In [17]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, classification_report
# Compare the thresholded predictions with the true test labels.
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(cm)
print(report)

[[1545   50]
 [ 257  148]]
              precision    recall  f1-score   support

           0       0.86      0.97      0.91      1595
           1       0.75      0.37      0.49       405

   micro avg       0.85      0.85      0.85      2000
   macro avg       0.80      0.67      0.70      2000
weighted avg       0.84      0.85      0.82      2000



### Use the ANN model to predict whether the customer with the following information will leave the bank:
* Geography: France
* Gender: Male
* Credit Score: 600
* Age: 40 years old
* Tenure: 3 years
* Balance: 60000
* Number of Products: 2
* Does this customer have a credit card ? Yes
* Is this customer an Active Member: Yes
* Estimated Salary: 50000

### So should we say goodbye to that customer ?

In [18]:
# A single customer as one row, in the same column order as the encoded
# training matrix: two geography dummies, one gender dummy, then credit score,
# age, tenure, balance, number of products, has-credit-card, is-active-member,
# estimated salary.
new_customer = np.array([[0.0, 0.0, 1.0, 600.0, 40.0, 3.0, 60000.0, 2.0, 1.0, 1.0, 50000.0]])
# Apply the scaler fitted on the training set before predicting.
new_customer_scaled = sc.transform(new_customer)
new_pred = classifier.predict(new_customer_scaled)
print(new_pred > 0.5)

[[False]]


### We don't need to say goodbye to the Customer.

### Now we shall apply the k-fold cross-validation technique to evaluate the performance of our ANN model

In [19]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score

def build_classifier(optimizer = 'adam', units = 6, kernel_initializer = 'uniform', input_dim = 11):
    """Build and compile the ANN used for cross-validation and grid search.

    The network has two ReLU hidden layers of ``units`` neurons each, followed
    by a single sigmoid output neuron. It is compiled with binary cross-entropy
    loss and the accuracy metric. With the default arguments the model is the
    same as the original hard-coded one (6 units, 'uniform' initializer,
    11 input features).

    Parameters
    ----------
    optimizer : str or optimizer instance, default 'adam'
        Passed straight to ``compile``.
    units : int, default 6
        Number of neurons in each of the two hidden layers.
    kernel_initializer : str, default 'uniform'
        Weight initializer applied to every layer.
    input_dim : int, default 11
        Number of input features expected by the first layer.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    # Use a local name that does not shadow the notebook-level `classifier`.
    model = Sequential()
    # Only the first layer declares the input dimension.
    model.add(Dense(units=units, kernel_initializer=kernel_initializer, activation='relu', input_dim=input_dim))
    model.add(Dense(units=units, kernel_initializer=kernel_initializer, activation='relu'))
    # Single sigmoid unit for the binary output.
    model.add(Dense(units=1, kernel_initializer=kernel_initializer, activation='sigmoid'))
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [20]:
# Wrap the model factory so scikit-learn can treat it as an estimator.
# Note: this rebinds `classifier`, replacing the model trained earlier.
classifier = KerasClassifier(
    build_fn=build_classifier,
    batch_size=10,
    epochs=10,
)

In [21]:
# 10-fold cross-validation on the training set; yields one score per fold.
accuracies = cross_val_score(
    estimator=classifier,
    X=X_train,
    y=y_train,
    cv=10,
    n_jobs=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
E

In [23]:
# Summarize the per-fold scores: mean first, then the spread across folds.
mean_accuracy = accuracies.mean()
accuracy_std = accuracies.std()
print(mean_accuracy)
print(accuracy_std)

0.835374994650483
0.014448723263352194


#### We get a fairly low bias and a very low variance. Thus there is no need for Dropout regularization to reduce overfitting.

### For Dropout Regularization: 
            from keras.layers import Dropout
#### After each hidden layer, add:
            classifier.add(Dropout(rate=0.1))

### Now we shall apply the GridSearch technique for hyperparameter tuning of our ANN model

In [22]:
from sklearn.model_selection import GridSearchCV
# Fresh wrapper without batch_size/epochs: the grid search supplies those values.
classifier = KerasClassifier(build_fn = build_classifier)

In [26]:
# Values actually searched in this run.
batch_size = [10, 50]

# Further candidates that could be added to the grid. A value can only be
# searched if build_classifier (or fit) accepts it as an argument.
#epochs = [50,100]
#optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
#learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
#momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]
#init_mode = ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']
#weight_constraint = [1, 2, 3, 4, 5]
#dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
#neurons = [1, 5, 10, 15, 20, 25, 30]

# Every combination in the grid is evaluated with 10-fold cross-validation on accuracy.
parameters = {'batch_size': batch_size, 'epochs': [10]}
grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=parameters,
    scoring='accuracy',
    cv=10,
)
grid_search = grid_search.fit(X_train, y_train)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
E

In [28]:
# Report the best parameter combination and its cross-validated score.
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_
print(best_parameters)
print(best_accuracy)

{'batch_size': 10, 'epochs': 10}
0.833
