In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load the customer churn table from the CSV file expected next to this notebook
csv_path = 'Churn_Modelling.csv'
dataset = pd.read_csv(csv_path)

In [3]:
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
# Columns 3..12 (CreditScore through EstimatedSalary in the preview above) are the features;
# column 13 (Exited) is the label to predict
feature_columns = slice(3, 13)
target_column = 13
X = dataset.iloc[:, feature_columns].values
y = dataset.iloc[:, target_column].values

In [5]:
X

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ..., 
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [6]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
#Country (column 1) - replace each country name with an integer code
labelencoder_X_1 = LabelEncoder()
country_codes = labelencoder_X_1.fit_transform(X[:, 1])
X[:, 1] = country_codes

X

array([[619, 0, 'Female', ..., 1, 1, 101348.88],
       [608, 2, 'Female', ..., 0, 1, 112542.58],
       [502, 0, 'Female', ..., 1, 0, 113931.57],
       ..., 
       [709, 0, 'Female', ..., 0, 1, 42085.58],
       [772, 1, 'Male', ..., 1, 0, 92888.52],
       [792, 0, 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [7]:
#Gender (column 2) - replace 'Female' / 'Male' with integer codes
labelencoder_X_2 = LabelEncoder()
gender_codes = labelencoder_X_2.fit_transform(X[:, 2])
X[:, 2] = gender_codes
X

array([[619, 0, 0, ..., 1, 1, 101348.88],
       [608, 2, 0, ..., 0, 1, 112542.58],
       [502, 0, 0, ..., 1, 0, 113931.57],
       ..., 
       [709, 0, 0, ..., 0, 1, 42085.58],
       [772, 1, 1, ..., 1, 0, 92888.52],
       [792, 0, 0, ..., 1, 0, 38190.78]], dtype=object)

In [8]:
#Dummy variables
#One-hot encode the label-encoded country column (column 1) with plain numpy.
#OneHotEncoder(categorical_features = [1]) only works on old scikit-learn releases:
#the categorical_features argument was removed in scikit-learn 0.22.
country_codes = X[:, 1].astype(int)
country_dummies = np.eye(country_codes.max() + 1)[country_codes]
#Drop the first dummy column to avoid the dummy variable trap:
#the two remaining columns still identify every country
#(France - 0, 0; Germany - 1, 0; Spain - 0, 1)
#The dummy columns come first, followed by the other features in their original order
X = np.hstack([country_dummies[:, 1:], np.delete(X, 1, axis = 1)]).astype(float)

In [9]:
X

array([[  0.00000000e+00,   0.00000000e+00,   6.19000000e+02, ...,
          1.00000000e+00,   1.00000000e+00,   1.01348880e+05],
       [  0.00000000e+00,   1.00000000e+00,   6.08000000e+02, ...,
          0.00000000e+00,   1.00000000e+00,   1.12542580e+05],
       [  0.00000000e+00,   0.00000000e+00,   5.02000000e+02, ...,
          1.00000000e+00,   0.00000000e+00,   1.13931570e+05],
       ..., 
       [  0.00000000e+00,   0.00000000e+00,   7.09000000e+02, ...,
          0.00000000e+00,   1.00000000e+00,   4.20855800e+04],
       [  1.00000000e+00,   0.00000000e+00,   7.72000000e+02, ...,
          1.00000000e+00,   0.00000000e+00,   9.28885200e+04],
       [  0.00000000e+00,   0.00000000e+00,   7.92000000e+02, ...,
          1.00000000e+00,   0.00000000e+00,   3.81907800e+04]])

In [10]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as the test set; the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size = 0.2,
    random_state = 0,
)

In [11]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics from the training set only,
# then apply that same scaling to both the training and the test set
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)


In [12]:
X_train

array([[-0.5698444 ,  1.74309049,  0.16958176, ...,  0.64259497,
        -1.03227043,  1.10643166],
       [ 1.75486502, -0.57369368, -2.30455945, ...,  0.64259497,
         0.9687384 , -0.74866447],
       [-0.5698444 , -0.57369368, -1.19119591, ...,  0.64259497,
        -1.03227043,  1.48533467],
       ..., 
       [-0.5698444 , -0.57369368,  0.9015152 , ...,  0.64259497,
        -1.03227043,  1.41231994],
       [-0.5698444 ,  1.74309049, -0.62420521, ...,  0.64259497,
         0.9687384 ,  0.84432121],
       [ 1.75486502, -0.57369368, -0.28401079, ...,  0.64259497,
        -1.03227043,  0.32472465]])

In [13]:
X_test

array([[ 1.75486502, -0.57369368, -0.55204276, ...,  0.64259497,
         0.9687384 ,  1.61085707],
       [-0.5698444 , -0.57369368, -1.31490297, ...,  0.64259497,
        -1.03227043,  0.49587037],
       [-0.5698444 ,  1.74309049,  0.57162971, ...,  0.64259497,
         0.9687384 , -0.42478674],
       ..., 
       [-0.5698444 ,  1.74309049, -0.74791227, ...,  0.64259497,
        -1.03227043,  0.71888467],
       [ 1.75486502, -0.57369368, -0.00566991, ...,  0.64259497,
         0.9687384 , -1.54507805],
       [ 1.75486502, -0.57369368, -0.79945688, ...,  0.64259497,
        -1.03227043,  1.61255917]])

In [17]:
import keras
from keras.models import Sequential
from keras.layers import Dense

Using TensorFlow backend.


In [18]:
# Initialising the ANN
# Sequential() starts as an empty model; the layers are appended one at a time with .add()
classifier = Sequential()

In [19]:
# Adding the input layer and the first hidden layer
# input_dim = 11 - number of input dimensions, one per feature column
# units = 6 - number of nodes in this hidden layer, chosen as (11 + 1) / 2:
#             the average of the 11 inputs and the 1 (binary) output node
# kernel_initializer - initialises the weights randomly and to be small numbers
first_hidden_layer = Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11)
classifier.add(first_hidden_layer)

In [20]:
# Adding the second hidden layer
# Same settings as the first hidden layer; no input_dim is passed for this layer
second_hidden_layer = Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu')
classifier.add(second_hidden_layer)

In [21]:
# Adding the output layer
# A single sigmoid unit gives a probability as the output,
# which can then be used to "rank" the customers by how likely they are to leave
# (softmax would be used instead when there is more than one output)
output_layer = Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid')
classifier.add(output_layer)

In [22]:
# Compiling the ANN
# optimizer - algorithm used to find the optimal weights; 'adam' is a stochastic gradient descent variant
# loss - the function the optimizer minimises; binary_crossentropy because the outcome is binary
# metrics - expects a list
classifier.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy'],
)

In [23]:
# Fitting the ANN to the Training set
# Weights can be updated after each observation or after a batch of observations;
# batch_size = 10 updates them after every 10 observations

# epochs = 100 - one epoch means every observation has passed through the classifier once
classifier.fit(
    X_train,
    y_train,
    batch_size = 10,
    epochs = 100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x1c233f5898>

In [24]:
# Predicting the Test set results
# The output layer is a single sigmoid unit, so each prediction is a probability, not yet a 0/1 label
y_pred = classifier.predict(X_test)

In [25]:
y_pred

array([[ 0.21351111],
       [ 0.29432389],
       [ 0.14588986],
       ..., 
       [ 0.16272601],
       [ 0.12282191],
       [ 0.18370497]], dtype=float32)

In [26]:
#Turn the predicted probabilities into a yes/no prediction:
#True when the probability is above 0.5, False otherwise
y_pred = y_pred > 0.5

In [29]:
y_pred

array([[False],
       [False],
       [False],
       ..., 
       [False],
       [False],
       [False]], dtype=bool)

In [27]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
# Tabulate the true test labels against the thresholded predictions
cm = confusion_matrix(y_test, y_pred)

In [28]:
cm

array([[1537,   58],
       [ 261,  144]])

In [31]:
#Accuracy = correct predictions (the diagonal of the confusion matrix) / all predictions
#Computed from cm instead of typing in the numbers, which change every time the network is retrained
acc = np.trace(cm) / cm.sum()
acc

0.8405

# ANN Homework

Use our ANN model to predict whether the customer with the following information will leave the bank:

- Geography: France
- Credit Score: 600
- Gender: Male
- Age: 40 years old
- Tenure: 3 years
- Balance: \$60000
- Number of Products: 2
- Does this customer have a credit card ? Yes
- Is this customer an Active Member: Yes
- Estimated Salary: \$50000


So should we say goodbye to that customer ?

In [34]:
#New Data
#The observation must be a single row (horizontal vector), because our data holds one customer per row

#Values in training-column order: the two country dummies (France = 0, 0), credit score,
#gender (Male = 1), age, tenure, balance, number of products, has credit card,
#is active member, estimated salary
#The first 0 is written as 0.0 so the array is float, which suppresses the warning about scaling integers
new_customer = [0.0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]
new_data = np.array([new_customer])

In [35]:
#Transform
#Reuse the scaler fitted on the training set so the new observation is scaled the same way
#NOTE: new_data is overwritten with its scaled version, so re-running this cell would scale it a second time
new_data = sc.transform(new_data)
new_data

array([[-0.5698444 , -0.57369368, -0.52111599,  0.91601335,  0.10961719,
        -0.68538967, -0.2569057 ,  0.8095029 ,  0.64259497,  0.9687384 ,
        -0.87203322]])

In [36]:
#Predicted probability that the new customer leaves the bank
new_pred = classifier.predict(new_data)
new_pred

array([[ 0.09444336]], dtype=float32)

In [37]:
#Same 0.5 cut-off as for the test set: True means the customer is predicted to leave
new_pred = new_pred > 0.5
new_pred

array([[False]], dtype=bool)

# k-Fold Cross Validation

In [14]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from keras.models import Sequential
from keras.layers import Dense

#Use wrappers to use k-fold cross validation from sklearn with keras

Using TensorFlow backend.


In [15]:
def build_classifier():
    """Build and compile the churn ANN used for k-fold cross validation.

    The network takes 11 inputs, has two hidden layers of 6 relu units each
    and a single sigmoid output unit. It is compiled with the adam optimizer,
    binary cross-entropy loss and the accuracy metric.

    Returns:
        The compiled Sequential model.
    """
    # Settings shared by both hidden layers
    hidden_kwargs = dict(units = 6, kernel_initializer = 'uniform', activation = 'relu')
    model = Sequential()
    model.add(Dense(input_dim = 11, **hidden_kwargs))
    model.add(Dense(**hidden_kwargs))
    model.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
    return model

In [16]:
#Wrap the model-building function so it can be passed to sklearn's k-fold cross validation;
#nothing is trained at this point, batch_size and epochs are only stored for the later fits
classifier = KerasClassifier(
    build_fn = build_classifier,
    batch_size = 10,
    epochs = 100,
)

In [17]:
#Holds one accuracy per fold (cv = 10 folds)
#n_jobs = number of CPUs to use
#n_jobs = -1 - use all CPUs, so the folds run in parallel
accuracies = cross_val_score(
    estimator = classifier,
    X = X_train,
    y = y_train,
    cv = 10,
    n_jobs = -1,
)

Epoch 1/100
Epoch 1/100
Epoch 1/100
Epoch 1/100
Epoch 1/100
Epoch 1/100
Epoch 1/100
Epoch 1/100

Epoch 2/100
 270/7200 [>.............................] - ETA: 4s - loss: 0.5237 - acc: 0.7852Epoch 2/100
 410/7200 [>.............................] - ETA: 3s - loss: 0.5092 - acc: 0.7829
 260/7200 [>.............................] - ETA: 2s - loss: 0.5159 - acc: 0.7808Epoch 2/100
Epoch 2/100
Epoch 3/100
Epoch 3/100

Epoch 3/100
Epoch 3/100
Epoch 3/100
  10/7200 [..............................] - ETA: 55s - loss: 0.3962 - acc: 0.8000

  % delta_t_median)


  10/7200 [..............................] - ETA: 23s - loss: 0.5090 - acc: 0.7000
Epoch 3/100
Epoch 3/100
Epoch 3/100
Epoch 4/100
Epoch 4/100
Epoch 4/100
Epoch 4/100
Epoch 4/100
Epoch 4/100
 420/7200 [>.............................] - ETA: 5s - loss: 0.3980 - acc: 0.8095
Epoch 4/100
Epoch 5/100
Epoch 5/100
Epoch 5/100
Epoch 5/100
Epoch 5/100
 610/7200 [=>............................] - ETA: 5s - loss: 0.4216 - acc: 0.8082
 300/7200 [>.............................] - ETA: 3s - loss: 0.3915 - acc: 0.8267Epoch 5/100
1040/7200 [===>..........................] - ETA: 5s - loss: 0.4080 - acc: 0.8221
Epoch 5/100
Epoch 6/100
Epoch 6/100
Epoch 6/100

  % delta_t_median)


  50/7200 [..............................] - ETA: 9s - loss: 0.5583 - acc: 0.7800
 130/7200 [..............................] - ETA: 10s - loss: 0.5277 - acc: 0.7462Epoch 6/100
 230/7200 [..............................] - ETA: 7s - loss: 0.4774 - acc: 0.7696 
 460/7200 [>.............................] - ETA: 5s - loss: 0.4453 - acc: 0.7913Epoch 6/100
Epoch 6/100
 400/7200 [>.............................] - ETA: 3s - loss: 0.4252 - acc: 0.8125
 690/7200 [=>............................] - ETA: 5s - loss: 0.4420 - acc: 0.8101Epoch 6/100

  10/7200 [..............................] - ETA: 3s - loss: 0.4760 - acc: 0.8000Epoch 7/100
Epoch 7/100
Epoch 7/100
 350/7200 [>.............................] - ETA: 4s - loss: 0.4464 - acc: 0.8057Epoch 7/100
 430/7200 [>.............................] - ETA: 4s - loss: 0.4283 - acc: 0.8116
Epoch 7/100
 610/7200 [=>............................] - ETA: 4s - loss: 0.4298 - acc: 0.8295Epoch 7/100
Epoch 8/100
  90/7200 [..............................] - ETA: 5

  % delta_t_median)


1460/7200 [=====>........................] - ETA: 4s - loss: 0.3912 - acc: 0.8425Epoch 10/100
Epoch 11/100
Epoch 11/100
Epoch 11/100
 200/7200 [..............................] - ETA: 9s - loss: 0.4069 - acc: 0.8350
  40/7200 [..............................] - ETA: 16s - loss: 0.4635 - acc: 0.7750Epoch 11/100
Epoch 11/100
1190/7200 [===>..........................] - ETA: 4s - loss: 0.4309 - acc: 0.8311Epoch 11/100
Epoch 15/100

 690/7200 [=>............................] - ETA: 5s - loss: 0.4522 - acc: 0.8058
Epoch 15/100
  90/7200 [..............................] - ETA: 4s - loss: 0.4240 - acc: 0.8222Epoch 16/100

Epoch 16/100
Epoch 16/100

Epoch 17/100
Epoch 17/100
 900/7200 [==>...........................] - ETA: 4s - loss: 0.3751 - acc: 0.8467
Epoch 17/100
1040/7200 [===>..........................] - ETA: 4s - loss: 0.3787 - acc: 0.8423
Epoch 17/100
Epoch 17/100
Epoch 17/100
1430/7200 [====>.........................] - ETA: 4s - loss: 0.4096 - acc: 0.8301
Epoch 17/100
Epoch 19/100
Ep

Epoch 26/100
Epoch 26/100
 440/7200 [>.............................] - ETA: 4s - loss: 0.3784 - acc: 0.8636Epoch 26/100
Epoch 27/100
  10/7200 [..............................] - ETA: 6s - loss: 0.4129 - acc: 0.7000
Epoch 27/100
 460/7200 [>.............................] - ETA: 5s - loss: 0.3592 - acc: 0.8500
Epoch 27/100
Epoch 27/100
Epoch 27/100
Epoch 27/100

Epoch 28/100
Epoch 28/100
 100/7200 [..............................] - ETA: 4s - loss: 0.4582 - acc: 0.8200Epoch 28/100
Epoch 28/100
1020/7200 [===>..........................] - ETA: 3s - loss: 0.4149 - acc: 0.8324
1070/7200 [===>..........................] - ETA: 4s - loss: 0.3828 - acc: 0.8430Epoch 28/100
Epoch 28/100
1340/7200 [====>.........................] - ETA: 4s - loss: 0.4190 - acc: 0.8269Epoch 28/100
Epoch 29/100
Epoch 29/100
1670/7200 [=====>........................] - ETA: 3s - loss: 0.4065 - acc: 0.8371
Epoch 30/100
Epoch 32/100
Epoch 32/100
Epoch 32/100
Epoch 32/100
Epoch 33/100
 140/7200 [........................

Epoch 39/100
  10/7200 [..............................] - ETA: 3s - loss: 0.2451 - acc: 0.9000Epoch 39/100
Epoch 40/100
 860/7200 [==>...........................] - ETA: 3s - loss: 0.3982 - acc: 0.8372
 440/7200 [>.............................] - ETA: 3s - loss: 0.4083 - acc: 0.8318Epoch 40/100
 770/7200 [==>...........................] - ETA: 3s - loss: 0.3944 - acc: 0.8338
Epoch 40/100
Epoch 40/100
 800/7200 [==>...........................] - ETA: 4s - loss: 0.3780 - acc: 0.8425Epoch 41/100
Epoch 41/100
Epoch 41/100
Epoch 41/100
 350/7200 [>.............................] - ETA: 4s - loss: 0.3275 - acc: 0.8771Epoch 41/100
Epoch 42/100
Epoch 42/100
Epoch 42/100
1320/7200 [====>.........................] - ETA: 4s - loss: 0.4114 - acc: 0.8182Epoch 42/100
1240/7200 [====>.........................] - ETA: 3s - loss: 0.4184 - acc: 0.8226Epoch 42/100
 820/7200 [==>...........................] - ETA: 3s - loss: 0.3653 - acc: 0.8476Epoch 43/100
Epoch 44/100
Epoch 45/100
1380/7200 [====>......

  % delta_t_median)


1440/7200 [=====>........................] - ETA: 3s - loss: 0.3571 - acc: 0.8493
 160/7200 [..............................] - ETA: 5s - loss: 0.3331 - acc: 0.8875Epoch 47/100
 470/7200 [>.............................] - ETA: 3s - loss: 0.3256 - acc: 0.8596Epoch 47/100
Epoch 47/100

  % delta_t_median)


  20/7200 [..............................] - ETA: 230s - loss: 0.5436 - acc: 0.7500

  % delta_t_median)


Epoch 48/100
1570/7200 [=====>........................] - ETA: 6s - loss: 0.3814 - acc: 0.8465Epoch 48/100

Epoch 49/100
Epoch 49/100
Epoch 49/100
Epoch 49/100
1360/7200 [====>.........................] - ETA: 4s - loss: 0.4127 - acc: 0.8235Epoch 49/100
Epoch 49/100
Epoch 50/100

Epoch 50/100
Epoch 50/100
  10/7200 [..............................] - ETA: 2s - loss: 0.3226 - acc: 0.9000Epoch 50/100
Epoch 50/100
Epoch 51/100
Epoch 51/100
1100/7200 [===>..........................] - ETA: 4s - loss: 0.4046 - acc: 0.8400
Epoch 51/100
Epoch 51/100
Epoch 51/100
 610/7200 [=>............................] - ETA: 3s - loss: 0.4239 - acc: 0.8393Epoch 51/100
Epoch 51/100
Epoch 52/100
Epoch 52/100
Epoch 52/100
1420/7200 [====>.........................] - ETA: 4s - loss: 0.4118 - acc: 0.8218
1050/7200 [===>..........................] - ETA: 3s - loss: 0.4119 - acc: 0.8257Epoch 52/100
Epoch 52/100
Epoch 54/100
Epoch 55/100
Epoch 55/100
Epoch 55/100
Epoch 56/100
Epoch 56/100
Epoch 56/100
Epoch 57/100


 620/7200 [=>............................] - ETA: 4s - loss: 0.3663 - acc: 0.8565Epoch 60/100
 540/7200 [=>............................] - ETA: 3s - loss: 0.4079 - acc: 0.8333
1170/7200 [===>..........................] - ETA: 3s - loss: 0.3871 - acc: 0.8410Epoch 60/100
Epoch 60/100
 280/7200 [>.............................] - ETA: 4s - loss: 0.3360 - acc: 0.9000
Epoch 60/100
Epoch 60/100
Epoch 61/100
 110/7200 [..............................] - ETA: 3s - loss: 0.3187 - acc: 0.8818
1150/7200 [===>..........................] - ETA: 4s - loss: 0.3914 - acc: 0.8365Epoch 61/100
1460/7200 [=====>........................] - ETA: 4s - loss: 0.3828 - acc: 0.8411Epoch 61/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 63/100
Epoch 63/100
Epoch 64/100
Epoch 64/100
 430/7200 [>.............................] - ETA: 3s - loss: 0.3877 - acc: 0.8372
1500/7200 [=====>........................] - ETA: 3s - loss: 0.4028 - acc: 0.8267Epoch 64/100
Epoch 64/100
Epoch 65/100
Epoch 65/100
Epoch 66/100
Epoch 6

  % delta_t_median)


  10/7200 [..............................] - ETA: 2s - loss: 0.2334 - acc: 0.9000


  % delta_t_median)


Epoch 73/100
Epoch 73/100
Epoch 73/100
Epoch 73/100
Epoch 74/100
Epoch 74/100
Epoch 74/100
Epoch 74/100
Epoch 74/100
 220/7200 [..............................] - ETA: 5s - loss: 0.4573 - acc: 0.8045 
Epoch 74/100
Epoch 75/100
Epoch 76/100
1390/7200 [====>.........................] - ETA: 3s - loss: 0.3871 - acc: 0.8381
  10/7200 [..............................] - ETA: 3s - loss: 0.2000 - acc: 1.0000Epoch 76/100
Epoch 76/100
1170/7200 [===>..........................] - ETA: 3s - loss: 0.4186 - acc: 0.8256
1230/7200 [====>.........................] - ETA: 3s - loss: 0.3952 - acc: 0.8423Epoch 76/100
1430/7200 [====>.........................] - ETA: 3s - loss: 0.4142 - acc: 0.8259
 310/7200 [>.............................] - ETA: 4s - loss: 0.4386 - acc: 0.8258Epoch 76/100
 420/7200 [>.............................] - ETA: 4s - loss: 0.4285 - acc: 0.8190
Epoch 76/100
Epoch 77/100
1160/7200 [===>..........................] - ETA: 3s - loss: 0.3904 - acc: 0.8483Epoch 78/100
  60/7200 [.......

 600/7200 [=>............................] - ETA: 4s - loss: 0.4099 - acc: 0.8167Epoch 86/100
Epoch 87/100
Epoch 86/100
 570/7200 [=>............................] - ETA: 4s - loss: 0.3784 - acc: 0.8421Epoch 87/100
Epoch 87/100
Epoch 89/100
Epoch 89/100
 300/7200 [>.............................] - ETA: 3s - loss: 0.3778 - acc: 0.8500Epoch 88/100
Epoch 90/100
Epoch 89/100

Epoch 90/100
Epoch 90/100
Epoch 91/100
Epoch 91/100
Epoch 90/100
1480/7200 [=====>........................] - ETA: 3s - loss: 0.4019 - acc: 0.8399
Epoch 91/100
Epoch 91/100
Epoch 91/100
Epoch 91/100
  10/7200 [..............................] - ETA: 6s - loss: 0.3983 - acc: 0.8000
Epoch 92/100
Epoch 91/100
1440/7200 [=====>........................] - ETA: 3s - loss: 0.3870 - acc: 0.8431
1610/7200 [=====>........................] - ETA: 3s - loss: 0.3939 - acc: 0.8298
Epoch 92/100
Epoch 92/100
Epoch 92/100
Epoch 93/100
Epoch 92/100
Epoch 93/100
Epoch 93/100
 580/7200 [=>............................] - ETA: 3s - loss: 0.3

 940/7200 [==>...........................] - ETA: 4s - loss: 0.4108 - acc: 0.8309Epoch 98/100
1370/7200 [====>.........................] - ETA: 2s - loss: 0.4133 - acc: 0.8299
Epoch 98/100
Epoch 99/100
Epoch 98/100
Epoch 99/100
Epoch 99/100
1110/7200 [===>..........................] - ETA: 4s - loss: 0.4216 - acc: 0.8306Epoch 99/100
Epoch 99/100
Epoch 99/100
Epoch 100/100
 730/7200 [==>...........................] - ETA: 4s - loss: 0.4194 - acc: 0.8274Epoch 99/100
 980/7200 [===>..........................] - ETA: 3s - loss: 0.4150 - acc: 0.8286Epoch 100/100
Epoch 100/100
1150/7200 [===>..........................] - ETA: 3s - loss: 0.3966 - acc: 0.8322
Epoch 100/100
Epoch 1/100
Epoch 2/100
Epoch 2/100
Epoch 3/100
Epoch 3/100
Epoch 4/100
Epoch 4/100
Epoch 5/100
Epoch 5/100
Epoch 6/100
Epoch 6/100
Epoch 7/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 9/100
Epoch 10/100
Epoch 10/100

Epoch 11/100
Epoch 12/100
Epoch 12/100
Epoch 13/100
Epoch 13/100
Epoch 14/100
Epoch 14/100
Epoch 15/100
Epo

Epoch 28/100
Epoch 28/100
Epoch 29/100
Epoch 29/100
Epoch 30/100
Epoch 30/100
Epoch 31/100
Epoch 31/100
Epoch 32/100
Epoch 32/100
Epoch 33/100
Epoch 33/100
Epoch 34/100
Epoch 34/100
Epoch 35/100
Epoch 35/100
Epoch 36/100
Epoch 36/100
Epoch 37/100
Epoch 37/100
Epoch 38/100
Epoch 38/100
Epoch 39/100
Epoch 39/100
Epoch 40/100
 800/7200 [==>...........................] - ETA: 2s - loss: 0.3655 - acc: 0.8362
Epoch 40/100
Epoch 41/100
Epoch 41/100
Epoch 42/100
 860/7200 [==>...........................] - ETA: 1s - loss: 0.4320 - acc: 0.8116
Epoch 42/100
Epoch 43/100
Epoch 43/100
Epoch 44/100
Epoch 44/100
 810/7200 [==>...........................] - ETA: 2s - loss: 0.4007 - acc: 0.8358
Epoch 45/100
Epoch 46/100
Epoch 46/100
Epoch 47/100
Epoch 47/100
Epoch 48/100
Epoch 48/100
Epoch 49/100
Epoch 49/100
Epoch 50/100
Epoch 50/100
Epoch 51/100
Epoch 51/100
Epoch 52/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 55/100
Epoch 56/100
Epoch 56/100
Epoch 57/100
Epoch 57/100
Epoch 58/100


Epoch 69/100
Epoch 69/100
Epoch 70/100
Epoch 70/100
Epoch 71/100
Epoch 71/100
Epoch 72/100
Epoch 72/100
Epoch 73/100
Epoch 73/100
Epoch 74/100
Epoch 74/100
Epoch 75/100
Epoch 75/100
Epoch 76/100
Epoch 76/100
Epoch 77/100
Epoch 77/100
Epoch 78/100
Epoch 78/100
Epoch 79/100
Epoch 79/100
Epoch 80/100
Epoch 80/100
Epoch 81/100
Epoch 81/100
Epoch 82/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 84/100
Epoch 85/100
Epoch 85/100
Epoch 86/100
Epoch 86/100
Epoch 87/100
Epoch 87/100
Epoch 88/100
Epoch 88/100
Epoch 89/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 93/100
Epoch 94/100
Epoch 94/100
Epoch 95/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 97/100
Epoch 98/100
Epoch 98/100
Epoch 99/100
Epoch 99/100
Epoch 100/100

In [18]:
accuracies

array([ 0.83999999,  0.83999999,  0.83375   ,  0.83      ,  0.84874999,
        0.84      ,  0.83      ,  0.8175    ,  0.84124999,  0.8525    ])

In [19]:
#Average accuracy over the folds
mean = np.mean(accuracies)
mean

0.83737499501556167

In [20]:
#Spread of the accuracies across the folds
#NOTE: despite its name, this variable holds the standard deviation, not the variance
variance = np.std(accuracies)
variance

0.0095434852860238947

# Dropout Regularization

Dropout is used to reduce overfitting, which shows up as a large difference in accuracy between the training set and the test set.

In [21]:
from keras.layers import Dropout

# Dropout rate used after each hidden layer.
# Increase it when trying to correct overfitting, but do not go past 0.5
# to reduce the risk of underfitting.
dropout_rate = 0.1

# Layers in the order they are stacked: each hidden layer is followed by a Dropout layer
layers = [
    Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11),
    Dropout(rate = dropout_rate),
    Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'),
    Dropout(rate = dropout_rate),
    Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'),
]

# Initialising the ANN and adding the layers one by one
classifier = Sequential()
for layer in layers:
    classifier.add(layer)

# Compiling the ANN
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

# Fitting the ANN to the Training set
classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)


  
  del sys.path[0]


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x1a16aca9e8>

# Parameter Tuning with Grid Search

In [22]:
from sklearn.model_selection import GridSearchCV

In [25]:
def build_classifier(optimizer = 'adam'):
    """Build and compile the churn ANN with a configurable optimizer.

    The network takes 11 inputs, has two hidden layers of 6 relu units each
    and a single sigmoid output unit. It is compiled with binary cross-entropy
    loss and the accuracy metric.

    Args:
        optimizer: Optimizer passed to compile(); the grid search supplies
            this value. Defaults to 'adam' so the function can still be
            called without arguments, like the earlier build_classifier()
            that this definition replaces.

    Returns:
        The compiled Sequential model.
    """
    classifier = Sequential()
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])
    return classifier

In [26]:
#As before but without batchsize/epoch number
#batch_size and epochs are left out here because the grid search supplies them from the parameter grid
classifier = KerasClassifier(build_fn = build_classifier)

In [27]:
#Hyperparameter values to try; the grid search tests every combination of them
parameters = dict(
    batch_size = [25, 32],
    epochs = [100, 500],
    optimizer = ['adam', 'rmsprop'], #rmsprop recommended for RNNs
)

In [28]:
#Grid Search
#Scores every parameter combination by accuracy using 10-fold cross validation
grid_search = GridSearchCV(classifier, parameters, scoring = 'accuracy', cv = 10)

In [None]:
#Fit to training set - several hours
#Every parameter combination is trained once per fold (2 x 2 x 2 combinations x 10 folds)
grid_search = grid_search.fit(X_train, y_train)

In [None]:
#Tutorial result - batch_size = 25, epochs = 500, optimizer = rmsprop
best_parameters = grid_search.best_params_
best_parameters

In [None]:
#Tutorial result - 0.85
#Read from the fitted grid search, which was built with scoring = 'accuracy'
best_accuracy = grid_search.best_score_
best_accuracy

# Homework

Put me one step down on the podium by getting this 86% accuracy with k-Fold Cross Validation.

As a reminder:

Bronze medal: Accuracy between 84% and 85%

Silver medal: Accuracy between 85% and 86%

Gold medal: Accuracy over 86%

Good luck on getting the gold medal.