# This program predicts whether a customer of a certain bank will churn out.

In [2]:
#importing required libraries
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt

In [3]:
#changing directory
os.chdir(r"D:\P16-Deep-Learning-AZ\Deep_Learning_A_Z\Volume 1 - Supervised Deep Learning\Part 1 - Artificial Neural Networks (ANN)\Section 4 - Building an ANN\Artificial_Neural_Networks")
os.getcwd()

'D:\\P16-Deep-Learning-AZ\\Deep_Learning_A_Z\\Volume 1 - Supervised Deep Learning\\Part 1 - Artificial Neural Networks (ANN)\\Section 4 - Building an ANN\\Artificial_Neural_Networks'

In [4]:
#loading the dataset
df = pd.read_csv("Churn_Modelling.csv", sep = ",", encoding = "ISO-8859-1")

In [5]:
#checking shape
df.shape

(10000, 14)

In [6]:
#checking head
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


## 1. Preprocessing

This process creates a matrix of features.

*Note: RowNumber, CustomerId and Surname are not important. Other 10 features are.*

In [7]:
#creating matrix of features, basically a subset
X = df.iloc[:, 3:13].values
y = df.iloc[:, 13].values

In [8]:
X

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

Now, we need to encode categorical features of X. These are Geography and Gender.

In [9]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

In [10]:
#for Geography
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])

In [11]:
#for Gender
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])

*NOTE: Dummy variables created only for Geography, not Gender because gender has 2 categories, and we want to avoid the dummy variable trap.*

In [12]:
#one hot encoding for Geography
onehotencoder = OneHotEncoder(categorical_features = [1])
X = onehotencoder.fit_transform(X).toarray()

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [13]:
X

array([[1.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 1.0000000e+00,
        1.0000000e+00, 1.0134888e+05],
       [0.0000000e+00, 0.0000000e+00, 1.0000000e+00, ..., 0.0000000e+00,
        1.0000000e+00, 1.1254258e+05],
       [1.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 1.0000000e+00,
        0.0000000e+00, 1.1393157e+05],
       ...,
       [1.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 0.0000000e+00,
        1.0000000e+00, 4.2085580e+04],
       [0.0000000e+00, 1.0000000e+00, 0.0000000e+00, ..., 1.0000000e+00,
        0.0000000e+00, 9.2888520e+04],
       [1.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 1.0000000e+00,
        0.0000000e+00, 3.8190780e+04]])

In [14]:
#removing first variable to avoid dummy variable trap
X = X[:, 1:]

In [15]:
X

array([[0.0000000e+00, 0.0000000e+00, 6.1900000e+02, ..., 1.0000000e+00,
        1.0000000e+00, 1.0134888e+05],
       [0.0000000e+00, 1.0000000e+00, 6.0800000e+02, ..., 0.0000000e+00,
        1.0000000e+00, 1.1254258e+05],
       [0.0000000e+00, 0.0000000e+00, 5.0200000e+02, ..., 1.0000000e+00,
        0.0000000e+00, 1.1393157e+05],
       ...,
       [0.0000000e+00, 0.0000000e+00, 7.0900000e+02, ..., 0.0000000e+00,
        1.0000000e+00, 4.2085580e+04],
       [1.0000000e+00, 0.0000000e+00, 7.7200000e+02, ..., 1.0000000e+00,
        0.0000000e+00, 9.2888520e+04],
       [0.0000000e+00, 0.0000000e+00, 7.9200000e+02, ..., 1.0000000e+00,
        0.0000000e+00, 3.8190780e+04]])

### Splitting into train and test

In [16]:
from sklearn.model_selection import train_test_split

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

### Feature Scaling

In [18]:
from sklearn.preprocessing import StandardScaler

In [19]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

In [20]:
X_train

array([[-0.5698444 ,  1.74309049,  0.16958176, ...,  0.64259497,
        -1.03227043,  1.10643166],
       [ 1.75486502, -0.57369368, -2.30455945, ...,  0.64259497,
         0.9687384 , -0.74866447],
       [-0.5698444 , -0.57369368, -1.19119591, ...,  0.64259497,
        -1.03227043,  1.48533467],
       ...,
       [-0.5698444 , -0.57369368,  0.9015152 , ...,  0.64259497,
        -1.03227043,  1.41231994],
       [-0.5698444 ,  1.74309049, -0.62420521, ...,  0.64259497,
         0.9687384 ,  0.84432121],
       [ 1.75486502, -0.57369368, -0.28401079, ...,  0.64259497,
        -1.03227043,  0.32472465]])

In [21]:
X_test

array([[ 1.62776996, -0.57427105, -0.56129438, ...,  0.66011376,
         0.97628121,  1.62185911],
       [-0.61433742, -0.57427105, -1.33847768, ...,  0.66011376,
        -1.02429504,  0.504204  ],
       [-0.61433742,  1.74133801,  0.58347561, ...,  0.66011376,
         0.97628121, -0.41865644],
       ...,
       [-0.61433742,  1.74133801, -0.76084144, ...,  0.66011376,
        -1.02429504,  0.72775202],
       [ 1.62776996, -0.57427105, -0.0046631 , ...,  0.66011376,
         0.97628121, -1.54162886],
       [ 1.62776996, -0.57427105, -0.81335383, ...,  0.66011376,
        -1.02429504,  1.62356528]])

## 2. Building the ANN

In [22]:
import keras
from keras.models import Sequential
from keras.layers import Dense

Using TensorFlow backend.


In [24]:
#initializing the ANN
classifier = Sequential()

In [25]:
classifier

<keras.engine.sequential.Sequential at 0x1a38ff231d0>

Adding layers:

In [26]:
#adding the input layer and the first hidden layer
classifier.add(Dense(units = 6, kernel_initializer = "uniform", activation = "relu", input_dim = 11))

Instructions for updating:
Colocations handled automatically by placer.


In [27]:
#adding the second hidden layer
classifier.add(Dense(units = 6, kernel_initializer = "uniform", activation = "relu"))

In [28]:
#adding output layer
#need only 1 node in the output layer
classifier.add(Dense(units = 1, kernel_initializer = "uniform", activation = "sigmoid"))

In [29]:
#compiling the whole NN by adding the stochastic
classifier.compile(optimizer = "adam", loss = "binary_crossentropy", metrics = ["accuracy"])

In [30]:
#fitting the ANN to the training set
classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)

Instructions for updating:
Use tf.cast instead.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100

<keras.callbacks.callbacks.History at 0x1a394bb0e48>

Accuracy is 83.36%.

In [31]:
#predicting on the test set
y_pred = classifier.predict(X_test)

In [32]:
#probabilities of leaving bank
y_pred

array([[0.19998315],
       [0.27874526],
       [0.1835531 ],
       ...,
       [0.14484757],
       [0.12836206],
       [0.0932177 ]], dtype=float32)

In [33]:
#convert probs to 0 and 1
y_pred = (y_pred > 0.5)

In [34]:
y_pred

array([[False],
       [False],
       [False],
       ...,
       [False],
       [False],
       [False]])

In [35]:
y_test

array([0, 1, 0, ..., 0, 0, 0], dtype=int64)

In [36]:
#CM to check accuracy on test set
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

In [37]:
cm

array([[1560,   35],
       [ 288,  117]], dtype=int64)

In [38]:
#checking accuracy
(1560+117)/(1560+35+288+117)

0.8385

## It is done.

### Another use of this algorithm is that the bank can make a ranking of top customers who will leave the bank and analyze them further in order to take certain steps to prevent said customers from leaving the bank.

### Predicting for a new test case

Lets say we have the following information for a customer.

+ Geography: France
+ Credit Score: 600
+ Gender: Male
+ Age: 40 years old
+ Tenure: 3 years
+ Balance: 60000 (in dollars)
+ Number of Products: 2
+ Does this customer have a credit card? Yes
+ Is this customer an Active Member: Yes
+ Estimated Salary: 50000 (in dollars)

So should we say goodbye to that customer?

In [51]:
new_prediction = classifier.predict(sc.transform(np.array([[0.0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]])))
new_prediction = (new_prediction > 0.5)                                   

In [52]:
new_prediction

array([[False]])

The customer won't leave the bank.