# Part 1 - Data Preprocessing

Importing the Libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping

tf.__version__

'2.4.1'

Importing the Dataset

In [2]:
dataset = pd.read_csv('ANN/Churn_Modelling.csv')
X = dataset.iloc[:, 3:-1].values #take all rows and index columns from 3 to 12, cuz the first 3 (row number, name, customer ID) have no affect on results
y = dataset.iloc[:, -1].values #take all rows of column 13 (last one) is the result we're looking for, which is whether they exited or not

print(X)
print(y)

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]
[1 0 1 ... 1 1 0]


In [16]:
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [14]:
dataset['Exited'].value_counts() #7963 cases of class 0 and 2037 cases of class 1

0    7963
1    2037
Name: Exited, dtype: int64

Encoding Categorical Data

In [3]:
le = LabelEncoder() #creating an instance of the label encoder imported above
X[:, 2] = le.fit_transform(X[:, 2]) #we are encoding column index 2 from array X, which is the gender. 

print(X[0:10,2]) #0 is female, 1 is male

[0 0 0 0 0 1 1 0 1 1]


One Hot Encoding the Geography Column

In [4]:
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough') #creating an instance of the ColumnTransformer and One Hot Encoding column 1 in X, which is geography.
#this will then split up geography into 3 columns, one for each country (france, germany, spain)

X = np.array(ct.fit_transform(X))

print(X[0:5, 0:3]) #column 1 is France, column 2 is Germany, column 3 is Spain

[[1.0 0.0 0.0]
 [0.0 0.0 1.0]
 [1.0 0.0 0.0]
 [1.0 0.0 0.0]
 [0.0 0.0 1.0]]


Splitting the Dataset into the Training Set and Test Set

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)

Feature Scaling

In [6]:
#so here we're just scaling all our values using the following formula: z = (x - u) / s
#x is the input, u is the mean, s is the standard deviation
#https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
#we do this because models normally perform better when numerical input variables are scaled to a standard range
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Part 2 - Making our ANN

Adding EarlyStopping 

In [7]:
#creating an instance of the EarlyStopping module we imported above
#this callback allows us to stop the training if a loss condition is not met after certain epochs
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience = 40) 
#here our mode is min, because we're trying to minimize the loss. if our metric was accuracy, we'd use max instead as the mode. patience = 40 means we wait 40 epochs until it stops 

ANN Function

In [8]:
def Classifier():
  #Initializing
  classifier = tf.keras.models.Sequential()

  #Adding the hidden layers
  classifier.add(tf.keras.layers.Dense(units = 12, kernel_initializer= 'uniform', activation = 'relu')) #we have units = 12. Also initialized randomly via a normal distribution
  classifier.add(tf.keras.layers.Dropout(rate = 0.05)) #turning off a random 10% of neurons during training to reduce overfitting

  classifier.add(tf.keras.layers.Dense(units = 12, kernel_initializer= 'uniform', activation = 'relu')) #we have units = 12 (which is output dimension) from (number indepedent variables + outputs)/2 = (11 + 1)/2 = 6. Also initialized randomly via a normal distribution
  classifier.add(tf.keras.layers.Dropout(rate = 0.05)) #turning off a random 10% of neurons during training to reduce overfitting

  classifier.add(tf.keras.layers.Dense(units = 6, kernel_initializer= 'uniform', activation = 'relu')) #we have units = 6 (which is output dimension) from (number indepedent variables + outputs)/2 = (11 + 1)/2 = 6. Also initialized randomly via a normal distribution
  classifier.add(tf.keras.layers.Dropout(rate = 0.05)) #turning off a random 10% of neurons during training to reduce overfitting

  #Adding the output layer
  classifier.add(tf.keras.layers.Dense(units =1, kernel_initializer= 'uniform', activation = 'sigmoid')) #final layer is units =  1 (which is output dimension) , since we're just predicting exited or not
  #we used sigmoid function as the output layer, since sigmoid gives us the probability of the output being a 0 or 1 
  #Compiling the ANN
  classifier.compile(loss='binary_crossentropy', optimizer='adam', metrics = ['accuracy']) #so here we're not using mse as the loss cuz its a binary classification
  #use loss = categorical_crossentropy if we have more than 2 outcomes
  #adam optimizer is usually one of the best ones for performing stochastic gradient descent  
  return classifier

In [9]:
classifier = Classifier()

# Part 3 - Training the ANN

Training the ANN on the Training Set

In [10]:
classifier.fit(x=X_train, y=y_train, batch_size = 16,epochs=300, validation_data=(X_test, y_test), callbacks=[es])
#we chose batch size = 64 because training is batches is more efficient when training, since we compare several predictions and several results into a batch

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f3b360943c8>

# Part 4 - Making our Predictions

In [11]:
#12,12,6 with 0.05 dropout and 16 batch size
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5) #just to now round the values to true or false

from sklearn.metrics import classification_report, confusion_matrix

print(classification_report(y_test, y_pred)) #very good precision and accuracy
print(confusion_matrix(y_test,y_pred)) #only misclassified 1 which is good

              precision    recall  f1-score   support

           0       0.88      0.98      0.93      2006
           1       0.83      0.46      0.59       494

    accuracy                           0.87      2500
   macro avg       0.85      0.72      0.76      2500
weighted avg       0.87      0.87      0.86      2500

[[1959   47]
 [ 268  226]]


Use the ANN model to predict if the customer with the following informations will leave the bank:

Geography: Germany

Credit Score: 700

Gender: Female

Age: 35 years old

Tenure: 2 years

Balance: $ 117000

Number of Products: 3

Does this customer have a credit card? Yes

Is this customer an Active Member: Yes

Estimated Salary: $ 117000

In [13]:
hw_pred = classifier.predict(sc.transform([[0, 1, 0, 700, 0, 35, 3, 34300, 3, 1, 1, 117000]]))
hw_pred = (hw_pred > 0.5) #just to now round the values to true or false
print(hw_pred)

[[ True]]
