# Import Packages

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
# Checking the tensorflow version installed in your device

print(tf.__version__)

2.3.1


# Exploiratory Data Analysis

In [3]:
data = pd.read_csv("Churn_Modelling.csv")
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
print("The shape of the dataset is:", data.shape)

The shape of the dataset is: (10000, 14)


Dropping the columns that are not useful.

In [5]:
x = data.drop(labels=['RowNumber', 'CustomerId', 'Surname', 'Exited'], axis=1)
y = data['Exited']

In [6]:
x.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


# Data Preprocessing

In [7]:
from sklearn.preprocessing import LabelEncoder

In [8]:
# This step is not necessary. This is just for demonstration

label = LabelEncoder()
x['Geography'] = label.fit_transform(x['Geography'])
x['Gender'] = label.fit_transform(x['Gender'])
x.head(10)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,0,0,42,2,0.0,1,1,1,101348.88
1,608,2,0,41,1,83807.86,1,0,1,112542.58
2,502,0,0,42,8,159660.8,3,1,0,113931.57
3,699,0,0,39,1,0.0,2,0,0,93826.63
4,850,2,0,43,2,125510.82,1,1,1,79084.1
5,645,2,1,44,8,113755.78,2,1,0,149756.71
6,822,0,1,50,7,0.0,2,1,1,10062.8
7,376,1,0,29,4,115046.74,4,1,0,119346.88
8,501,0,1,44,4,142051.07,2,0,1,74940.5
9,684,0,1,27,2,134603.88,1,1,1,71725.73


Here, we are converting categorical features to numerical value using one-hot encoding. Because this way we get numerical values that can be compared but categorical features cannot be compared that way. <br>
We need not convert 'Gender' column using one-hot encoding because the it consists of only two unique vlaues and hence can be represented as binary digits 0 and 1.

In [9]:
x = pd.get_dummies(x, drop_first=True, columns=['Geography'])
x.head(10)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_1,Geography_2
0,619,0,42,2,0.0,1,1,1,101348.88,0,0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,1
2,502,0,42,8,159660.8,3,1,0,113931.57,0,0
3,699,0,39,1,0.0,2,0,0,93826.63,0,0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,1
5,645,1,44,8,113755.78,2,1,0,149756.71,0,1
6,822,1,50,7,0.0,2,1,1,10062.8,0,0
7,376,0,29,4,115046.74,4,1,0,119346.88,1,0
8,501,1,44,4,142051.07,2,0,1,74940.5,0,0
9,684,1,27,2,134603.88,1,1,1,71725.73,0,0


In [10]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [11]:
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.2)

In [12]:
print("The shape of x_train", x_train.shape)
print("The shape of x_test", x_test.shape)
print("The shape of y_train", y_train.shape)
print("The shape of y_test", y_test.shape)

The shape of x_train (2000, 11)
The shape of x_test (8000, 11)
The shape of y_train (2000,)
The shape of y_test (8000,)


In [13]:
normalizer = StandardScaler()
x_train = normalizer.fit_transform(x_train)
x_test = normalizer.fit_transform(x_test)
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

In [14]:
x_train[0]

array([-1.0570066 , -1.11114215, -1.6124297 ,  0.67942324,  1.02014191,
       -0.90761583,  0.62593007, -1.02634696,  0.34570972, -0.59043854,
        1.71377008])

# Train Model

In [15]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense

In [16]:
model = Sequential()
model.add(Dense(x_train.shape[1], input_dim=x_train.shape[1], activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [17]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [18]:
model.fit(x_train, y_train, epochs=40, batch_size=15, verbose=1)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<tensorflow.python.keras.callbacks.History at 0x7fe9600eba90>

# Test Model

In [19]:
y_predicted = model.predict_classes(x_test)

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


In [20]:
model.evaluate(x_test, y_test)



[0.36879634857177734, 0.8457499742507935]

Our model gave the accuracy of 84.5%. Let's check the accuracy using sklearn.accuracy_score. This gives the same result as that given by tensorflow.

In [22]:
from sklearn.metrics import accuracy_score, confusion_matrix

In [23]:
accuracy_score(y_test, y_predicted)

0.84575

In [24]:
confusion_matrix(y_test, y_predicted)

array([[6067,  317],
       [ 917,  699]])