In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense,Flatten

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [3]:
url='https://raw.githubusercontent.com/Yashchandel/Bank_users/master/Customer_Churn_Modelling.csv'
dataset=pd.read_csv(url)
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


Now we see that row number and customerId is irrelevent feature for predicting user behaviour

In [4]:
X=dataset.drop(labels=['RowNumber','CustomerId','Surname','Exited'],axis=1)
Y=dataset['Exited']

In [5]:
X.head(10)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1
5,645,Spain,Male,44,8,113755.78,2,1,0,149756.71
6,822,France,Male,50,7,0.0,2,1,1,10062.8
7,376,Germany,Female,29,4,115046.74,4,1,0,119346.88
8,501,France,Male,44,4,142051.07,2,0,1,74940.5
9,684,France,Male,27,2,134603.88,1,1,1,71725.73


In [6]:
Y.head()

0    1
1    0
2    1
3    0
4    0
Name: Exited, dtype: int64

Converting 'Geography'(categoriacl feature) into one_hot encoding

In [7]:
X=pd.get_dummies(X,columns=['Geography'])
X.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,619,Female,42,2,0.0,1,1,1,101348.88,1,0,0
1,608,Female,41,1,83807.86,1,0,1,112542.58,0,0,1
2,502,Female,42,8,159660.8,3,1,0,113931.57,1,0,0
3,699,Female,39,1,0.0,2,0,0,93826.63,1,0,0
4,850,Female,43,2,125510.82,1,1,1,79084.1,0,0,1


Converting 'Gender' into binary labels

In [8]:
from sklearn.preprocessing import LabelEncoder
label=LabelEncoder()
X['Gender']=label.fit_transform(X['Gender'])
X.head(10)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,0,0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0,1
2,502,0,42,8,159660.8,3,1,0,113931.57,1,0,0
3,699,0,39,1,0.0,2,0,0,93826.63,1,0,0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0,1
5,645,1,44,8,113755.78,2,1,0,149756.71,0,0,1
6,822,1,50,7,0.0,2,1,1,10062.8,1,0,0
7,376,0,29,4,115046.74,4,1,0,119346.88,0,1,0
8,501,1,44,4,142051.07,2,0,1,74940.5,1,0,0
9,684,1,27,2,134603.88,1,1,1,71725.73,1,0,0


**Feature Standardization**


In [9]:
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,stratify=Y,random_state=0)

In [10]:
X_train

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
493,531,0,47,6,0.00,1,0,0,194998.34,1,0,0
6839,724,1,36,6,94615.11,2,1,1,10627.21,0,1,0
170,484,0,29,4,130114.39,1,1,0,164017.89,1,0,0
4958,655,1,37,6,109093.41,2,1,0,1775.52,1,0,0
4271,465,1,33,5,0.00,2,0,1,78698.09,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
6727,745,0,36,9,0.00,1,1,0,19605.18,1,0,0
4674,850,1,41,1,176958.46,2,0,1,125806.30,0,1,0
6399,601,1,48,9,163630.76,1,0,1,41816.49,0,1,0
872,722,0,35,2,163943.89,2,1,1,15068.18,1,0,0


In [11]:
from sklearn.preprocessing import scale

In [12]:
X_train=scale(X_train)

In [13]:
X_train

array([[-1.24021723, -1.09665089,  0.77986083, ...,  0.998002  ,
        -0.57812007, -0.57504086],
       [ 0.75974873,  0.91186722, -0.27382717, ..., -1.002002  ,
         1.72974448, -0.57504086],
       [-1.72725557, -1.09665089, -0.9443559 , ...,  0.998002  ,
        -0.57812007, -0.57504086],
       ...,
       [-0.51484098,  0.91186722,  0.87565065, ..., -1.002002  ,
         1.72974448, -0.57504086],
       [ 0.73902369, -1.09665089, -0.36961699, ...,  0.998002  ,
        -0.57812007, -0.57504086],
       [ 0.95663657,  0.91186722, -1.32751517, ..., -1.002002  ,
        -0.57812007,  1.73900686]])

In [14]:
X_test=scale(X_test)

In [15]:
X_test

array([[-1.95320227, -1.09333878,  0.07734873, ..., -1.00601811,
         1.72058015, -0.56888201],
       [ 0.64884591, -1.09333878, -1.42183934, ..., -1.00601811,
         1.72058015, -0.56888201],
       [ 0.13460714,  0.91462959,  2.32613085, ..., -1.00601811,
        -0.58119931,  1.75783377],
       ...,
       [-1.19212889,  0.91462959,  0.17104799, ...,  0.99401789,
        -0.58119931, -0.56888201],
       [ 1.10137603, -1.09333878,  1.48283756, ..., -1.00601811,
        -0.58119931,  1.75783377],
       [ 0.97795873, -1.09333878, -0.76594456, ..., -1.00601811,
        -0.58119931,  1.75783377]])

In [16]:
np.mean(X_train,axis=0)

array([ 7.10542736e-17, -2.04281037e-17, -3.27848859e-16, -1.24344979e-17,
        6.21724894e-18, -1.90958360e-17,  5.86197757e-17, -4.44089210e-19,
       -3.37507799e-17,  7.10542736e-18,  8.92619312e-17,  3.41948692e-17])

In [17]:
np.mean(X_test,axis=0)

array([-3.53495011e-16, -1.06581410e-17, -1.76747506e-16,  1.02140518e-16,
        1.91846539e-16,  1.59872116e-16, -9.41469125e-17,  0.00000000e+00,
       -2.38919995e-16, -5.68434189e-17,  4.08562073e-17,  3.01980663e-17])

In [18]:
np.std(X_train,axis=0)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [19]:
np.std(X_test,axis=0)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

**Build ANN**

In [20]:
model=Sequential()
model.add(Dense(X.shape[1],activation='relu',input_dim=X.shape[1]))
model.add(Dense(128,activation='relu'))
model.add(Dense(1,activation='sigmoid'))

In [21]:
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

In [22]:
model.fit(X_train,Y_train.to_numpy(),batch_size=1,epochs=10,verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f558fb82fd0>

In [23]:
model.evaluate(X_test,Y_test,batch_size=1)



[0.3518204689025879, 0.8560000061988831]