In [48]:
import tensorflow as tf
tf.reset_default_graph()

In [49]:
import pandas as pd
import numpy as np
import scipy as sp
from sklearn.cross_validation import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn import model_selection
#from textblob import TextBlob, Word
from nltk.stem.snowball import SnowballStemmer
%matplotlib inline
from importlib import reload
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import (accuracy_score, f1_score,average_precision_score, confusion_matrix,average_precision_score, precision_score, recall_score, roc_auc_score)
import warnings


## Read Data

In [50]:
data = pd.read_csv('bank.csv')#, encoding = 'unicode_escape')

In [51]:
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [52]:
data.shape

(10000, 14)

In [53]:
data.dtypes

RowNumber            int64
CustomerId           int64
Surname             object
CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
Exited               int64
dtype: object

In [54]:
bankdata = data.drop(['CustomerId', 'RowNumber', 'Surname'], axis=1)

In [55]:
bankdata.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


## Distinguish feature and target set

In [56]:
x=bankdata.drop(["Exited"], axis=1)
y=bankdata["Exited"]

In [57]:
x.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [58]:
y.head()

0    1
1    0
2    1
3    0
4    0
Name: Exited, dtype: int64

### Convert Geo and Gender to encoded columns

In [59]:
x.dtypes

CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
dtype: object

In [60]:
x=pd.get_dummies(x, prefix='Geo', columns=['Geography'])

In [61]:
x.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geo_France,Geo_Germany,Geo_Spain
0,619,Female,42,2,0.0,1,1,1,101348.88,1,0,0
1,608,Female,41,1,83807.86,1,0,1,112542.58,0,0,1
2,502,Female,42,8,159660.8,3,1,0,113931.57,1,0,0
3,699,Female,39,1,0.0,2,0,0,93826.63,1,0,0
4,850,Female,43,2,125510.82,1,1,1,79084.1,0,0,1


In [62]:
x=pd.get_dummies(x, prefix='Gen', columns=['Gender'])

In [63]:
x.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geo_France,Geo_Germany,Geo_Spain,Gen_Female,Gen_Male
0,619,42,2,0.0,1,1,1,101348.88,1,0,0,1,0
1,608,41,1,83807.86,1,0,1,112542.58,0,0,1,1,0
2,502,42,8,159660.8,3,1,0,113931.57,1,0,0,1,0
3,699,39,1,0.0,2,0,0,93826.63,1,0,0,1,0
4,850,43,2,125510.82,1,1,1,79084.1,0,0,1,1,0


### Check the normality of data 

In [64]:
#import seaborn as sns
#sns.pairplot(x, diag_kind='kde')

## Split into test and train data

In [65]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3)

In [66]:
x_train.shape

(7000, 13)

In [67]:
y_test.shape

(3000,)

## Build the initial model

# Initialize Sequential model
model = tf.keras.models.Sequential()

model.add(tf.keras.layers.Reshape((13,),input_shape=(13,)))

#Normalize the data
model.add(tf.keras.layers.BatchNormalization())

model.add(tf.keras.layers.Dense(60, activation='sigmoid'))
model.add(tf.keras.layers.Dense(40, activation='sigmoid'))
model.add(tf.keras.layers.Dense(20, activation='sigmoid'))
model.add(tf.keras.layers.Dense(10, activation='sigmoid'))

#Add Dense Layer which provides 1 Outputs after applying softmax
model.add(tf.keras.layers.Dense(1, activation='softmax'))

sgd_optimizer = tf.keras.optimizers.SGD(lr=0.03)
#Compile the model
model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])

In [68]:
#Initialize Sequential model
#tf.reset_default_graph()
model = tf.keras.models.Sequential()

model.add(tf.keras.layers.Reshape((13,),input_shape=(13,)))
model.add(tf.keras.layers.BatchNormalization())

#Add hidden layers
model.add(tf.keras.layers.Dense(60, activation='sigmoid'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(60, activation='sigmoid'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(60, activation='sigmoid'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

#Create optimizer with non-default learning rate
sgd_optimizer = tf.keras.optimizers.SGD(lr=0.03)

#Compile the model
model.compile(optimizer=sgd_optimizer, loss='binary_crossentropy', metrics=['accuracy'])

#Model Summary
model.summary()

#Train the model
model.fit(x_train, y_train, validation_data=(x_test, y_test),epochs=10,batch_size=10)

pred = model.predict(x_test)

print("Accuracy Score using threshold of .5 using round function:", accuracy_score(y_test, pred.round()))
print(confusion_matrix(y_test, pred.round()))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 13)                0         
_________________________________________________________________
batch_normalization (BatchNo (None, 13)                52        
_________________________________________________________________
dense (Dense)                (None, 60)                840       
_________________________________________________________________
batch_normalization_1 (Batch (None, 60)                240       
_________________________________________________________________
dense_1 (Dense)              (None, 60)                3660      
_________________________________________________________________
batch_normalization_2 (Batch (None, 60)                240       
_________________________________________________________________
dense_2 (Dense)              (None, 60)                3660      
__________

In [69]:
#tf.reset_default_graph()
from keras.models import Sequential
from keras.layers import Dense

clf = Sequential()

clf.add(Dense(units = 60, kernel_initializer = "uniform", activation= "relu", input_dim=13))
clf.add(Dense(units = 60, kernel_initializer = "uniform", activation= "relu"))
clf.add(Dense(units = 60, kernel_initializer = "uniform", activation= "relu"))
clf.add(Dense(units = 1, kernel_initializer = "uniform", activation= "sigmoid"))

clf.compile(optimizer="adam", loss = "binary_crossentropy", metrics=["accuracy"])

clf.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size = 20, epochs=20)

pred = clf.predict(x_test)
print("Accuracy Score using threshold of .5 using round function:", accuracy_score(y_test, pred.round()))
print(confusion_matrix(y_test, pred.round()))

Train on 7000 samples, validate on 3000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Accuracy Score using threshold of .5 using round function: 0.7986666666666666
[[2396    0]
 [ 604    0]]
