In [1]:
#Importing libraries 

import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt 

In [2]:
dataset = pd.read_csv('Churn_Modelling.csv')

In [3]:
dataset.head() 

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
X = dataset.iloc[:, 3:13]   #features 
y = dataset.iloc[:, -1]     #target variable

In [5]:
geography = pd.get_dummies(dataset['Geography'], drop_first = True)
gender = pd.get_dummies(dataset['Gender'], drop_first = True) 

In [6]:
X = pd.concat([X, geography, gender], axis = 1)

In [7]:
X = X.drop(['Geography', 'Gender'], axis = 1)

In [8]:
#Splitting the dataset into training and testing set 
from sklearn.model_selection import train_test_split 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [9]:
#Feature Scaling

from sklearn.preprocessing import StandardScaler 
sc = StandardScaler() 
X_train = sc.fit_transform(X_train) 
X_test = sc.transform(X_test) 

In [10]:
#Perform Hyperparameter optimization 

from keras.wrappers.scikit_learn import KerasClassifier 
from sklearn.model_selection import GridSearchCV 

from keras.models import Sequential 
from keras.layers import Dropout
from keras.layers import Dense, Activation, Embedding, Flatten, LeakyReLU, BatchNormalization
from keras.activations import relu, sigmoid 

In [11]:
def create_model(layers, activation):
    model = Sequential()
    for i, nodes in enumerate(layers):
        if i==0:
            model.add(Dense(nodes,input_dim=X_train.shape[1]))
            model.add(Activation(activation))
        else:
            model.add(Dense(nodes))
            model.add(Activation(activation))
    model.add(Dense(1)) # Note: no activation beyond this point
    
    model.compile(optimizer='adam', loss='binary_crossentropy',metrics=['accuracy'])
    return model
    
model = KerasClassifier(build_fn=create_model, verbose=0)

  model = KerasClassifier(build_fn=create_model, verbose=0)


In [12]:
layers = [[20], [40,20], [45, 30, 15]]
#trying out three different sets 
#first one with just one layer with 20 neurons 
#second one with two layers with 40, 20 neurons respectively 
#similarly third one with three hidden layers 

activations = ['sigmoid', 'relu']
param_grid = dict(layers=layers, activation=activations, batch_size = [128,256], epochs = [30])
grid = GridSearchCV(estimator=model, param_grid = param_grid, cv = 5)

In [13]:
grid_result = grid.fit(X_train, y_train)

In [16]:
print(grid_result.best_score_, grid_result.best_params_)

0.8548750042915344 {'activation': 'relu', 'batch_size': 128, 'epochs': 30, 'layers': [45, 30, 15]}


From this, we see that the best score obtained after trying all the different values of the parameters was `0.8548` and this was obtained when we selected - 
- <b> Activation </b> = ReLU
- <b> Batch Size </b> = 128
- <b> Epochs </b> = 30 
- <b> Hidden Layers </b> = 3 
- <b> Number of neurons in the hidden layers </b> = 45, 30, 15

In [17]:
#Predicting on Test set 

pred_y = grid.predict(X_test)
y_pred = (pred_y > 0.5)



In [18]:
from sklearn.metrics import accuracy_score, confusion_matrix

print(accuracy_score(y_test, y_pred))

0.8515


In [19]:
print(confusion_matrix(y_test, y_pred))

[[1536   71]
 [ 226  167]]
