In [3]:
import pandas as pd

In [4]:
import numpy as np

In [5]:
import matplotlib as plt

## Exploratory Data Analysis

In [6]:
df=pd.read_csv('Churn_Modelling.csv')
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


Removing the unnecesarry columns.

In [7]:
df.drop(columns=['RowNumber','CustomerId','Surname'], inplace=True)

In [8]:
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


Preparing the dependent and independent features

In [9]:
X=df.drop(columns=['Exited'])

In [10]:
X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [11]:
y=df['Exited']

In [12]:
y.head()

0    1
1    0
2    1
3    0
4    0
Name: Exited, dtype: int64

One hot encoding for categorical features.

In [13]:
geography=pd.get_dummies(X['Geography'],drop_first=True)
geography.head()

Unnamed: 0,Germany,Spain
0,0,0
1,0,1
2,0,0
3,0,0
4,0,1


In [14]:
gender=pd.get_dummies(X['Gender'], drop_first=True)
gender.head()

Unnamed: 0,Male
0,0
1,0
2,0
3,0
4,0


In [15]:
X=pd.concat([X,geography,gender],axis=1)

In [16]:
X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Germany,Spain,Male
0,619,France,Female,42,2,0.0,1,1,1,101348.88,0,0,0
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,1,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,0,0,0
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0,0,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0,1,0


In [17]:
X=X.drop(['Geography','Gender'],axis=1)

In [18]:
X.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Germany,Spain,Male
0,619,42,2,0.0,1,1,1,101348.88,0,0,0
1,608,41,1,83807.86,1,0,1,112542.58,0,1,0
2,502,42,8,159660.8,3,1,0,113931.57,0,0,0
3,699,39,1,0.0,2,0,0,93826.63,0,0,0
4,850,43,2,125510.82,1,1,1,79084.1,0,1,0


In [19]:
from sklearn.model_selection import train_test_split

In [20]:
X_train,X_test,y_train,y_split=train_test_split(X,y,test_size=0.3,random_state=50)

In [21]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()

In [22]:
X_train=sc.fit_transform(X_train)
X_test=sc.fit_transform(X_test)

### Implementing Artificial Neural Network

Importing the required libraries.

In [23]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LeakyReLU,PReLU,ELU
from keras.layers import Dropout

Sequential is creating an empty neural network, later each input/hidden/output layer will be added.

The First hidden layer has 10 neurons (units = 10, the initialization technique is 'he_uniform', the activation function is 'relu'. Input_dim is the input layer and it is equal to the number of features = 11.

In [24]:
classifier=Sequential()
classifier.add(Dense(units = 10, kernel_initializer= 'he_uniform',activation='relu',input_dim = 11))

The output layer comes with sigmoid function as it is binary classification problem.

In [25]:
#classifier.add(Dense(units = 10, kernel_initializer= 'he_normal',activation='relu'))
classifier.add(Dense(units = 20, kernel_initializer= 'he_normal',activation='relu'))
classifier.add(Dense(units = 15, kernel_initializer= 'he_normal',activation='relu'))
classifier.add(Dense(units = 1, kernel_initializer= 'glorot_uniform',activation='sigmoid'))

In [26]:
classifier.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

Adam optimizer will be used in our ANN. batch size for each epoch is 10 and we have 100 epochs.

In [27]:
model_history=classifier.fit(X_train, y_train, validation_split=0.33, batch_size=10, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100


Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [28]:
y_pred=classifier.predict(X_test)
y_pred=(y_pred > 0.5)

The output of y_pred is probabilities of classes, so we apply a filter to get the them true and false.

In [29]:
y_pred

array([[ True],
       [False],
       [False],
       ...,
       [False],
       [False],
       [False]])

In [30]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_split,y_pred)

The confusion matrix and the accuracy of the model.

In [31]:
cm

array([[2270,  131],
       [ 324,  275]], dtype=int64)

In [32]:
from sklearn.metrics import accuracy_score
score = accuracy_score(y_split,y_pred)
print(score)

0.8483333333333334


# Keras Tuner

In [114]:
from kerastuner.tuners import RandomSearch
from tensorflow.keras import layers

In [115]:
def build_model(hp):
    model = keras.Sequential()
    for i in range(hp.Int('num_layers', 2, 20)):
        model.add(layers.Dense(units=hp.Int('units_' + str(i),
                                            min_value=32,
                                            max_value=512,
                                            step=32),
                               activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
        loss='binary_crossentropy',
        metrics=['accuracy'])
    return model

In [116]:
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=3,
    directory='project',
    project_name='churn_modelling')

In [117]:
tuner.search_space_summary()

Search space summary
Default search space size: 4
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 20, 'step': 1, 'sampling': None}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
units_1 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}


In [118]:
tuner.search(X_train, y_train,
             epochs=5,
             validation_data=(X_test, y_split))

Trial 5 Complete [00h 02m 11s]
val_accuracy: 0.8574444254239401

Best val_accuracy So Far: 0.8594444592793783
Total elapsed time: 00h 05m 53s
INFO:tensorflow:Oracle triggered exit


In [119]:
tuner.results_summary()

Results summary
Results in project\churn_modelling
Showing 10 best trials
Objective(name='val_accuracy', direction='max')
Trial summary
Hyperparameters:
num_layers: 7
units_0: 64
units_1: 96
learning_rate: 0.01
units_2: 32
units_3: 32
units_4: 32
units_5: 32
units_6: 32
Score: 0.8594444592793783
Trial summary
Hyperparameters:
num_layers: 18
units_0: 480
units_1: 320
learning_rate: 0.0001
units_2: 192
units_3: 320
units_4: 480
units_5: 384
units_6: 320
units_7: 160
units_8: 192
units_9: 320
units_10: 96
units_11: 192
units_12: 448
units_13: 352
units_14: 224
units_15: 416
units_16: 64
units_17: 384
Score: 0.8574444254239401
Trial summary
Hyperparameters:
num_layers: 14
units_0: 384
units_1: 384
learning_rate: 0.01
units_2: 160
units_3: 64
units_4: 192
units_5: 224
units_6: 512
units_7: 32
units_8: 32
units_9: 32
units_10: 32
units_11: 32
units_12: 32
units_13: 32
Score: 0.834333340326945
Trial summary
Hyperparameters:
num_layers: 18
units_0: 192
units_1: 160
learning_rate: 0.01
units_2:

## Hyperparameter Tuning

In [169]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.activations import relu, sigmoid
from keras.layers import Dense, Activation, Embedding, Flatten, LeakyReLU, BatchNormalization, Dropout

In [170]:
def create_model(layers, activation):
    model = Sequential()
    for i, nodes in enumerate(layers):
        if i==0:
            model.add(Dense(nodes,input_dim=X_train.shape[1]))
            model.add(Activation(activation))
            model.add(Dropout(0.3))
        else:
            model.add(Dense(nodes))
            model.add(Activation(activation))
            model.add(Dropout(0.3))
            
    model.add(Dense(units = 1, kernel_initializer= 'glorot_uniform', activation = 'sigmoid')) # Note: no activation beyond this point
    
    model.compile(optimizer='adam', loss='binary_crossentropy',metrics=['accuracy'])
    return model

In [171]:
model = KerasClassifier(build_fn=create_model, verbose=0)

In [172]:
layers=[(20,), (20,40), (45,30,15)]
activations = ['relu','sigmoid']
param_grid = dict(layers=layers, activation=activations, batch_size = [128, 256], epochs=[30])
grid = GridSearchCV(estimator=model, param_grid=param_grid,cv=5)


In [173]:
grid_result = grid.fit(X_train, y_train)

[grid_result.best_score_,grid_result.best_params_]


[0.8557142853736878,
 {'activation': 'relu',
  'batch_size': 128,
  'epochs': 30,
  'layers': (45, 30, 15)}]

In [174]:
y_pred=grid.predict(X_test)
y_predict = (y_pred > 0.5)



In [175]:
cm = confusion_matrix(y_predict,y_split)

In [176]:
cm

array([[2322,  341],
       [  79,  258]], dtype=int64)

In [177]:
score=accuracy_score(y_predict,y_split)

In [178]:
score

0.86