In [1]:
import pandas as pd
import numpy as np

## Importing the dataset

In [134]:
data = pd.read_csv("Churn_Modelling.csv")
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [135]:
data.shape

(10000, 14)

# Data Preprocessing

### Checking for null values

In [136]:
data.isnull().any()
# No null values

RowNumber          False
CustomerId         False
Surname            False
CreditScore        False
Geography          False
Gender             False
Age                False
Tenure             False
Balance            False
NumOfProducts      False
HasCrCard          False
IsActiveMember     False
EstimatedSalary    False
Exited             False
dtype: bool

In [137]:
x = data.iloc[:,3:-1]
y = data.iloc[:,-1]

## Encoding the categorical variable

In [138]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
x['Gender'] = le.fit_transform(x['Gender'])

In [139]:
# Performing One Hot Encoding in the Geography column as there is no order relationship in this column
x = pd.get_dummies(x, drop_first = True)
x

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,0,0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,1
2,502,0,42,8,159660.80,3,1,0,113931.57,0,0
3,699,0,39,1,0.00,2,0,0,93826.63,0,0
4,850,0,43,2,125510.82,1,1,1,79084.10,0,1
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,0,0
9996,516,1,35,10,57369.61,1,1,1,101699.77,0,0
9997,709,0,36,7,0.00,1,0,1,42085.58,0,0
9998,772,1,42,3,75075.31,2,1,0,92888.52,1,0


## Feature Scaling


In [142]:
cols = x.columns
cols

Index(['CreditScore', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts',
       'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Geography_Germany',
       'Geography_Spain'],
      dtype='object')

In [143]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x = sc.fit_transform(x)
x = pd.DataFrame(x, columns = cols)
x

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_Germany,Geography_Spain
0,-0.326221,-1.095988,0.293517,-1.041760,-1.225848,-0.911583,0.646092,0.970243,0.021886,-0.578736,-0.573809
1,-0.440036,-1.095988,0.198164,-1.387538,0.117350,-0.911583,-1.547768,0.970243,0.216534,-0.578736,1.742740
2,-1.536794,-1.095988,0.293517,1.032908,1.333053,2.527057,0.646092,-1.030670,0.240687,-0.578736,-0.573809
3,0.501521,-1.095988,0.007457,-1.387538,-1.225848,0.807737,-1.547768,-1.030670,-0.108918,-0.578736,-0.573809
4,2.063884,-1.095988,0.388871,-1.041760,0.785728,-0.911583,0.646092,0.970243,-0.365276,-0.578736,1.742740
...,...,...,...,...,...,...,...,...,...,...,...
9995,1.246488,0.912419,0.007457,-0.004426,-1.225848,0.807737,0.646092,-1.030670,-0.066419,-0.578736,-0.573809
9996,-1.391939,0.912419,-0.373958,1.724464,-0.306379,-0.911583,0.646092,0.970243,0.027988,-0.578736,-0.573809
9997,0.604988,-1.095988,-0.278604,0.687130,-1.225848,-0.911583,-1.547768,0.970243,-1.008643,-0.578736,-0.573809
9998,1.256835,0.912419,0.293517,-0.695982,-0.022608,0.807737,0.646092,-1.030670,-0.125231,1.727904,-0.573809


## Splitting into train and test set

In [144]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

In [145]:
x_train.shape

(8000, 11)

In [146]:
x_test.shape

(2000, 11)

In [147]:
from tensorflow import keras

In [148]:
keras.models.Sequential

tensorflow.python.keras.engine.sequential.Sequential

# Building an ANN

### Initializing the ANN as sequence of layers
- Creating a classifier variable as an object of sequential class, which allows to build an artificial neural network
- This helps to create a sequence of layers
- Sequential is present inside keras which itself is present in tensorflow

In [178]:
classifier = keras.models.Sequential()
classifier

<tensorflow.python.keras.engine.sequential.Sequential at 0x132bc7aeeb8>

### Adding the input layer and the first hidden layer
- Using add function to add layers.
- Dense function helps to add hidden layers in sequential manner

In [179]:
classifier.add(keras.layers.Dense(units = 32, kernel_initializer = 'he_uniform', activation = 'relu', input_dim = 11))
# units = Number of neurons in hidden layer 1
# kernel_initializer = Weight initializing technique used based on different activation function
# activation = activation function used
# input_dim = number of independent features given

### Adding the more hidden layers
- Just use add funtion to add any number of layers and tune the 'units' hyperparameter according to the need

In [180]:
# Adding 2nd hidden layer
classifier.add(keras.layers.Dense(units = 32,kernel_initializer = 'he_uniform', activation = 'relu'))
# Adding a dropout layer
classifier.add(keras.layers.Dropout(0.2))
# Adding 3rd hidden layer
classifier.add(keras.layers.Dense(units = 32,kernel_initializer = 'he_uniform', activation = 'relu'))
# Adding a dropout layer
classifier.add(keras.layers.Dropout(0.2))

In [181]:
classifier.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_21 (Dense)             (None, 32)                384       
_________________________________________________________________
dense_22 (Dense)             (None, 32)                1056      
_________________________________________________________________
dropout_10 (Dropout)         (None, 32)                0         
_________________________________________________________________
dense_23 (Dense)             (None, 32)                1056      
_________________________________________________________________
dropout_11 (Dropout)         (None, 32)                0         
Total params: 2,496
Trainable params: 2,496
Non-trainable params: 0
_________________________________________________________________


### Adding the output layer
- The output layer contains the dimensions of the output
- Here the output is either 1 or 0
- So we need 1 neuron to get the final prediction( final prediction will be either 1 or 0)
- If we have multiclass classification, for example 3 class A,B,C, then after one hot encoding, A will be 1,0,0, B will be 0,1,0 and C will be 0,0,1, so we need 3 neurons in the output layer
- Rectifier activation function is not used in the output layer. Sigmoid is used which helps to give the probabilities also for a class to be 1
    - For non binary classification activation function = 'softmax'

In [182]:
classifier.add(keras.layers.Dense(units = 1,kernel_initializer = 'glorot_uniform', activation = 'sigmoid'))

# Training the ANN

### Compiling the ann
- Compiling the ann with an optimizer, loss function and metric- accuracy
- Optimizer- 'adam'-Stochastic Gradient descent to update the weights after each iteration and not after the whole batch as in gradient descent
- Loss function- weights to compute the difference between predictions and the actual values. The weights should be such that the difference is minimized. 
    - For binary classification- 'binary_crossentropy'
    - For non binary classification- 'categorical_crossentropy'
- metrics take arguments as a list

In [183]:
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

### Training the ann on the training set
- Fitting the training data- x_train, y_train
- batch_size- tells in how many batches the predicted values is compared with the original values, default values is 32
- epochs- Number of passes(iterations) to the algorithm are completed. Neural network is trained on number of epochs to improve the accuracy

In [184]:
history = classifier.fit(x_train, y_train,validation_data = (x_test, y_test), batch_size = 32, epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100


Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [185]:
hist_df = pd.DataFrame(history.history)
hist_df['epochs'] = history.epoch
hist_df

Unnamed: 0,loss,accuracy,val_loss,val_accuracy,epochs
0,0.541395,0.769125,0.437603,0.8150,0
1,0.455353,0.805875,0.414722,0.8280,1
2,0.433605,0.817375,0.405734,0.8360,2
3,0.421816,0.823000,0.395340,0.8425,3
4,0.403269,0.829875,0.382317,0.8530,4
...,...,...,...,...,...
95,0.285038,0.878000,0.370318,0.8585,95
96,0.284569,0.875125,0.368935,0.8570,96
97,0.285970,0.875500,0.371754,0.8530,97
98,0.288939,0.877125,0.367652,0.8595,98


# Making the predictions and evaluating the model
- We will get the probabilty whether a person leaves the bank or not as we have used Sigmoid activation function in the output layer
- Probability is given for class 1, ie probability of a person leaving the bank
- Keeping a threshold value that if probability is > threshold value then there are chances that it is class 1
- We can choose any threshold value

In [186]:
y_pred = classifier.predict(x_test)
y_pred = y_pred > 0.6
y_pred
# False- Not leaving the bank
# True- Leaving the bank

array([[False],
       [False],
       [False],
       ...,
       [False],
       [False],
       [False]])

### Making the confusion matix

In [187]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[1536   59]
 [ 218  187]]


0.8615

- Getting an accuracy of around 86% when using Feed Forward Neural Networks

# Using machine Learning techniques

## Using Decision Trees

In [188]:
from sklearn.tree import DecisionTreeClassifier

In [189]:
dtc = DecisionTreeClassifier(criterion = 'gini', max_depth = 50, max_leaf_nodes = 10, min_samples_leaf = 5)
dtc.fit(x_train,y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=50,
                       max_features=None, max_leaf_nodes=10,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=5, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best')

In [190]:
y_pred = dtc.predict(x_test)

In [191]:
cm = confusion_matrix(y_test, y_pred)
cm


array([[1520,   75],
       [ 204,  201]], dtype=int64)

In [192]:
accuracy_score(y_test, y_pred)

0.8605

## Using Random Forest

In [193]:
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier()
rfc.fit(x_train, y_train)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [194]:
y_pred = rfc.predict(x_test)

In [195]:
cm = confusion_matrix(y_test, y_pred)
cm

array([[1519,   76],
       [ 196,  209]], dtype=int64)

In [196]:
accuracy_score(y_test, y_pred)

0.864

## Using Boosting techniques

In [197]:
from sklearn.ensemble import AdaBoostClassifier
abc = AdaBoostClassifier()
abc.fit(x_train, y_train)
y_pred = abc.predict(x_test)

In [198]:
cm = confusion_matrix(y_test, y_pred)
cm

array([[1507,   88],
       [ 181,  224]], dtype=int64)

In [199]:
accuracy_score(y_test, y_pred)

0.8655

In [200]:
from xgboost import XGBClassifier
xgb = XGBClassifier()
xgb.fit(x_train, y_train)
y_pred = xgb.predict(x_test)

  data = yaml.load(f.read()) or {}
  import pandas.util.testing as tm
  defaults = yaml.load(f)


In [201]:
cm = confusion_matrix(y_test, y_pred)
cm

array([[1491,  104],
       [ 187,  218]], dtype=int64)

In [202]:
accuracy_score(y_test, y_pred)

0.8545

- By usng machine learning algorithms also we are getting accuracy of around 86%.
- So not always neural network will work best for us
- Sometimes we can get low accuracy also by using neural networks
- Majorly, neural networks works well with image data.