In [45]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')

%matplotlib inline

In [46]:
import keras
from keras.models import Sequential
from keras.layers import Dense

In [47]:
# Importing the dataset
df = pd.read_csv("../archive/Churn_Modelling.csv")

In [48]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [49]:
# Looking at customers at a bank (They have measuring unusual churn rates: people are leaving the bank)
# 1: person stayed in the bank
# 0: person left the bank
# We have to predict models of potential people who might leave the bank!
# We will use indepedent variables to predict whether people will leave the bank

In [50]:
# Making Y-Variable
y = df['Exited']

# Including all but one column
Xs = df.iloc[:, 3:13]

In [51]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

labelencoder_X_geo = LabelEncoder()
Xs.loc[:, "Geography"] = labelencoder_X_geo.fit_transform(Xs.loc[:, "Geography"])
labelencoder_X_gender = LabelEncoder()
Xs.loc[:, "Gender"] = labelencoder_X_gender.fit_transform(Xs.loc[:, "Gender"])

onehotencoder = OneHotEncoder(categorical_features=[Xs.columns.get_loc("Geography")])
Xs = onehotencoder.fit_transform(Xs).toarray()

In [52]:
# Removing one of the dummy variables for the country
Xs = Xs[:, 1:]

In [53]:
# Splitting the data set into training and testing

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(Xs, y, test_size=0.2, random_state=0)

In [54]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler

sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)

# We are only using the transform and not the fit transform because we already fit the model
X_test = sc_X.transform(X_test)

In [55]:
# Using the apply function, for every column, we find the total amount of NULL/NA values
df.apply(lambda x: sum(x.isnull()))

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

- Recall that we are using the units 6 as the average of the input and output layer (input=1, output=11, avg=6)

In [56]:
# Initilaizing the ANN model
classifier = Sequential()

# Adding the input layer
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=11))

# Adding the second input layer (we are still using the recitifier function)
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))

# Adding the final input layer (we are apply sigmoid function bc we need a prob of the outcomes happening)
classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))


In [57]:
# Compiling the ANN: finding the best weight to use
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

- Epoch
    - An epoch describes the number of times the algorithm sees the entire data set. So, each time the algorithm has seen all samples in the dataset, an epoch has completed.

- Iteration
    - An iteration describes the number of times a batch of data passed through the algorithm. In the case of neural networks, that means the forward pass and backward pass. So, every time you pass a batch of data through the NN, you completed an iteration.

- Example: An example might make it clearer.
    - Say you have a dataset of 10 examples (or samples). You have a batch size of 2, and you've specified you want the algorithm to run for 3 epochs.
    - Therefore, in each epoch, you have 5 batches (10/2 = 5). Each batch gets passed through the algorithm, therefore you have 5 iterations per epoch. Since you've specified 3 epochs, you have a total of 15 iterations (5 * 3 = 15) for training.
    
- Batch size defines number of samples that going to be propagated through the network.
    - For instance, let's say you have 1050 training samples and you want to set up batch_size equal to 100. Algorithm takes first 100 samples (from 1st to 100th) from the training dataset and trains network. Next it takes second 100 samples (from 101st to 200th) and train network again. We can keep doing this procedure until we will propagate through the networks all samples. The problem usually happens with the last set of samples. In our example we've used 1050 which is not divisible by 100 without remainder. The simplest solution is just to get final 50 samples and train the network.

In [58]:
# Fit the model in the training set
# our batch size is 10 meaning we will be training 10 of the together (using the average) for 
# all the points 100 times!

classifier.fit(X_train, y_train, batch_size=10, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x1a1627a400>

In [59]:
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)

In [60]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[1520,   75],
       [ 195,  210]])

In [61]:
cm = confusion_matrix(y_test, y_pred)
tn, fp, fn, tp = cm.ravel()

total = tn+fp+fn+tp
accuracy = (tn+tp)/total
precision = tp/(tp+fp)
recall = tp/(tp+fn)
f1_score = 2*precision*recall/(precision+recall)


print(
    "true pos: {0}\n"
    "false pos: {1}\n"
    "true neg: {2}\n"
    "false neg: {3}\n".format(tp, fp, tn, fn))

print("""
Out of {0} reviews, the model got {1} correct,
Accuacy is: {2:.2f}%
Precision is: {3:.2f}%
Recall is: {4:.2f}%
F1 Score is: {5:.2f}%""".format(total, tn+tp, accuracy, precision, recall, f1_score))

true pos: 210
false pos: 75
true neg: 1520
false neg: 195


Out of 2000 reviews, the model got 1730 correct,
Accuacy is: 0.86%
Precision is: 0.74%
Recall is: 0.52%
F1 Score is: 0.61%


In [62]:
"""
Predict for:
Geography = France
Credit score = 600
Gender = male
Age = 4
Tenure = 3
Balance = 60000a
# of Product = 2
Has Credit Card = Yes
Is Active Member = Yes
Estimated Salary = 50000
"""
new_pred = classifier.predict(np.array[[]])

TypeError: 'builtin_function_or_method' object is not subscriptable

In [None]:
df.columns

In [None]:
valdict = {'Geography'}
DataFrame()