In [None]:
#Artificial Neural Network

In [None]:
#Installing Theano (Open source numerical computation library)

In [None]:
#Installing TensorFlow (Open source numerical computation library)

In [None]:
#Installing Keras (high-level library that runs on top of Theano or TensorFlow.
# Just as you use Scikit-Learn to build ML models, you use Keras to build ANNs)

# 1. Data Preprocessing

In [None]:
#Part 1 - Data Preprocessing

# Importing the libraries
import numpy as np
# threshold=np.inf switches off NumPy's "..." summarisation, so printed arrays are shown in full
np.set_printoptions(threshold=np.inf)
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Read the churn data from the CSV file (path is relative to the notebook's working directory)
dataset = pd.read_csv(filepath_or_buffer='Churn_Modelling.csv')

In [None]:
# Preview the first rows of the raw data
dataset.head()

There are two categorical variables that need to be encoded: Gender and Geography.

In [None]:
# Print a concise summary of the DataFrame
dataset.info()

In [None]:
# Descriptive statistics of the dataset's columns
dataset.describe()

In [None]:
# Features are the columns at positions 3..12; the target is the column at position 13
feature_frame = dataset.iloc[:, 3:13]
target_series = dataset.iloc[:, 13]
X = feature_frame.values
y = target_series.values

In [None]:
# Show the first ten feature rows, a separator line, then the first ten targets
preview_rows = slice(None, 10)
print(X[preview_rows, :], '\n')
print(y[preview_rows])

In [None]:
# Encoding categorical data

# Encoding the Independent Variable
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Country (column 1): text label -> integer code
labelencoder_X = LabelEncoder()
X[:, 1] = labelencoder_X.fit_transform(X[:, 1])

# Gender (column 2): text label -> integer code
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])

# One-hot encode Country so its integer codes are not read as an ordering.
# OneHotEncoder(categorical_features = [1]) was removed in scikit-learn 0.22, so the
# encoder is applied to column 1 through a ColumnTransformer instead. The encoded
# dummy columns come first and the untouched columns follow in their original order,
# which is the layout the rest of the notebook relies on.
# sparse_threshold = 0 forces a dense ndarray result (the old code called .toarray()).
onehotencoder = ColumnTransformer(
    transformers = [('country', OneHotEncoder(), [1])],
    remainder = 'passthrough',
    sparse_threshold = 0)
# The passthrough columns are object-typed, so cast the stacked result to float
X = onehotencoder.fit_transform(X).astype(float)
# Drop the first dummy column to avoid the dummy variable trap
X = X[:, 1:]

In [None]:
# Same preview as before, now on the encoded feature matrix
preview_rows = slice(None, 10)
print(X[preview_rows, :], '\n')
print(y[preview_rows])

In [None]:
# (rows, columns) of the encoded feature matrix; the column count has to match
# the input_dim = 11 given to the first Dense layer
X.shape

In [None]:
# Hold out 20% of the rows as the test set; random_state pins the shuffle so the split is repeatable
from sklearn.model_selection import train_test_split
split_options = {'test_size': 0.2, 'random_state': 0}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
# Feature Scaling (very important)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# Learn the scaling statistics from the training set only, then apply them to both sets
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# 2. Creating our ANN

In [None]:
#Import Keras library and packages

import keras
import sys
from keras.models import Sequential #to initialize NN
from keras.layers import Dense #used to create layers in NN
from keras.layers import Dropout

In [None]:
#Initialising the ANN as a Sequential model: the layers are stacked one after
#another by the classifier.add(...) calls in the following cells

classifier = Sequential()

In [None]:
#Adding the input layer and the first hidden layer
#units - number of nodes to add to the hidden layer.
#Tip: average of the number of nodes in the input layer and in the output layer: (11 + 1) / 2 = 6
#kernel_initializer - randomly initialize the weights with small numbers close to zero, according to a uniform distribution.
#activation - Activation function.
#input_dim - number of nodes in the input layer, that our hidden layer should be expecting
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11 ))
#Dropout randomly zeroes a fraction of this layer's outputs during training to limit overfitting.
#The Keras 2 keyword is `rate`; `p` is the Keras 1 spelling, which newer Keras releases no longer accept.
classifier.add(Dropout(rate = 0.1))

In [None]:
#Adding Second hidden layer
# There is no need to specify the input dimensions since our network already knows.
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
# The Keras 2 keyword is `rate`; `p` is the Keras 1 spelling, which newer Keras releases no longer accept.
classifier.add(Dropout(rate = 0.1))

In [None]:
#Adding Output layer
# The input size is again inferred from the previous layer.
#units - a single output node
#activation - sigmoid here; with more than two output categories we would use softmax
output_layer = Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid')
classifier.add(output_layer)

In [None]:
#Compiling the ANN (configures training by stochastic gradient descent)
#optimizer - algorithm used to search for the best weights
#loss - loss function the optimizer minimises
#metrics - criteria used to evaluate the model

classifier.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy'],
)

In [None]:
#Fitting the ANN to the Training Set
#batch_size : number of observations after which we update the weights
#epochs : number of full passes over the training set (this argument was called nb_epoch in Keras 1)
classifier.fit(X_train, y_train, batch_size = 10, epochs = 100  )

# 3. Making Predictions

In [None]:
#Predicting the Test set results
#The sigmoid output is thresholded at 0.5 to obtain a True/False prediction per row
y_pred = classifier.predict(X_test) > 0.5

y_pred

In [None]:
#Making the Confusion Matrix (rows: actual class, columns: predicted class)
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true = y_test, y_pred = y_pred)

cm

In [None]:
# Accuracy = correct predictions (diagonal of the confusion matrix) / all predictions.
# Computed from cm instead of typed-in counts, so the figure stays correct whenever
# the model is retrained or the split changes.
accuracy = float(cm.trace()) / cm.sum()
accuracy

# 4. Predicting a single observation

In [None]:
"""Predict if the customer with the following informations will leave the bank:
Geography: France
Credit Score: 600
Gender: Male
Age: 40
Tenure: 3
Balance: 60000
Number of Products: 2
Has Credit Card: Yes
Is Active Member: Yes
Estimated Salary: 50000"""

# One row laid out like the training matrix: the two leading zeros are the Geography
# dummy columns (presumably France -> 0, 0; this depends on the LabelEncoder ordering),
# followed by credit score, gender, age, tenure, balance, products, card, active, salary.
customer = np.array([[0.0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]])
# Scale with sc, the StandardScaler object fitted on the training set
customer_scaled = sc.transform(customer)
new_prediction = classifier.predict(customer_scaled) > 0.5

new_prediction

# 5. Evaluating, Improving and Tuning the ANN

In [None]:
#A single train/test split gives an accuracy estimate that varies from run to run (bias-variance trade-off), so we evaluate the model with k-fold cross-validation

#Import Keras library and packages

import keras
import sys
from keras.models import Sequential #to initialize NN
from keras.layers import Dense #used to create layers in NN

#Evaluating the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score

def build_classifier():
    """Build and compile the ANN used for cross-validation.

    The network has 11 inputs, two hidden layers of 6 relu units and a single
    sigmoid output, compiled with the adam optimizer and binary cross-entropy.

    Returns:
        The compiled Sequential model.
    """
    classifier = Sequential()
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11 ))
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(optimizer = 'adam',loss= "binary_crossentropy",metrics=["accuracy"])
    # KerasClassifier trains whatever build_fn returns; without this return it would receive None
    return classifier
    
#Wrap the model builder so scikit-learn can use it as an estimator;
#batch_size and epochs are the training settings applied on every fold
classifier = KerasClassifier(build_fn = build_classifier, batch_size = 10, epochs = 100)

#cross_val_score arguments:
#  estimator - object used to fit the data (first positional argument)
#  X - data to fit
#  y - target variable to try to predict
#  cv - number of train/test folds
#  n_jobs - number of CPUs to use for the computation. -1 means 'all CPUs'
accuracies = cross_val_score(classifier, X_train, y_train, cv = 10, n_jobs = -1)

In [None]:
# Average accuracy over the cross-validation folds
mean = accuracies.mean()
# NOTE: despite its name, this holds the standard deviation of the fold accuracies (.std()), not the variance
variance = accuracies.std()

In [None]:
# Tuning the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense

def build_classifier(optimizer):
    """Build and compile the ANN for a given optimizer.

    Args:
        optimizer: value forwarded unchanged to ``compile(optimizer=...)``.

    Returns:
        The compiled Sequential model (11 inputs, two 6-unit relu layers,
        one sigmoid output).
    """
    layers = [
        Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11),
        Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'),
        Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'),
    ]
    classifier = Sequential()
    for layer in layers:
        classifier.add(layer)
    classifier.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])
    return classifier
classifier = KerasClassifier(build_fn = build_classifier)


# Hyper-parameter grid: 2 batch sizes x 2 epoch counts x 2 optimizers = 8 combinations,
# each scored on accuracy with 10-fold cross-validation
parameters = dict(batch_size = [25, 32],
                  epochs = [100, 500],
                  optimizer = ['adam', 'rmsprop'])
grid_search = GridSearchCV(classifier,
                           parameters,
                           scoring = 'accuracy',
                           cv = 10)
grid_search = grid_search.fit(X_train, y_train)
# Best parameter combination found and its mean cross-validated accuracy
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_