## 1. Importing Libraries

In [47]:
import numpy as np
import pandas as pd
import tensorflow as tf

## 2. Data Preprocessing

#### Import dataset

In [33]:
# Read the raw churn dataset; the path is relative to the notebook's working directory.
bankData = pd.read_csv(filepath_or_buffer='Churn_Modelling.csv')

In [34]:
# Features: every column from index 3 up to (but excluding) the last one.
# Labels: the last column.
# NOTE(review): the three skipped leading columns are presumably identifiers — confirm against the CSV.
feature_frame = bankData.iloc[:, 3:-1]
label_series = bankData.iloc[:, -1]
X, y = feature_frame.values, label_series.values

In [35]:
# Quick look at the extracted feature matrix.
print(X)

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]


In [36]:
# Quick look at the label vector.
print(y)

[1 0 1 ... 1 1 0]


### Encoding Categorical Data

In [37]:
# Gender (column 2) has just two categories, so one integer-coded column is enough.
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder().fit(X[:, 2])
# Overwrite the string labels in place with their integer codes.
X[:, 2] = le.transform(X[:, 2])
X[:, 2]

array([0, 0, 0, ..., 0, 1, 0], dtype=object)

In [40]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the Geography column (index 1); all other columns pass through.
# This cell overwrites X, so running it a second time would one-hot encode
# whatever sits at index 1 after the first pass and silently add extra columns.
# Only encode while column 1 still holds the raw string categories, which makes
# the cell safe to re-run and leaves the fitted `ct` from the first run intact.
if any(isinstance(value, str) for value in X[:, 1]):
    ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
    X = np.array(ct.fit_transform(X))
X

array([[1.0, 0.0, 1.0, ..., 1, 1, 101348.88],
       [1.0, 0.0, 0.0, ..., 0, 1, 112542.58],
       [1.0, 0.0, 1.0, ..., 1, 0, 113931.57],
       ...,
       [1.0, 0.0, 1.0, ..., 0, 1, 42085.58],
       [0.0, 1.0, 0.0, ..., 1, 0, 92888.52],
       [1.0, 0.0, 1.0, ..., 1, 0, 38190.78]], dtype=object)

### Splitting the dataset into the Training set and Test set

In [44]:
from sklearn.model_selection import train_test_split
# No test_size is passed, so train_test_split uses its default split ratio;
# random_state is pinned so the split is reproducible.
split_parts = train_test_split(X, y, random_state=0)
X_train, X_test, y_train, y_test = split_parts
print(X_train)

[[0.0 1.0 0.0 ... 0 0 5831.0]
 [1.0 0.0 1.0 ... 1 0 95611.47]
 [1.0 0.0 0.0 ... 1 1 42855.97]
 ...
 [1.0 0.0 1.0 ... 1 0 181429.87]
 [1.0 0.0 0.0 ... 1 1 148750.16]
 [0.0 1.0 0.0 ... 1 0 118855.26]]


### Feature Scaling

In [42]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# Learn the mean and standard deviation from the training split only.
X_train = sc.fit_transform(X_train)
# Apply the training statistics to the test split. Re-fitting here would scale
# the test data with its own statistics and would leave `sc` holding test-set
# statistics for any later sc.transform(...) call on new customers.
X_test = sc.transform(X_test)

In [43]:
# Inspect the training features after scaling.
print(X_train)

[[-1.76021608  1.76021608 -1.01558815 ... -1.55362351 -1.03446007
  -1.64080994]
 [ 0.56811207 -0.56811207  0.98465111 ...  0.64365658 -1.03446007
  -0.07927152]
 [ 0.56811207 -0.56811207 -1.01558815 ...  0.64365658  0.96668786
  -0.99684012]
 ...
 [ 0.56811207 -0.56811207  0.98465111 ...  0.64365658 -1.03446007
   1.4133552 ]
 [ 0.56811207 -0.56811207 -1.01558815 ...  0.64365658  0.96668786
   0.84496184]
 [-1.76021608  1.76021608 -1.01558815 ...  0.64365658 -1.03446007
   0.32500428]]


## 3. Building ANN

### Initialize ANN

In [None]:
# Sequential model: layers are stacked in the order they are added.
# The TensorFlow module is imported as `tf`.
ann = tf.keras.models.Sequential()

### Add input and first hidden layer

In [None]:
# Layers are appended with the Dense class. No separate input layer is declared
# in this cell, so the first Dense layer is the one that receives the features.
# Each entry is (number of units, activation function).
layer_specs = (
    (6, 'relu'),      # first hidden layer
    (6, 'relu'),      # second hidden layer
    (1, 'sigmoid'),   # output layer: one unit, sigmoid for a classification probability
)
for n_units, activation_name in layer_specs:
    ann.add(tf.keras.layers.Dense(units = n_units, activation = activation_name))

## 4. Training ANN

In [None]:
# Compile the ANN.
# The keyword is spelled `optimizer`; `optimiser` is not a recognised argument
# of compile() and makes the call fail before any training happens.
# Binary cross-entropy matches the single sigmoid output unit.
ann.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

# Train the model on the training set.
ann.fit(X_train, y_train, batch_size = 32, epochs = 100)

## 5. Make Prediction

Use our ANN model to predict whether the customer with the following information will leave the bank:

Geography: France

Credit Score: 600

Gender: Male

Age: 40 years old

Tenure: 3 years

Balance: $ 60000

Number of Products: 2

Does this customer have a credit card ? Yes

Is this customer an Active Member: Yes

Estimated Salary: $ 50000

In [None]:
# One row laid out like the encoded training matrix.
# NOTE(review): assumes the one-hot cell ran exactly once, so the three
# Geography indicator columns come first — confirm the column order.
new_customer = [[
    1, 0, 0,   # Geography one-hot (France)
    600,       # Credit Score
    1,         # Gender (Male)
    40,        # Age
    3,         # Tenure
    60000,     # Balance
    2,         # Number of Products
    1,         # Has a credit card
    1,         # Is an active member
    50000,     # presumably Estimated Salary — confirm against the dataset columns
]]
# Scale with the fitted scaler, then threshold the sigmoid output at 0.5.
leave_probability = ann.predict(sc.transform(new_customer))
print(leave_probability > 0.5)

### Predicting test results

In [None]:
# Threshold the predicted probabilities at 0.5 to get boolean class labels.
y_pred = ann.predict(X_test) > 0.5
# Print predictions and true labels side by side as two columns.
predicted_col = y_pred.reshape(-1, 1)
actual_col = y_test.reshape(-1, 1)
print(np.concatenate((predicted_col, actual_col), axis=1))

### Making the Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
# Confusion matrix of true labels against predicted labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
# Overall fraction of correct predictions; shown as the cell's output.
accuracy_score(y_true=y_test, y_pred=y_pred)

## 6. Dump the model using pickle or joblib for external application use

In [None]:
# Persist the trained churn model twice: once with pickle, once with joblib.
# NOTE(review): whether a Keras model serialises cleanly this way depends on the
# installed TensorFlow/Keras version — confirm. Only `ann` is written here; the
# fitted scaler/encoders are not, so a consumer would need them separately.

# --- pickle ---
import pickle

model_pkl_file = 'bankCustomer_Retension_pickle.pkl'
with open(model_pkl_file, mode='wb') as pkl_handle:
    pickle.dump(ann, pkl_handle)

# --- joblib ---
import joblib

filename = 'bankCustomer_Retension_joblib.sav'
joblib.dump(ann, filename)