# Artificial Neural Network

Importing the Libraries

In [61]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [62]:
# Show the installed TensorFlow version string.
tf.version.VERSION

'2.17.0'

## Part 1 - Data Preprocessing

Importing the dataset

In [63]:
# Read the churn data from the CSV file in the working directory.
dataset = pd.read_csv('Churn_Modelling.csv')

# Features: every column from index 3 up to (not including) the last one;
# the first three columns are skipped. Target: the last column.
X, y = dataset.iloc[:, 3:-1].values, dataset.iloc[:, -1].values

In [64]:
# Show the feature matrix followed by the target vector, one per line.
print(X, y, sep='\n')

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]
[1 0 1 ... 1 1 0]


### Encoding the categorical data

Label encoding the "Gender" column

In [65]:
from sklearn.preprocessing import LabelEncoder

# Column 2 of X holds the gender strings; swap them for integer codes.
le = LabelEncoder()
gender_codes = le.fit_transform(X[:, 2])
X[:, 2] = gender_codes

# Display the encoded column to confirm the replacement.
print(X[:, 2])

[0 0 0 ... 0 1 0]


One Hot encoding the "Geography" column

In [66]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode column 1 (Geography); every other column is passed through
# unchanged.
ct = ColumnTransformer(
    remainder='passthrough',
    transformers=[('encoder', OneHotEncoder(), [1])],
)

X = np.array(ct.fit_transform(X))

# The one-hot (dummy) columns now come first in X, starting at index 0.
print(X)

[[1.0 0.0 0.0 ... 1 1 101348.88]
 [0.0 0.0 1.0 ... 0 1 112542.58]
 [1.0 0.0 0.0 ... 1 0 113931.57]
 ...
 [1.0 0.0 0.0 ... 0 1 42085.58]
 [0.0 1.0 0.0 ... 1 0 92888.52]
 [1.0 0.0 0.0 ... 1 0 38190.78]]


Splitting data into training and test set

In [67]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.2,
)

In [68]:
# Show the four arrays produced by the split, one after another.
print(X_train, X_test, y_train, y_test, sep='\n')

[[0.0 0.0 1.0 ... 1 0 163830.64]
 [0.0 1.0 0.0 ... 1 1 57098.0]
 [1.0 0.0 0.0 ... 1 0 185630.76]
 ...
 [1.0 0.0 0.0 ... 1 0 181429.87]
 [0.0 0.0 1.0 ... 1 1 148750.16]
 [0.0 1.0 0.0 ... 1 0 118855.26]]
[[0.0 1.0 0.0 ... 1 1 192852.67]
 [1.0 0.0 0.0 ... 1 0 128702.1]
 [0.0 0.0 1.0 ... 1 1 75732.25]
 ...
 [0.0 0.0 1.0 ... 1 0 141533.19]
 [0.0 1.0 0.0 ... 1 1 11276.48]
 [0.0 1.0 0.0 ... 1 0 192950.6]]
[0 0 0 ... 0 0 1]
[0 1 0 ... 0 0 0]


Feature Scaling

In [69]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature column before it is fed to the network.
# Scaling happens after the train/test split: the scaler is fitted on the
# training set only, and the test set is transformed with those statistics.
sc = StandardScaler()
X_train, X_test = sc.fit_transform(X_train), sc.transform(X_test)

# Show both scaled matrices, separated by a blank line.
print(X_train, X_test, sep='\n\n')

[[-1.01460667 -0.5698444   1.74309049 ...  0.64259497 -1.03227043
   1.10643166]
 [-1.01460667  1.75486502 -0.57369368 ...  0.64259497  0.9687384
  -0.74866447]
 [ 0.98560362 -0.5698444  -0.57369368 ...  0.64259497 -1.03227043
   1.48533467]
 ...
 [ 0.98560362 -0.5698444  -0.57369368 ...  0.64259497 -1.03227043
   1.41231994]
 [-1.01460667 -0.5698444   1.74309049 ...  0.64259497  0.9687384
   0.84432121]
 [-1.01460667  1.75486502 -0.57369368 ...  0.64259497 -1.03227043
   0.32472465]]

[[-1.01460667  1.75486502 -0.57369368 ...  0.64259497  0.9687384
   1.61085707]
 [ 0.98560362 -0.5698444  -0.57369368 ...  0.64259497 -1.03227043
   0.49587037]
 [-1.01460667 -0.5698444   1.74309049 ...  0.64259497  0.9687384
  -0.42478674]
 ...
 [-1.01460667 -0.5698444   1.74309049 ...  0.64259497 -1.03227043
   0.71888467]
 [-1.01460667  1.75486502 -0.57369368 ...  0.64259497  0.9687384
  -1.54507805]
 [-1.01460667  1.75486502 -0.57369368 ...  0.64259497 -1.03227043
   1.61255917]]


## Part 2 - Building the ANN

Initializing the  ANN

In [70]:
# Seed Python, NumPy and the Keras/TensorFlow backend so that weight
# initialisation and batch shuffling are repeatable from one
# "Restart & Run All" to the next.
tf.keras.utils.set_random_seed(0)

# Keras ships inside TensorFlow 2 as tf.keras; start from an empty Sequential model.
ann = tf.keras.models.Sequential()

Adding the input layer and the first hidden layer

In [71]:
# First hidden layer: 6 fully connected units with the ReLU (rectifier)
# activation. No explicit input layer or input shape is declared here.
ann.add(
    tf.keras.layers.Dense(activation='relu', units=6)
)

Adding the 2nd hidden layer

In [72]:
# Second hidden layer: same configuration as the first (6 units, ReLU).
# `units` is a hyperparameter; other values can be tried to improve accuracy.
ann.add(
    tf.keras.layers.Dense(activation='relu', units=6)
)

Adding Output layer

In [73]:
# Output layer: a single unit whose sigmoid activation is read as a probability.
ann.add(
    tf.keras.layers.Dense(activation='sigmoid', units=1)
)

## Part 3 - Training the ANN

Compiling the ANN

In [74]:
# compile() takes the optimizer, the loss (also called the cost function)
# and the metrics to report:
#   - 'adam' as the optimizer
#   - 'binary_crossentropy' because the target is binary
#     (categorical_crossentropy would be used for a multi-class target)
#   - accuracy as the reported metric
ann.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer='adam',
)

Training the ANN on the Training set

In [75]:
# Train on the scaled training set: mini-batches of 32 rows, 100 passes
# (epochs) over the data.
ann.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.6033 - loss: 0.6634
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7902 - loss: 0.5113
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8111 - loss: 0.4507
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8120 - loss: 0.4384
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8173 - loss: 0.4207
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8224 - loss: 0.4134
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8309 - loss: 0.3977
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8327 - loss: 0.3938
Epoch 9/100
[1m250/250[0m [32

<keras.src.callbacks.history.History at 0x79f228e49390>

## Part 4 - Making predictions and evaluating the model

Predicting the result of a single observation


Use our ANN model to predict whether the customer with the following information will leave the bank:

Geography: France

Credit Score: 600

Gender: Male

Age: 40 years old

Tenure: 3 years

Balance: $ 60000

Number of Products: 2

Does this customer have a credit card? Yes

Is this customer an Active Member: Yes

Estimated Salary: $ 50000


In [76]:
# Build the observation in the same column order as the training matrix:
# the three Geography one-hot columns first, then the remaining features.
# France is encoded as [1, 0, 0] (the first dataset row is a French customer
# and its encoded form starts with 1.0 0.0 0.0); the previous [1, 0, 1]
# switched on two countries at once.
single_customer = [[
    1, 0, 0,   # Geography: France (one-hot)
    600,       # Credit score
    1,         # Gender: Male (label-encoded as 1)
    40,        # Age
    3,         # Tenure (years)
    60000,     # Balance
    2,         # Number of products
    1,         # Has a credit card: yes
    1,         # Is an active member: yes
    50000,     # Estimated salary
]]

# Apply the scaler already fitted on the training set: use transform, not
# fit_transform, so the training mean and standard deviation are reused.
print(ann.predict(sc.transform(single_customer)) > 0.5)

# Reading the output: True -> predicted to leave the bank,
# False -> predicted to stay.

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[[False]]


Predicting Test set Result

In [77]:
# Predict churn probabilities for the test set and threshold them at 0.5:
# only probabilities above 0.5 count as "leaves the bank".
y_pred = ann.predict(X_test) > 0.5

# Print predictions (left column) next to the true labels (right column).
print(np.concatenate((y_pred.reshape(-1, 1), y_test.reshape(-1, 1)), axis=1))


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[[0 0]
 [0 1]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


Confusion Matrix

In [78]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix of true labels vs. thresholded predictions on the test set.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Overall accuracy; left as the last expression so the notebook displays it.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[1514   81]
 [ 201  204]]


0.859

In [79]:
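# Reading the confusion matrix (rows = actual class, columns = predicted class):
# 1514 correct predictions that the customer stays with the bank
# 204 correct predictions that the customer leaves the bank
# 201 incorrect predictions that the customer stays (they actually left)
# 81 incorrect predictions that the customer leaves (they actually stayed)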