# Artificial Neural Network

### Importing the libraries

In [79]:
# 09/01/2024
# IDE: VSC
# Section 36: Artificial Neural Networks
# Video [335, 339]

# pip install tensorflow==2.14.0

import numpy as np
import pandas as pd
import tensorflow as tf

In [80]:
# Show the installed TensorFlow version (the install note in the first cell pins 2.14.0).
tf.__version__

'2.14.0'

## Part 1 - Data Preprocessing

### Importing the dataset

In [81]:
# Load the bank-churn records from the CSV file.
dataset = pd.read_csv('Churn_Modelling.csv')

# X: every column from index 3 up to (but not including) the last one, which
#    leaves out RowNumber (0), CustomerId (1) and Surname (2).
# y: the last column only.
X, y = dataset.iloc[:, 3:-1].values, dataset.iloc[:, -1].values

In [82]:
# Preview the raw feature matrix (NumPy abbreviates large arrays with "...").
print(X)

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]


In [83]:
# Preview the target vector (the last column of the dataset).
print(y)

[1 0 1 ... 1 1 0]


### Encoding categorical data

Label Encoding the "Gender" column

In [84]:
from sklearn.preprocessing import LabelEncoder

# The dataset has no missing values, so we go straight to encoding the
# categorical columns.
le = LabelEncoder()

# X[:, 2] --> every row, column index 2 only (Gender).
# After encoding: Female --> 0, Male --> 1.
gender_codes = le.fit_transform(X[:, 2])
X[:, 2] = gender_codes

In [85]:
# Check the result: the Gender column (index 2) now holds 0/1 instead of text.
print(X)

[[619 'France' 0 ... 1 1 101348.88]
 [608 'Spain' 0 ... 0 1 112542.58]
 [502 'France' 0 ... 1 0 113931.57]
 ...
 [709 'France' 0 ... 0 1 42085.58]
 [772 'Germany' 1 ... 1 0 92888.52]
 [792 'France' 0 ... 1 0 38190.78]]


One Hot Encoding the "Geography" column

In [86]:
# Origin: data_preprocessing_tools

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# A transformer is described by a (name, estimator, columns) triple:
# 'encoder'       --> label of this encoding step
# OneHotEncoder() --> class that performs the transformation
# [1]             --> index of the column to encode (Geography)
geography_step = ('encoder', OneHotEncoder(), [1])

# remainder='passthrough' --> keep the columns that are not encoded; without it
# only the encoded country columns would be returned
ct = ColumnTransformer(transformers=[geography_step], remainder='passthrough')

# The dummy columns come first in the result:
# France (1.0 0.0 0.0), Germany (0.0 1.0 0.0), Spain (0.0 0.0 1.0)
# np.array(...) guarantees X is a NumPy array, which the later steps expect.
# Gotcha: X is overwritten here, so run this cell only once per kernel session.
X = np.array(ct.fit_transform(X))

In [87]:
# Check the result: the three Geography dummy columns now come first.
print(X)

[[1.0 0.0 0.0 ... 1 1 101348.88]
 [0.0 0.0 1.0 ... 0 1 112542.58]
 [1.0 0.0 0.0 ... 1 0 113931.57]
 ...
 [1.0 0.0 0.0 ... 0 1 42085.58]
 [0.0 1.0 0.0 ... 1 0 92888.52]
 [1.0 0.0 0.0 ... 1 0 38190.78]]


### Splitting the dataset into the Training set and Test set

In [88]:
# Origin: data_preprocessing_tools

from sklearn.model_selection import train_test_split

# test_size=0.2  --> 20% of the customers are held out for testing (2 out of
#                    every 10); the remaining 80% are used for training
# random_state=1 --> fixes the seed so the same split is produced on every run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [89]:
# Training features (the 80% part of the split).
print(X_train)

[[0.0 1.0 0.0 ... 0 1 124749.08]
 [1.0 0.0 0.0 ... 0 0 41104.82]
 [0.0 1.0 0.0 ... 1 1 45750.21]
 ...
 [1.0 0.0 0.0 ... 1 1 92027.69]
 [1.0 0.0 0.0 ... 1 1 101168.9]
 [0.0 1.0 0.0 ... 1 0 33462.94]]


In [90]:
# Test features (the held-out 20% part of the split).
print(X_test)

[[1.0 0.0 0.0 ... 1 1 97057.28]
 [1.0 0.0 0.0 ... 1 0 66526.01]
 [1.0 0.0 0.0 ... 0 1 90537.47]
 ...
 [0.0 0.0 1.0 ... 0 1 161571.79]
 [0.0 1.0 0.0 ... 1 1 165257.31]
 [0.0 1.0 0.0 ... 1 1 49025.79]]


In [91]:
# Training labels.
print(y_train)

[0 0 1 ... 1 0 1]


In [92]:
# Test labels.
print(y_test)

[0 0 0 ... 0 0 0]


### Feature Scaling

In [93]:
# Origin: data_preprocessing_tools
# Feature scaling is mandatory for DL (Deep Learning).

from sklearn.preprocessing import StandardScaler

# fit: learn the mean and standard deviation of each feature from the training
# set only, so nothing about the test set leaks into the scaler.
sc = StandardScaler().fit(X_train)

# transform: apply the standardization formula so all features share one scale.
# Every column is scaled here, the one-hot Geography columns included, and the
# test set is scaled with the statistics learned from the training set.
# Gotcha: both arrays are overwritten, so run this cell only once per kernel session.
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

## Part 2 - Building the ANN

### Initializing the ANN

In [94]:
# ANN (Artificial Neural Network)

# Seed Python, NumPy and TensorFlow in one call so that weight initialisation and
# batch shuffling are repeatable from run to run, in the same spirit as the
# random_state passed to train_test_split.
tf.keras.utils.set_random_seed(1)

# Sequential --> the network is built as a plain stack of layers, added one by
# one in the following cells.
ann = tf.keras.models.Sequential()

### Adding the input layer and the first hidden layer

In [95]:
# First hidden layer: a fully connected (Dense) layer.
# units=6           --> 6 neurons (a hyperparameter, open to tuning)
# activation='relu' --> rectified linear unit, i.e. the rectifier activation function
#
# Choosing an activation function for an output layer:
# sigmoid --> yes or no, 0 or 1
# softmax --> classification with more than 2 classes to predict
# none    --> regression, a continuous real number
hidden_layer_1 = tf.keras.layers.Dense(units=6, activation='relu')
ann.add(hidden_layer_1)

### Adding the second hidden layer

In [96]:
# Second hidden layer: again 6 neurons with the rectifier activation.
hidden_layer_2 = tf.keras.layers.Dense(units=6, activation='relu')
ann.add(hidden_layer_2)

### Adding the output layer

In [97]:
# Output layer.
# units=1              --> a single neuron is enough to encode a binary outcome
# activation='sigmoid' --> the output can be read as a probability
# (activation='softmax' would be used to predict more than two classes)
output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid')
ann.add(output_layer)

## Part 3 - Training the ANN

### Compiling the ANN

In [98]:
# optimizer='adam'           --> a stochastic gradient descent optimizer: it updates
#                                the weights to reduce the loss between the
#                                predictions and the real results
# loss='binary_crossentropy' --> loss for a binary outcome; with more than two
#                                classes use loss='categorical_crossentropy'
# metrics=['accuracy']       --> accuracy is reported while training
ann.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

### Training the ANN on the Training set

In [99]:
# fit           --> trains the ANN on the training set
# batch_size=32 --> predictions are compared with the real results 32 observations
#                   at a time, and the weights are updated once per batch instead
#                   of after every single observation
# epochs=50     --> the training set is passed through the network 50 times
ann.fit(x=X_train, y=y_train, batch_size=32, epochs=50)

Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x175d6735750>

## Part 4 - Making the predictions and evaluating the model

### Predicting the result of a single observation

**Homework**

Use our ANN model to predict whether the customer with the following information will leave the bank:

Geography: France

Credit Score: 600

Gender: Male

Age: 40 years old

Tenure: 3 years

Balance: \$ 60000

Number of Products: 2

Does this customer have a credit card ? Yes

Is this customer an Active Member: Yes

Estimated Salary: \$ 50000

So, should we say goodbye to that customer ?

### Data preprocessing

In [100]:
# Column order in the CSV:
# CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
# After one-hot encoding, the Geography dummies come first, so the row to
# predict is laid out as: France,600,Male,40,3,60000,2,1,1,50000,?
customer_row = [
    1, 0, 0,  # Geography: France --> 1.0 0.0 0.0
    600,      # CreditScore
    1,        # Gender: Male --> 1
    40,       # Age
    3,        # Tenure
    60000,    # Balance
    2,        # NumOfProducts
    1,        # HasCrCard: yes (1)
    1,        # IsActiveMember: yes (1)
    50000,    # EstimatedSalary
]

# Wrap the row in an outer list so the array is 2D (1 row x 12 columns).
single_prediction = np.array([customer_row])

print(single_prediction)

[[    1     0     0   600     1    40     3 60000     2     1     1 50000]]


In [101]:
# Scaling: we use transform, not fit_transform.
# The single observation must be scaled by the same scaler that was fitted on the
# training set. Re-fitting on this row would replace the training mean and
# standard deviation with statistics of the row itself, so the network would
# receive inputs on a different scale from the one it was trained on.
# Gotcha: single_prediction is overwritten, so running this cell twice scales the
# row twice; re-run the previous cell first if you need to repeat it.
single_prediction = sc.transform(single_prediction)

# Print floats with 2 decimals. Setting the option here, rather than relying on a
# later cell having been run, keeps the displayed values the same after
# "Restart Kernel -> Run All".
np.set_printoptions(precision=2)
print(single_prediction)

[[ 1.   -0.58 -0.57 -0.52  0.92  0.11 -0.7  -0.26  0.8   0.64  0.98 -0.87]]


In [102]:
# predict expects a 2D array as input (one row per observation);
# single_prediction is already built that way.
ann.predict(single_prediction)



array([[0.05]], dtype=float32)

In [103]:
# Turn the predicted probability into a yes/no answer with a 0.5 threshold.
churn_probability = ann.predict(single_prediction)
will_leave = churn_probability > 0.5
print('Customer leaves: ', will_leave)

Customer leaves:  [[False]]


**Solution**

Therefore, our ANN model predicts that this customer stays in the bank!

**Important note 1:** Notice that the values of the features were all input in a double pair of square brackets. That's because the "predict" method always expects a 2D array as the format of its inputs. And putting our values into a double pair of square brackets makes the input exactly a 2D array.

**Important note 2:** Notice also that the country "France" was not input as a string but as "1, 0, 0" in the first three columns. That's because the predict method expects the one-hot-encoded values of the country, and as we see in the first row of the matrix of features X, "France" was encoded as "1, 0, 0". Be careful to put these values in the first three columns, because the ColumnTransformer places the dummy variables before the columns it passes through.

### Predicting the Test set results

In [104]:
# Booleans behave like integers in arithmetic: False == 0 and True == 1,
# so the two lines printed are 0 (False * 10) and 10 (True * 10).
for comparison in (0 > 1, 0 < 1):
    print(comparison * 10)

0
10


In [105]:
# origin: logistic_regression.ipynb
# Show the predicted labels side by side with the true test labels.

# precision=2 --> NumPy prints floats with 2 decimals
np.set_printoptions(precision=2)

# Thresholding the predictions at 0.5 turns them into booleans
# (False == 0, True == 1)
y_pred = ann.predict(X_test) > 0.5

# reshape(-1, 1) --> lay each array out as a single column
# axis of np.concatenate:
# --> 0: vertical concatenation (rows are appended)
# --> 1: horizontal concatenation (columns are appended)
predicted_column = y_pred.reshape(-1, 1)
actual_column = y_test.reshape(-1, 1)

# Left column (prediction) | Right column (test)
print(np.concatenate((predicted_column, actual_column), axis=1))

 1/63 [..............................] - ETA: 1s

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


### Making the Confusion Matrix

In [106]:
# origin: logistic_regression.ipynb

# scikit-learn metrics module
from sklearn.metrics import confusion_matrix, accuracy_score

# Layout of scikit-learn's confusion matrix for the labels (0, 1):
# rows are the ACTUAL classes, columns are the PREDICTED classes.
#
#                      Predicted: 0 (stays)    Predicted: 1 (leaves)
# Actual: 0 (stays)    true negatives  (TN)    false positives (FP)
# Actual: 1 (leaves)   false negatives (FN)    true positives  (TP)
cm = confusion_matrix(y_test, y_pred)
print(cm)

# accuracy = (TN + TP) / (TN + FP + FN + TP), i.e. the diagonal of cm divided by
# the number of test observations.
# The exact counts change from one training run to the next, so read them from
# the printed matrix rather than hard-coding them in a comment.
accuracy_score(y_test, y_pred)

[[1517   68]
 [ 205  210]]


0.8635