# Artificial Neural Network

### Importing the libraries

In [4]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf

In [5]:
tf.__version__

'2.18.0'

## Part 1 - Data Preprocessing

### Importing the Dataset

In [6]:
# Read the churn data set from disk.
dataset = pd.read_csv('./data/Churn_Modelling.csv')
# Labels: the last column.
y = dataset.iloc[:, -1].to_numpy()
# Features: every column except the first three and the final label column.
X = dataset.iloc[:, 3:-1].to_numpy()

In [7]:
print(y)

[1 0 1 ... 1 1 0]


### Encoding categorical data

#### Label Encoding the "Gender" column

In [8]:
# This cell is disabled: "Gender" (column 2 of X) is encoded by the OrdinalEncoder inside the
# ColumnTransformer cell below, so a separate LabelEncoder pass is not applied here.
#from sklearn.preprocessing import LabelEncoder
#le = LabelEncoder()
#X[:, 2] = le.fit_transform(X[:, 2])

In [9]:
X

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

#### One Hot Encoding the "Geography" column (and ordinal-encoding the "Gender" column)

In [10]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder

# Encode the two text columns and pass every other feature through unchanged.
# In the output the encoded columns come first, in the order listed here,
# followed by the passthrough columns.
ct = ColumnTransformer(
    transformers=[
        ('encoder_location', OneHotEncoder(), [1]),  # one-hot encode Geography (column 1)
        ('encoder_gender', OrdinalEncoder(), [2]),   # map Gender (column 2) to an ordinal code
    ],
    remainder='passthrough')

# Encode from the raw feature slice of `dataset` rather than from `X` itself, so that
# re-running this cell gives the same result instead of re-encoding the already-encoded array.
X = np.array(ct.fit_transform(dataset.iloc[:, 3:-1].values))

In [11]:
pd.DataFrame(X)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,1.0,0.0,0.0,0.0,619,42,2,0.0,1,1,1,101348.88
1,0.0,0.0,1.0,0.0,608,41,1,83807.86,1,0,1,112542.58
2,1.0,0.0,0.0,0.0,502,42,8,159660.8,3,1,0,113931.57
3,1.0,0.0,0.0,0.0,699,39,1,0.0,2,0,0,93826.63
4,0.0,0.0,1.0,0.0,850,43,2,125510.82,1,1,1,79084.1
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,1.0,0.0,0.0,1.0,771,39,5,0.0,2,1,0,96270.64
9996,1.0,0.0,0.0,1.0,516,35,10,57369.61,1,1,1,101699.77
9997,1.0,0.0,0.0,0.0,709,36,7,0.0,1,0,1,42085.58
9998,0.0,1.0,0.0,1.0,772,42,3,75075.31,2,1,0,92888.52


In [12]:
X[0]

array([1.0, 0.0, 0.0, 0.0, 619, 42, 2, 0.0, 1, 1, 1, 101348.88],
      dtype=object)

#### Splitting the dataset into the Training set and Test set

In [13]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

#### Feature Scaling

In [14]:
# Feature scaling is essential for neural networks, and it must be applied to ALL features.


In [15]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then reuse them for the test rows,
# so that no information about the test set leaks into preprocessing.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [16]:
pd.DataFrame(X_test)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,-1.014607,1.754865,-0.573694,-1.091687,-0.552043,-0.368904,1.044737,0.879303,-0.921591,0.642595,0.968738,1.610857
1,0.985604,-0.569844,-0.573694,-1.091687,-1.314903,0.109617,-1.031415,0.429722,-0.921591,0.642595,-1.032270,0.495870
2,-1.014607,-0.569844,1.743090,-1.091687,0.571630,0.301026,1.044737,0.308583,-0.921591,0.642595,0.968738,-0.424787
3,0.985604,-0.569844,-0.573694,0.916013,1.416961,-0.656016,-0.339364,0.575336,-0.921591,-1.556190,-1.032270,-0.187777
4,-1.014607,1.754865,-0.573694,0.916013,0.571630,-0.081791,0.006661,1.389611,0.809503,0.642595,0.968738,0.616842
...,...,...,...,...,...,...,...,...,...,...,...,...
1995,0.985604,-0.569844,-0.573694,-1.091687,-0.263393,-1.421650,-1.377440,-1.215717,0.809503,0.642595,0.968738,1.404319
1996,0.985604,-0.569844,-0.573694,-1.091687,-0.665441,-0.368904,0.698712,-1.215717,0.809503,0.642595,-1.032270,-0.511196
1997,-1.014607,-0.569844,1.743090,0.916013,-0.747912,-0.273200,-1.377440,1.297455,0.809503,0.642595,-1.032270,0.718885
1998,-1.014607,1.754865,-0.573694,0.916013,-0.005670,-0.464608,-0.339364,1.059752,-0.921591,0.642595,0.968738,-1.545078


## Part 2 - Building the ANN

#### Initializing the ANN

In [17]:
# Seed the Python, NumPy and TensorFlow random generators so that weight initialisation and
# batch shuffling are repeatable between runs (GPU kernels may still cause small differences).
tf.keras.utils.set_random_seed(0)

# Keras ships as part of TensorFlow 2; Sequential builds the network as a linear stack of layers.
ann = tf.keras.models.Sequential()

#### Adding the input layer and the first hidden layer

In [18]:
# First hidden layer, fully connected (Dense) to the input features.
#   units      -> number of neurons in this hidden layer
#   activation -> 'relu', the rectifier activation function
# tf.keras.layers provides the layer classes; ann.add appends one to the model.
ann.add(
    tf.keras.layers.Dense(units=6, activation='relu')
)

#### Adding the second hidden layer

In [19]:
# Second hidden layer: same shape as the first (6 ReLU units, fully connected).
ann.add(
    tf.keras.layers.Dense(units=6, activation='relu')
)

#### Adding the output layer

In [20]:
# Output layer: fully connected (Dense) with a single sigmoid unit, which is enough for a
# binary 0/1 target. A target with more than two classes would instead need one unit per
# class (think one-hot encoded output) and a softmax activation.
ann.add(
    tf.keras.layers.Dense(units=1, activation='sigmoid')
)

## Part 3 - Train the ANN

#### Compiling the ANN

In [21]:
# Compiling needs three things:
#   optimizer -> 'adam', a stochastic gradient descent method
#   loss      -> how the difference between prediction and truth is computed;
#                'binary_crossentropy' for a binary target
#                ('categorical_crossentropy' for a non-binary target)
#   metrics   -> list of metrics to report during training (several are available)
ann.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

I0000 00:00:1737825914.363363  146974 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6096 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2070 with Max-Q Design, pci bus id: 0000:01:00.0, compute capability: 7.5


#### Training the ANN on the Training set

In [22]:
# Train on the training features (X_train) and the training labels (y_train).
#   batch_size -> number of samples predicted and compared per weight update (32 is the default)
#   epochs     -> number of full passes over the training set
# Keeping the returned History object makes the per-epoch loss/accuracy available afterwards
# (history.history) and stops its repr from being echoed as the cell output.
history = ann.fit(X_train, y_train, batch_size=32, epochs=100)

Epoch 1/100


I0000 00:00:1737825915.397499  147187 service.cc:148] XLA service 0x7ff48c001940 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1737825915.397698  147187 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 2070 with Max-Q Design, Compute Capability 7.5
2025-01-25 11:25:15.435339: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1737825915.572293  147187 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m 60/250[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 3ms/step - accuracy: 0.4568 - loss: 0.7750 

I0000 00:00:1737825916.376462  147187 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5871 - loss: 0.6707
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8011 - loss: 0.4677 
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7981 - loss: 0.4489 
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8083 - loss: 0.4310 
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8172 - loss: 0.4219 
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8158 - loss: 0.4203 
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8224 - loss: 0.4208 
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8247 - loss: 0.4118 
Epoch 9/100
[1m250/250[0m [32m━━━━

<keras.src.callbacks.history.History at 0x7ff5b55392a0>

## Part 4 - Making the predictions and evaluating the model

#### Predicting the result of a single observation

In [23]:
# predict() needs a 2-D array, so the single customer is wrapped in an outer list.
# The raw row uses the same column order as the original feature matrix
# (credit score, geography, gender, ...); ct.transform applies the already-fitted encoders.
raw_customer = [[600, 'France', 'Male', 40, 3, 60000, 2, 1, 1, 50000]]
customer = ct.transform(raw_customer)

# Hand-encoded equivalent of the row above (France -> 1, 0, 0; Male -> 1; then the numeric columns):
#customer = [[1, 0, 0, 1, 600, 40, 3, 60000, 2, 1, 1, 50000]]

# Apply the scaler fitted on the training set -- transform only, never fit again.
sc_customer = sc.transform(customer)


In [24]:
# predict() returns a 2-D array; [0][0] picks the single sigmoid output for this customer.
churn_probability = ann.predict(sc_customer)[0][0]
print('Probability: ', churn_probability)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 169ms/step
Probability:  0.05583287


In [25]:
# Turn the predicted probability into a yes/no decision using a 0.5 threshold.
will_leave = ann.predict(sc_customer) > 0.5
print('Customer Leave: ', will_leave)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
Customer Leave:  [[False]]


#### Predicting the Test set results

In [26]:
# Predicted probabilities for every test row, thresholded at 0.5 into boolean predictions.
test_probabilities = ann.predict(X_test)
y_pred = test_probabilities > 0.5

# Print predictions and ground truth side by side:
# left column is the prediction, right column is the truth (0 stayed, 1 left).
predicted_column = y_pred.reshape(-1, 1)
actual_column = y_test.reshape(-1, 1)
print(np.hstack((predicted_column, actual_column)))

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step  
[[0 0]
 [0 1]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


#### Making the Confusion Matrix

In [27]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix of the test-set predictions (rows: true labels, columns: predicted labels).
cm = confusion_matrix(y_test, y_pred)
print(cm)

# Overall fraction of test rows classified correctly; shown as the cell's output.
accuracy_score(y_test, y_pred)

[[1517   78]
 [ 199  206]]


0.8615