In [7]:
# Artificial Neural Network

# Importing the libraries
import numpy as np
import pandas as pd
import tensorflow as tf      # tensorflow: To build and train the Artificial Neural Network (ANN).

In [8]:
tf.__version__

'2.19.0'

In [9]:
# Part 1 - Data Preprocessing

# Importing the dataset
dataset = pd.read_csv('Churn_Modelling.csv')
X = dataset.iloc[:, 3:-1].values 

# dataset.iloc[:, 3:-1]: Selects all rows (:) and columns from index 3 up to (but not including) the last column.it includes columns:
# 3: CreditScore
# 4: Geography
# 5: Gender
# 6: Age
# 7: Tenure
# 8: Balance
# 9: NumOfProducts
# 10: HasCrCard
# 11: IsActiveMember
# 12: EstimatedSalary

y = dataset.iloc[:, -1].values
# dataset.iloc[:, -1]: Selects all rows and only the last column (index -1), i.e. Exited

print("X- ",X)
print("Y- ",y)

X-  [[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]
Y-  [1 0 1 ... 1 1 0]


In [10]:
# Encoding categorical data
# Label Encoding the "Gender" column
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
X[:, 2] = le.fit_transform(X[:, 2])
print(X)
# Gender column (index 2 in X) is label encoded:
# 'Male' → 1, 'Female' → 0

[[619 'France' 0 ... 1 1 101348.88]
 [608 'Spain' 0 ... 0 1 112542.58]
 [502 'France' 0 ... 1 0 113931.57]
 ...
 [709 'France' 0 ... 0 1 42085.58]
 [772 'Germany' 1 ... 1 0 92888.52]
 [792 'France' 0 ... 1 0 38190.78]]


In [11]:
# One Hot Encoding the "Geography" column
from sklearn.compose import ColumnTransformer
# ColumnTransformer: Allows you to apply different transformations to different columns.
from sklearn.preprocessing import OneHotEncoder
# OneHotEncoder: Performs the one-hot encoding on categorical columns.
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')

# transformers=[...]: A list of transformations to apply. Each entry is a tuple:
# 'encoder': Name of the transformer (can be anything).
# OneHotEncoder(): The transformer to apply (i.e., apply one-hot encoding).
# [1]: Apply the encoder to column index 1, which is the "Geography" column in your X.
# 🔹 remainder='passthrough':
# This tells ColumnTransformer to keep the other columns as-is, and not drop them.So it only encodes "Geography", and leaves the rest untouched.

X = np.array(ct.fit_transform(X))

# ct.fit_transform(X):
# Learns the categories (France, Spain, Germany) and transforms the column into one-hot encoded format.
# np.array(...): Converts the resulting output (which might be a sparse matrix) into a standard NumPy array.

print(X)

[[1.0 0.0 0.0 ... 1 1 101348.88]
 [0.0 0.0 1.0 ... 0 1 112542.58]
 [1.0 0.0 0.0 ... 1 0 113931.57]
 ...
 [1.0 0.0 0.0 ... 0 1 42085.58]
 [0.0 1.0 0.0 ... 1 0 92888.52]
 [1.0 0.0 0.0 ... 1 0 38190.78]]


In [12]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Splits dataset into:
# X_train, y_train: 80% training data
# X_test, y_test: 20% testing data
# random_state = 0: Ensures reproducibility of the split.


In [13]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# ANN performance improves if features are on the same scale.
# StandardScaler: Standardizes all input features (mean=0, std=1).
# fit_transform() is used on training data.
# transform() is applied to test data using the same scaling.

In [14]:
# Part 2 - Building the ANN

# Initializing the ANN
ann = tf.keras.models.Sequential()
# Creates a sequential neural network model.

In [15]:
# Adding the input layer and the first hidden layer
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))

# Adds the first hidden layer with:
# 6 neurons
# relu (Rectified Linear Unit) activation function (introduces non-linearity and helps learn complex patterns)
# Linear for positive values (slope = 1)
# Flat for negative values (slope = 0)

In [16]:
# Adding the second hidden layer
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))

In [17]:
# Adding the output layer
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Adds output layer:
# 1 neuron (binary classification: churn or not)
# sigmoid activation outputs value between 0 and 1 (interpreted as probability)

In [18]:
# Part 3 - Training the ANN

# Compiling the ANN
ann.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

# adam: Advanced gradient descent optimizer (fast and adaptive).
# binary_crossentropy: Loss function for binary classification.
# metrics=['accuracy']: Track accuracy during training.

# The loss function tells the model how wrong it is.The lower the loss, the better the predictions.It guides the gradient descent optimizer.
# Gradient Descent is the optimization algorithm used to minimize the loss function by updating the model's weights.Imagine your model is trying to find the lowest point in a valley — that’s where the loss is smallest (best performance). Gradient descent is how it walks downhill.
# The model makes a prediction.
# It compares the prediction to the true label using the loss function.
# It computes the gradient (slope) of the loss w.r.t. each weight.
# It adjusts weights slightly in the direction that reduces the loss.

In [19]:
# Training the ANN on the Training set
ann.fit(X_train, y_train, batch_size = 32, epochs = 100)

# Batch size = 32: 32 samples processed at a time before weight updates.
# 100 epochs: Full passes over training data, Model sees every sample 100 times, in batches of 32

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7150 - loss: 0.6088
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8005 - loss: 0.4742
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8086 - loss: 0.4346
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8174 - loss: 0.4294
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8185 - loss: 0.4134
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8271 - loss: 0.4105
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8390 - loss: 0.3941
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8419 - loss: 0.3839
Epoch 9/100
[1m250/250[0m [32

<keras.src.callbacks.history.History at 0x18738470b60>

In [20]:
# Part 4 - Making the predictions and evaluating the model

# Predicting the result of a single observation

"""
Homework Assignment:
Use our ANN model to predict if the customer with the following informations will leave the bank: 
Geography: France
Credit Score: 600
Gender: Male
Age: 40 years old
Tenure: 3 years
Balance: $ 60000
Number of Products: 2
Does this customer have a credit card? Yes
Is this customer an Active Member: Yes
Estimated Salary: $ 50000
So, should we say goodbye to that customer?

Write a function that predicts the result using the above using the ANN session.
"""
ann.predict(sc.transform([[1,123,123,600,1,40,3,60000,2,1,1,50000]]))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 221ms/step


array([[1.]], dtype=float32)

In [21]:
# Predicting the Test set results
y_pred = ann.predict(X_test)
y_pred = (y_pred > 0.5)

# y_pred = ann.predict(X_test)
# y_pred = (y_pred > 0.5)

print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))
# Shows side-by-side predicted and actual values for comparison.

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[[0 0]
 [0 1]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


In [22]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[1509   86]
 [ 196  209]]


0.859