In [2]:
import pickle

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

In [3]:
# Load the raw churn dataset; the CSV is expected in the notebook's working directory.
data = pd.read_csv(filepath_or_buffer='Churn_Modelling.csv')
# Preview the first five rows to check which columns were parsed.
data.head(n=5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
# Preprocessing the dataset.
# RowNumber, CustomerId and Surname are treated as not useful for prediction,
# so they are removed before modelling.
# Caveat: `data` is overwritten, so running this cell a second time raises a
# KeyError because the columns are already gone.
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])
data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [None]:
# Encode categorical variables: the 'Gender' column.
# LabelEncoder replaces each distinct label with an integer code; in the
# preview of this cell the 'Female' rows show up as 0.
# Caveat: data['Gender'] is overwritten with the codes, so re-running this cell
# on the same DataFrame would refit the encoder on integers instead of labels.
label_encoder_gender = LabelEncoder()
label_encoder_gender.fit(data['Gender'])
data['Gender'] = label_encoder_gender.transform(data['Gender'])
data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.8,3,1,0,113931.57,1
3,699,France,0,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.1,0


In [None]:
# One-hot encode the 'Geography' column.
# The double brackets pass a one-column DataFrame (2-D input) rather than a Series.
one_hot_encoder_geo = OneHotEncoder()
data_geo = one_hot_encoder_geo.fit_transform(data[['Geography']])
# End the cell with the value itself so that running it reproduces the
# sparse-matrix summary recorded as this cell's output.
data_geo


<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 10000 stored elements and shape (10000, 3)>

In [12]:
# List the column names the fitted encoder generates: one 'Geography_<category>'
# entry per category, with 'Geography' supplied here as the name prefix.
one_hot_encoder_geo.get_feature_names_out(['Geography'])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [9]:
# Expand the sparse one-hot matrix into a dense array to inspect its 0/1 rows.
data_geo.toarray()

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [16]:
# Convert the one-hot matrix to a DataFrame and append it to the original data.
geo_columns = list(one_hot_encoder_geo.get_feature_names_out(['Geography']))
data_geo_df = pd.DataFrame(
    data_geo.toarray(),
    columns=geo_columns,
    # pd.concat(axis=1) aligns rows on index labels, so reuse data's index
    # explicitly instead of relying on both frames having a default RangeIndex.
    index=data.index,
)
# Drop any Geography_* columns left behind by an earlier run of this cell so
# that re-running it does not append a second, duplicate set of columns.
data = pd.concat(
    [data.drop(columns=geo_columns, errors='ignore'), data_geo_df],
    axis=1,
)

In [17]:
# Check that the Geography_* indicator columns now sit alongside the original columns.
data.head(5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,France,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,France,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,France,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [18]:
# Persist both fitted encoders as pickle files so the same category mappings
# can be reloaded for future use instead of being refit.
encoders_to_save = {
    'label_encoder_gender.pkl': label_encoder_gender,
    'one_hot_encoder_geo.pkl': one_hot_encoder_geo,
}
for filename, encoder in encoders_to_save.items():
    with open(filename, 'wb') as file:
        pickle.dump(encoder, file)

In [19]:
# Separate the target variable from the features.
# 'Exited' is the target; the raw 'Geography' text column is left out of the
# features because the Geography_* one-hot columns stand in for it.
y = data['Exited']
X = data.drop(columns=['Exited', 'Geography'])
X.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0.0,0.0,1.0


In [20]:
# Hold out 20% of the rows as the test set; the fixed random_state keeps the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [21]:
# Standardise the features with StandardScaler.
# The scaler is fitted on the training split only and then applied to both
# splits, so the test rows do not influence the learned statistics.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [23]:
# Persist the fitted scaler as a pickle file so the statistics learned from
# the training split can be reapplied later without refitting.
with open('scaler.pkl', mode='wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

### Implementing ANN

In [25]:
import tensorflow as tf
from tensorflow.keras.models import Sequential #for sequential model
from tensorflow.keras.layers import Dense, Dropout #for adding layers to the model
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard #for early stopping and tensorboard
import datetime #for tensorboard

In [27]:
# Input shape passed to the first Dense layer: a 1-tuple holding the number of feature columns.
(X_train_scaled.shape[1],)

(12,)

In [48]:
# Define the ANN: two ReLU hidden layers followed by a single-unit sigmoid output.
model = Sequential([
    # Hidden layer 1: 64 neurons connected to the input layer; input_shape is
    # the number of feature columns in the scaled training data.
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    # Hidden layer 2: 32 neurons connected to the first hidden layer.
    Dense(32, activation='relu'),
    # Output layer (not a hidden layer): 1 neuron with sigmoid activation,
    # giving a value between 0 and 1 for the binary 'Exited' target.
    Dense(1, activation='sigmoid'),
])

In [49]:
# Print a summary of the model: each layer's output shape and parameter count,
# followed by the total number of parameters.
model.summary() 
# The parameters are the weights and biases that the model learns during training.

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_9 (Dense)             (None, 64)                832       
                                                                 
 dense_10 (Dense)            (None, 32)                2080      
                                                                 
 dense_11 (Dense)            (None, 1)                 33        
                                                                 
Total params: 2945 (11.50 KB)
Trainable params: 2945 (11.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [50]:
#Compile the model
model.compile(
    optimizer='adam', #optimizer for the model. 'adam' has fixed learning rate
    loss='binary_crossentropy', #binary_crossentropy is used for binary classification problems
    metrics=['accuracy']) #metrics for the model

In [51]:
## Set up the TensorBoard callback to log the training process
# The callback records the training run so it can be visualised in TensorBoard.
# Each run writes to its own logs/fit/<YYYYMMDD-HHMMSS> directory, which keeps
# separate runs apart in the dashboard.
# EarlyStopping, TensorBoard and datetime are already imported in the import
# cell under "Implementing ANN", so they are not imported again here.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# log_dir is the directory where the logs are saved.
# histogram_freq=1 means histograms are logged every epoch.
tensorflow_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
# Early stopping: end training once the validation loss stops improving, which
# helps to limit overfitting.
early_stopping_callback = EarlyStopping(
    # Metric that is monitored: the validation loss.
    monitor='val_loss',
    # Number of epochs with no improvement after which training is stopped.
    # Play with this number to see how it affects the training.
    patience=5,
    # When training stops, restore the weights from the best epoch.
    restore_best_weights=True,
)

In [53]:
# Sanity check: the training labels form a 1-D vector with one entry per training row.
y_train.shape

(8000,)

In [54]:
# Train the model
history = model.fit(
    X_train_scaled, #training data. We use scaled data for training the model.
    y_train, #training labels
    validation_data=(X_test_scaled, y_test), #validation data. We use scaled data for validation.
    epochs=50, #This is the number of times the model will see the entire training data.
    callbacks=[tensorflow_callback, early_stopping_callback], #callbacks for tensorboard and early stopping
    verbose=1) #verbose is used to print the progress of the training process

Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50


In [56]:
# Save the model as an HDF5 (.h5) file. The file holds the entire model:
# architecture, weights and optimizer state.
model.save('model.h5') 
# The saved model can be loaded later and used for predictions without having to
# recompile or retrain it.
# NOTE(review): the truncated warning in this cell's output is presumably Keras
# flagging HDF5 as a legacy save format - confirm before changing the file name.

  saving_api.save_model(


In [62]:
# Launch TensorBoard to visualize the training process.
# Step 1: load the TensorBoard notebook extension (%reload_ext reloads it if it is already loaded).
%reload_ext tensorboard

In [66]:

# Start TensorBoard from the notebook, reading the run directories under logs/fit.
# The equivalent terminal command is:
#   tensorboard --logdir logs/fit
%tensorboard --logdir logs/fit
# This starts a local TensorBoard server, or reuses one that is already running.
# The dashboard can also be opened in a web browser to visualize the training process.

Reusing TensorBoard on port 6006 (pid 29316), started 2:17:39 ago. (Use '!kill 29316' to kill it.)