In [129]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
import pickle
import tensorflow


In [130]:
data = pd.read_csv('Churn_Modelling.csv')
data.head(5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


### Preprocessing the data


In [131]:
### drop the irrelevant columns
data = data.drop(columns=['RowNumber','CustomerId','Surname'],axis=1
                 )
data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [132]:
### Encode the categorical variables
label_encoder_gender = LabelEncoder()
data['Gender']= label_encoder_gender.fit_transform(data['Gender'])
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.80,3,1,0,113931.57,1
3,699,France,0,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,1,39,5,0.00,2,1,0,96270.64,0
9996,516,France,1,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,0,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,1,42,3,75075.31,2,1,0,92888.52,1


In [133]:
### onehot encoding the Geography columns 
from sklearn.preprocessing import OneHotEncoder
onehot_encoder_geo = OneHotEncoder()
geo_encoder = onehot_encoder_geo.fit_transform(data[['Geography']])
print(geo_encoder)

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 10000 stored elements and shape (10000, 3)>
  Coords	Values
  (0, 0)	1.0
  (1, 2)	1.0
  (2, 0)	1.0
  (3, 0)	1.0
  (4, 2)	1.0
  (5, 2)	1.0
  (6, 0)	1.0
  (7, 1)	1.0
  (8, 0)	1.0
  (9, 0)	1.0
  (10, 0)	1.0
  (11, 2)	1.0
  (12, 0)	1.0
  (13, 0)	1.0
  (14, 2)	1.0
  (15, 1)	1.0
  (16, 1)	1.0
  (17, 2)	1.0
  (18, 2)	1.0
  (19, 0)	1.0
  (20, 0)	1.0
  (21, 2)	1.0
  (22, 2)	1.0
  (23, 0)	1.0
  (24, 0)	1.0
  :	:
  (9975, 1)	1.0
  (9976, 0)	1.0
  (9977, 0)	1.0
  (9978, 0)	1.0
  (9979, 0)	1.0
  (9980, 2)	1.0
  (9981, 1)	1.0
  (9982, 1)	1.0
  (9983, 0)	1.0
  (9984, 1)	1.0
  (9985, 0)	1.0
  (9986, 1)	1.0
  (9987, 2)	1.0
  (9988, 0)	1.0
  (9989, 2)	1.0
  (9990, 1)	1.0
  (9991, 0)	1.0
  (9992, 2)	1.0
  (9993, 0)	1.0
  (9994, 0)	1.0
  (9995, 0)	1.0
  (9996, 0)	1.0
  (9997, 0)	1.0
  (9998, 1)	1.0
  (9999, 0)	1.0


In [134]:
onehot_encoder_geo.get_feature_names_out(['Geography'])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [135]:
geo_encoder_df = pd.DataFrame(geo_encoder.toarray(),columns =onehot_encoder_geo.get_feature_names_out(['Geography']))

In [136]:
### combine the one hot encoded columns with the original data 
data = pd.concat([data.drop('Geography',axis=1),geo_encoder_df],axis=1)
data.head(5)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [137]:
### save the encoders and sscaler
with open('label_encoder_gender.pkl','wb') as file:
    pickle.dump(label_encoder_gender,file)
with open('onehot_encoder_geo.pkl','wb') as file:
    pickle.dump(onehot_encoder_geo,file)    

In [138]:
data.head(5)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [139]:
### Divivde the dataset into independent and dependent features
x = data.drop('Exited',axis=1)
y = data['Exited']

In [140]:
### Dividing the data into training and testing set 
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25,random_state=42)

In [141]:
### scaling the Feature 
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [142]:
with open('scaler.pkl','wb') as file:
    pickle.dump(scaler,file)
    
    

In [143]:
data

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.80,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.00,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.10,0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,0,1.0,0.0,0.0
9996,516,1,35,10,57369.61,1,1,1,101699.77,0,1.0,0.0,0.0
9997,709,0,36,7,0.00,1,0,1,42085.58,1,1.0,0.0,0.0
9998,772,1,42,3,75075.31,2,1,0,92888.52,1,0.0,1.0,0.0


## ANN Implementation

In [144]:
import tensorflow as tf 

In [145]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

In [146]:
x_train.shape

(7500, 12)

In [147]:
## Build the model
model =Sequential([
Dense(64,activation = 'relu',input_shape=(x_train.shape[1],)),  #HL1 connected with input layer
Dense(32,activation='relu'), ## HL2
Dense(1,activation='sigmoid')    ## output layer
       
    
]
       
)

In [148]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_9 (Dense)             (None, 64)                832       
                                                                 
 dense_10 (Dense)            (None, 32)                2080      
                                                                 
 dense_11 (Dense)            (None, 1)                 33        
                                                                 
Total params: 2945 (11.50 KB)
Trainable params: 2945 (11.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [149]:
opt = tf.keras.optimizers.Adam(learning_rate=0.01)
loss = tf.keras.losses.BinaryCrossentropy()
loss

<keras.src.losses.BinaryCrossentropy at 0x1ae0eec67d0>

In [150]:
### compile the model
model.compile(optimizer=opt ,loss="binary_crossentropy",metrics=['accuracy'])

In [151]:
### Set up the Tensorboard
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorflow_callback = TensorBoard(log_dir=log_dir,histogram_freq=1)


In [152]:
### set up Early Stopping
Early_Stopping_callback = EarlyStopping(monitor='val_loss',patience=10,restore_best_weights=True)

#### Training the model 

In [153]:
train_model = model.fit(x_train,y_train,validation_data=(x_test,y_test),epochs=100,
callbacks=[tensorflow_callback,Early_Stopping_callback]                     
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100


In [154]:
model.save('model.h5')

  saving_api.save_model(


In [155]:
### Load Tensorboard Extention
%load_ext tensorboard


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [156]:
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6006 (pid 24208), started 8:26:51 ago. (Use '!kill 24208' to kill it.)