# Step 1 : Installation and Setup

In [1]:
import tensorflow as tf

In [2]:
# Display the version string of the imported TensorFlow package
tf.__version__


'2.13.0'

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Step 2 : Data Preprocessing


In [4]:
# Load the churn data from a CSV file in the working directory
DATA_PATH = "Churn_Modelling.csv"
dataset = pd.read_csv(DATA_PATH)

In [5]:
# Preview the first five rows of the raw data
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [6]:
# Select the independent variables (X) and the dependent variable (y).
# The three identifier columns and the target itself are excluded from X.
non_feature_columns = ['RowNumber', 'CustomerId', 'Surname', 'Exited']
X = dataset.drop(columns=non_feature_columns)
y = dataset['Exited']

In [7]:
# Encoding the categorical data
from sklearn.preprocessing import LabelEncoder

In [8]:
# Replace each Geography category with an integer code; the fitted encoder
# is kept in label_1 so the mapping can be inspected afterwards.
geography_values = X['Geography']
label_1 = LabelEncoder()
X['Geography'] = label_1.fit_transform(geography_values)

In [9]:
# Replace each Gender category with an integer code; the fitted encoder
# is kept in label_2 so the mapping can be inspected afterwards.
gender_values = X['Gender']
label_2 = LabelEncoder()
X['Gender'] = label_2.fit_transform(gender_values)

In [10]:
# One-hot encode the (integer-coded) Geography column as int dummies,
# dropping the first level so the remaining dummy columns are not redundant.
X = pd.get_dummies(
    data=X,
    columns=['Geography'],
    drop_first=True,
    dtype="int",
)

In [11]:
# Display the target column selected from dataset['Exited']
y

0       1
1       0
2       1
3       0
4       0
       ..
9995    0
9996    0
9997    1
9998    1
9999    0
Name: Exited, Length: 10000, dtype: int64

In [12]:
# Preview the first five rows of the encoded feature frame
X.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_1,Geography_2
0,619,0,42,2,0.0,1,1,1,101348.88,0,0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,1
2,502,0,42,8,159660.8,3,1,0,113931.57,0,0
3,699,0,39,1,0.0,2,0,0,93826.63,0,0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,1


In [13]:
# Split the dataset into a training set and a test set.
from sklearn.model_selection import train_test_split

# test_size=0.2 holds out 20% of the rows; random_state=0 fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [14]:
# Feature scaling
from sklearn.preprocessing import StandardScaler

# The scaler is fitted on the training split only; the same fitted scaler
# is then applied to both the training and the test features.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [15]:
# Display the scaled training features
X_train

array([[ 0.16958176, -1.09168714, -0.46460796, ...,  1.10643166,
        -0.5698444 ,  1.74309049],
       [-2.30455945,  0.91601335,  0.30102557, ..., -0.74866447,
         1.75486502, -0.57369368],
       [-1.19119591, -1.09168714, -0.94312892, ...,  1.48533467,
        -0.5698444 , -0.57369368],
       ...,
       [ 0.9015152 ,  0.91601335, -0.36890377, ...,  1.41231994,
        -0.5698444 , -0.57369368],
       [-0.62420521, -1.09168714, -0.08179119, ...,  0.84432121,
        -0.5698444 ,  1.74309049],
       [-0.28401079, -1.09168714,  0.87525072, ...,  0.32472465,
         1.75486502, -0.57369368]])

In [16]:
# Display the scaled test features
X_test

array([[-0.55204276, -1.09168714, -0.36890377, ...,  1.61085707,
         1.75486502, -0.57369368],
       [-1.31490297, -1.09168714,  0.10961719, ...,  0.49587037,
        -0.5698444 , -0.57369368],
       [ 0.57162971, -1.09168714,  0.30102557, ..., -0.42478674,
        -0.5698444 ,  1.74309049],
       ...,
       [-0.74791227,  0.91601335, -0.27319958, ...,  0.71888467,
        -0.5698444 ,  1.74309049],
       [-0.00566991,  0.91601335, -0.46460796, ..., -1.54507805,
         1.75486502, -0.57369368],
       [-0.79945688,  0.91601335, -0.84742473, ...,  1.61255917,
         1.75486502, -0.57369368]])

# Step 3 : Building the Model

In [17]:
# Creating an object (initialising the ANN).
# Seed the Python, NumPy and TensorFlow random generators first so that weight
# initialisation and batch shuffling are repeatable after Restart & Run All.
# The seed value matches the random_state used for the train/test split.
tf.keras.utils.set_random_seed(0)
model = tf.keras.models.Sequential()

In [18]:
# Adding input layer and first hidden layer
# 1) units (neurons) = 6
# 2) activation function = relu
# 3) input dimension = number of feature columns in X_train, read from the
#    data instead of hard-coding 11 so the layer follows preprocessing changes
model.add(tf.keras.layers.Dense(units=6, activation="relu", input_dim=X_train.shape[1]))

In [19]:
# Show (rows, columns) of the feature frame; the column count is the model's input dimension
X.shape

(10000, 11)

In [20]:
# Second hidden layer: 6 units with ReLU activation
second_hidden_layer = tf.keras.layers.Dense(units=6, activation="relu")
model.add(second_hidden_layer)


In [21]:
# Output layer: a single sigmoid unit
output_layer = tf.keras.layers.Dense(units=1, activation="sigmoid")
model.add(output_layer)

In [22]:
# Compile the model: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [23]:
# Print the layer-by-layer architecture and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 6)                 72        
                                                                 
 dense_1 (Dense)             (None, 6)                 42        
                                                                 
 dense_2 (Dense)             (None, 1)                 7         
                                                                 
Total params: 121 (484.00 Byte)
Trainable params: 121 (484.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


# Step 4 : Training the model

In [24]:
# Train on the scaled training data: 20 epochs, mini-batches of 10 samples
model.fit(
    x=X_train,
    y=y_train,
    batch_size=10,
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x14e28d210>

# Step 5 : Model Evaluation and Prediction

In [25]:
# Evaluate the trained model on the held-out test split.
# evaluate() returns the loss followed by the compiled metric (accuracy).
test_loss, test_accuracy = model.evaluate(x=X_test, y=y_test)



In [26]:
# Report the test accuracy rounded to three decimal places
print(f"Test Accuracy : {test_accuracy:.3f}")

Test Accuracy : 0.860


In [27]:
# Model prediction: the sigmoid output is one predicted probability per test row
y_pred_probs = model.predict(X_test)

# Threshold the probabilities at 0.5 and store the result as int32 labels
above_threshold = y_pred_probs > 0.5
y_pred = above_threshold.astype("int32")




In [28]:
# Display the thresholded predictions
y_pred

array([[0],
       [0],
       [0],
       ...,
       [0],
       [0],
       [0]], dtype=int32)

In [29]:
# Display the true test labels
y_test

9394    0
898     1
2398    0
5906    0
2343    0
       ..
1037    0
2899    0
9549    0
2740    0
6690    0
Name: Exited, Length: 2000, dtype: int64

In [30]:
# Convert the test labels to a NumPy array so they can be indexed by position.
# np.asarray leaves an existing ndarray unchanged, so this cell can be re-run
# safely; calling .to_numpy() a second time would fail because ndarrays have
# no such method.
y_test = np.asarray(y_test)

In [31]:
# Compare the prediction and the true label for one test sample (position 11).
# Two separate statements avoid building a tuple of print() return values,
# which the notebook would otherwise echo as a stray "(None, None)".
print(y_pred[11])
print(y_test[11])

[0]
0


(None, None)

In [32]:
#confusion matrix
from sklearn.metrics import confusion_matrix,accuracy_score

In [33]:
# Confusion matrix of true test labels against predicted labels
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [34]:
# Display the confusion matrix
cm

array([[1501,   94],
       [ 185,  220]])

In [35]:
# Fraction of test samples whose predicted label matches the true label
accuracy_score(y_true=y_test, y_pred=y_pred)

0.8605