In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
import pickle

In [2]:
# Load the raw churn dataset and preview its first five rows.
data = pd.read_csv("Churn_Modelling.csv")
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
# Drop the row number, customer id and surname columns before modelling.
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])
data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
# Features are every column except the last; the target is the last column.
x, y = data.iloc[:, :-1].values, data.iloc[:, -1].values
print(x, y, sep="\n")

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]
[1 0 1 ... 1 1 0]


In [5]:
# Inspect the first feature row before encoding.
x[0]

array([619, 'France', 'Female', 42, 2, 0.0, 1, 1, 1, 101348.88],
      dtype=object)

In [6]:
# Check the dtype of the feature array before encoding.
x.dtype

dtype('O')

### Encoding the independent variables

In [7]:
# One-hot encode the two categorical columns (index 1 = Geography,
# index 2 = Gender) and pass the remaining columns through unchanged.
# The encoded columns come first in the output, followed by the passthrough ones.
#
# - handle_unknown='ignore': this fitted transformer is pickled for reuse, so a
#   category that was not present at fit time is encoded as all zeros instead
#   of raising an error.
# - sparse_threshold=0: always return a dense array, so np.array() below never
#   ends up wrapping a SciPy sparse matrix.
#
# NOTE: this cell overwrites x, so it must only be run once per kernel session
# (re-running it would try to encode the already-encoded array).
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(handle_unknown='ignore'), [1, 2])],
    remainder='passthrough',
    sparse_threshold=0,
)
x = np.array(ct.fit_transform(x))

In [8]:
# Persist the fitted ColumnTransformer so the same encoding can be reapplied later.
with open('columnTransformer.pkl', 'wb') as file:
    pickle.dump(ct, file)

In [9]:
# Show the feature array after one-hot encoding.
print(x)

[[1.0 0.0 0.0 ... 1 1 101348.88]
 [0.0 0.0 1.0 ... 0 1 112542.58]
 [1.0 0.0 0.0 ... 1 0 113931.57]
 ...
 [1.0 0.0 0.0 ... 0 1 42085.58]
 [0.0 1.0 0.0 ... 1 0 92888.52]
 [1.0 0.0 0.0 ... 1 0 38190.78]]


In [10]:
# Confirm the row/column count after encoding.
x.shape

(10000, 13)

In [11]:
# Convert the encoded feature array to a float32 numeric array.
x = x.astype(np.float32)

In [12]:
# Verify the cast took effect.
print(x.dtype)

float32


In [13]:
# Inspect the first row after encoding and casting.
x[0]

array([1.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0000000e+00,
       0.0000000e+00, 6.1900000e+02, 4.2000000e+01, 2.0000000e+00,
       0.0000000e+00, 1.0000000e+00, 1.0000000e+00, 1.0000000e+00,
       1.0134888e+05], dtype=float32)

## Train/test split

In [14]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [15]:
# Look at the first two training rows before scaling.
x_train[0], x_train[1]

(array([1.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        1.0000000e+00, 6.8600000e+02, 3.2000000e+01, 6.0000000e+00,
        0.0000000e+00, 2.0000000e+00, 1.0000000e+00, 1.0000000e+00,
        1.7909327e+05], dtype=float32),
 array([0.0000000e+00, 1.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        1.0000000e+00, 6.3200000e+02, 4.2000000e+01, 4.0000000e+00,
        1.1962460e+05, 2.0000000e+00, 1.0000000e+00, 1.0000000e+00,
        1.9597886e+05], dtype=float32))

In [16]:
# Show the held-out feature rows before scaling.
print(x_test)

[[0.000000e+00 1.000000e+00 0.000000e+00 ... 0.000000e+00 0.000000e+00
  4.178837e+04]
 [1.000000e+00 0.000000e+00 0.000000e+00 ... 1.000000e+00 1.000000e+00
  1.463793e+05]
 [0.000000e+00 0.000000e+00 1.000000e+00 ... 1.000000e+00 0.000000e+00
  5.856131e+04]
 ...
 [1.000000e+00 0.000000e+00 0.000000e+00 ... 1.000000e+00 0.000000e+00
  3.337326e+04]
 [1.000000e+00 0.000000e+00 0.000000e+00 ... 1.000000e+00 0.000000e+00
  7.675599e+04]
 [0.000000e+00 1.000000e+00 0.000000e+00 ... 1.000000e+00 1.000000e+00
  1.076743e+05]]


In [17]:
# Show the training labels.
print(y_train)

[0 0 1 ... 1 1 0]


In [18]:
# Number of training labels.
y_train.shape

(8000,)

## Feature Scaling

In [19]:
# Standardise the passthrough columns (index 5 onward); the five one-hot
# columns in front of them are left as 0/1.
# The scaler is fitted on the training rows only and then applied to the test rows.
# NOTE: both arrays are modified in place, so re-running this cell would
# scale already-scaled values and refit the scaler on them.
scaler = StandardScaler()
x_train[:, 5:] = scaler.fit_transform(x_train[:, 5:])
x_test[:, 5:] = scaler.transform(x_test[:, 5:])

In [20]:
# Persist the fitted scaler so the same scaling can be reapplied later.
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

In [21]:
# Look at the first two training rows after scaling.
x_train[0], x_train[1]

(array([ 1.        ,  0.        ,  0.        ,  0.        ,  1.        ,
         0.3564997 , -0.65578586,  0.34567967, -1.2184706 ,  0.80843616,
         0.64920264,  0.974817  ,  1.3676698 ], dtype=float32),
 array([ 0.        ,  1.        ,  0.        ,  0.        ,  1.        ,
        -0.20389777,  0.29493847, -0.34836912,  0.69683766,  0.80843616,
         0.64920264,  0.974817  ,  1.661254  ], dtype=float32))

In [22]:
# Shape of the training features.
x_train.shape

(8000, 13)

In [23]:
# Shape of the test features.
x_test.shape

(2000, 13)

In [24]:
# Shape of the training labels.
y_train.shape

(8000,)

In [25]:
# Shape of the test labels.
y_test.shape

(2000,)

In [26]:
# Print shape and dtype for each split, in the order train X, train y, test X, test y.
for arr in (x_train, y_train, x_test, y_test):
    print(arr.shape, arr.dtype)

(8000, 13) float32
(8000,) int64
(2000, 13) float32
(2000,) int64


In [27]:
# Make sure both feature splits are float32 before they are fed to the network.
x_train = x_train.astype(np.float32)
x_test = x_test.astype(np.float32)

In [28]:
# Re-check shape and dtype for each split after the cast.
for arr in (x_train, y_train, x_test, y_test):
    print(arr.shape, arr.dtype)

(8000, 13) float32
(8000,) int64
(2000, 13) float32
(2000,) int64


# ANN Implementation

In [29]:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

2025-01-14 17:06:23.480701: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-14 17:06:23.481540: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-14 17:06:23.485560: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-14 17:06:23.497341: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1736899583.517572   21052 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1736899583.52

In [30]:
# Preview the input shape tuple passed to the model's Input layer (one entry per feature column).
(x_train.shape[1],)

(13,)

In [31]:
## Build Our ANN Model
# Seed the Python, NumPy and TensorFlow random number generators before any
# weights are created, so a Restart & Run All starts from the same initial
# weights each time. 42 matches the random_state used for the train/test split.
tf.keras.utils.set_random_seed(42)

model = Sequential([
    Input(shape=(x_train.shape[1],)),  # one input per feature column
    Dense(12, activation='relu'),      # HL1
    Dense(6, activation='relu'),       # HL2
    Dense(1, activation='sigmoid'),    # single sigmoid unit for the binary target
])

W0000 00:00:1736899586.752100   21052 gpu_device.cc:2344] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [32]:
# Print the layer-by-layer summary of the network.
model.summary()

In [33]:
## compile the model
# Binary cross-entropy pairs with the single sigmoid output; accuracy is tracked as a metric.
model.compile(
    optimizer='adam',
    loss="binary_crossentropy",
    metrics=['accuracy'],
)

In [34]:
## Set up the Tensorboard
# TensorBoard and datetime come from the notebook's TensorFlow import cell,
# so nothing is re-imported here.

# Each run logs into its own timestamped sub-directory under logs/fit.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# histogram_freq=1 records weight histograms every epoch.
tensorflow_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [35]:
## Set up Early Stopping
# Stop once val_loss has gone 10 epochs without improving, then restore the best weights seen.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

In [36]:
### Train the model
# Runs for up to 100 epochs; the early-stopping callback may end training sooner.
# NOTE(review): the held-out test split doubles as the validation set that
# drives early stopping — confirm that is intended.
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=100,
    validation_data=(x_test, y_test),
    callbacks=[tensorflow_callback, early_stopping_callback],
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.4180 - loss: 0.8189 - val_accuracy: 0.8040 - val_loss: 0.5285
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8028 - loss: 0.5040 - val_accuracy: 0.8130 - val_loss: 0.4381
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8120 - loss: 0.4452 - val_accuracy: 0.8290 - val_loss: 0.4022
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8226 - loss: 0.4074 - val_accuracy: 0.8390 - val_loss: 0.3783
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8365 - loss: 0.3888 - val_accuracy: 0.8490 - val_loss: 0.3642
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8491 - loss: 0.3744 - val_accuracy: 0.8515 - val_loss: 0.3537
Epoch 7/100
[1m250/25

In [37]:
### Train the model
# history=model.fit(
#     x_train,y_train,validation_data=(x_test,y_test),epochs=100,batch_size=32
# )

In [38]:
# Save the trained model to disk as 'model.h5'.
# NOTE(review): recent Keras releases treat the .h5 format as legacy and suggest
# .keras instead — confirm before changing, since anything that loads
# 'model.h5' would need updating too.
model.save('model.h5')



In [39]:
## Load Tensorboard Extension
# Registers the %tensorboard magic used in the next cell.
%load_ext tensorboard

In [40]:
# Launch TensorBoard inline, pointed at the parent directory that holds the per-run log folders.
%tensorboard --logdir logs/fit