In [2]:
import pandas as pd
import numpy as np
# `plt` is the conventional alias for the pyplot interface. Importing the bare
# `matplotlib` package under that alias would leave plt.plot / plt.subplots unavailable.
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.model_selection import train_test_split
import pickle


In [3]:
## Load the churn dataset from Churn_Modelling.csv (path is relative to the working directory)
data = pd.read_csv("Churn_Modelling.csv")
# Bare last expression so the notebook renders the frame as the cell output
data

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [4]:
# Remove the RowNumber, CustomerId and Surname columns; they are not used as model features.
# `data` is reassigned here, so without errors="ignore" a second execution of this
# cell would raise KeyError because the columns are already gone.
data = data.drop(columns=["RowNumber", "CustomerId", "Surname"], errors="ignore")


In [5]:
# Preview the first rows to confirm the three columns were removed
data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [6]:
# List the distinct Geography values that will need encoding
data["Geography"].unique()

array(['France', 'Spain', 'Germany'], dtype=object)

In [7]:
# Encode Gender as integer codes with a LabelEncoder.
# The column is overwritten in place, so fit only while it still holds the original
# labels: re-running this cell on the already-encoded column would refit the encoder
# on the integer codes and lose the label mapping that is pickled further down.
if not pd.api.types.is_numeric_dtype(data["Gender"]):
    label_encoder_gender = LabelEncoder()
    data["Gender"] = label_encoder_gender.fit_transform(data["Gender"])

data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.8,3,1,0,113931.57,1
3,699,France,0,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.1,0


In [8]:
# Check the distinct values of the encoded Gender column
data["Gender"].unique()

array([0, 1])

In [9]:
# Inspect the Geography column before it is one-hot encoded
data["Geography"]

0        France
1         Spain
2        France
3        France
4         Spain
         ...   
9995     France
9996     France
9997     France
9998    Germany
9999     France
Name: Geography, Length: 10000, dtype: object

In [10]:
## One-hot encoder for Geography

from sklearn.preprocessing import OneHotEncoder

# Fit on the Geography column (double brackets select a one-column DataFrame),
# then transform that same column with the fitted encoder.
oneHotencoder_geo = OneHotEncoder().fit(data[["Geography"]])
geo_encoder = oneHotencoder_geo.transform(data[["Geography"]])

# Display the encoded result
geo_encoder


<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 10000 stored elements and shape (10000, 3)>

In [11]:
# Column names the fitted encoder produces for the Geography input
oneHotencoder_geo.get_feature_names_out(["Geography"])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [12]:
# Convert the encoded matrix to a dense array and label its columns with the
# encoder's feature names.
# Reusing `data.index` keeps these rows aligned with `data` when the two frames are
# combined column-wise; a fresh RangeIndex only lines up while `data` still has
# its default index.
geo_encoder_df = pd.DataFrame(
    geo_encoder.toarray(),
    index=data.index,
    columns=oneHotencoder_geo.get_feature_names_out(["Geography"]),
)
geo_encoder_df.head()

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0


In [13]:
## Replace the Geography column with its one-hot columns

df = pd.concat(
    [data.drop(columns="Geography"), geo_encoder_df],
    axis="columns",
)

In [14]:
# Preview the combined frame
df.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [15]:
## Separate the features (X) from the target column (y)

# Everything except "Exited" is a feature; "Exited" itself is the target.
X, y = df.drop(columns=["Exited"]), df["Exited"]

In [16]:
## Split into train and test sets (22% held out, fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.22,
    random_state=42,
)


In [17]:
## Standardize the features

# Fit the scaler on the training split only, then apply it to both splits.
# NOTE: X_train and X_test are overwritten here. Re-run the split cell before
# re-running this one, otherwise the scaler is refit on already-scaled values.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [18]:
# Inspect the scaled training features
X_train

array([[-0.19485048, -1.09642404,  0.0094513 , ..., -0.9969278 ,
        -0.57912674,  1.73027616],
       [-0.66249484, -1.09642404,  0.29518832, ..., -0.9969278 ,
         1.72673776, -0.57794242],
       [ 0.55338049, -1.09642404, -0.18104004, ...,  1.00308167,
        -0.57912674, -0.57794242],
       ...,
       [ 0.8651434 , -1.09642404, -0.08579437, ...,  1.00308167,
        -0.57912674, -0.57794242],
       [ 0.15848081,  0.91205588,  0.39043399, ...,  1.00308167,
        -0.57912674, -0.57794242],
       [ 0.47024372,  0.91205588,  1.15239936, ..., -0.9969278 ,
         1.72673776, -0.57794242]], shape=(7800, 12))

## Save the fitted encoders and the scaler to pickle files

In [19]:
# Write each fitted preprocessing object to its own pickle file.
for pickle_path, fitted_obj in (
    ("label_encoder_gender.pkl", label_encoder_gender),
    ("oneHotencoder_geo.pkl", oneHotencoder_geo),
    ("scaler.pkl", scaler),
):
    with open(pickle_path, "wb") as file:
        pickle.dump(fitted_obj, file)

In [20]:
# Display the encoded (unscaled) frame; scaling was applied only to the X_train / X_test splits
df

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.80,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.00,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.10,0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,0,1.0,0.0,0.0
9996,516,1,35,10,57369.61,1,1,1,101699.77,0,1.0,0.0,0.0
9997,709,0,36,7,0.00,1,0,1,42085.58,1,1.0,0.0,0.0
9998,772,1,42,3,75075.31,2,1,0,92888.52,1,0.0,1.0,0.0


## ANN Implementation

In [21]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import TensorBoard , EarlyStopping
from tensorflow.keras.layers import Dense

In [25]:
## Build our model

# Declare the input with an explicit Input object rather than `input_shape=` on the
# first Dense layer; Keras emits a UserWarning for the latter in Sequential models.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),  ## one input per feature column
    Dense(64, activation="relu"),               ## hidden layer 1
    Dense(32, activation="relu"),               ## hidden layer 2
    Dense(1, activation="sigmoid"),             ## output layer
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
! pip install tensorflow

In [26]:
# Display the model object's repr
model

<Sequential name=sequential_1, built=True>

In [27]:
# Print the model summary
model.summary()

In [29]:
## Compile the model
import tensorflow as tf

# Binary cross-entropy loss, optimised with Adam at a learning rate of 0.01.
loss = tf.keras.losses.BinaryCrossentropy()
opt = tf.keras.optimizers.Adam(learning_rate=0.01)

model.compile(
    optimizer=opt,
    loss=loss,
    metrics=["accuracy"],
)

In [30]:
import datetime

In [None]:
## Set up the TensorBoard callback

from tensorflow.keras.callbacks import EarlyStopping,TensorBoard

# Each run logs to its own directory, named by the timestamp at which this cell runs.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dirn = "logs/fit/" + run_stamp
tensorflow_callback = TensorBoard(
    log_dir=log_dirn,
    histogram_freq=1,
)

In [36]:
## Set up early stopping

# Monitors val_loss with a patience of 10 epochs and restores the best weights on stop.
early_stopping_callback = EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True,
)

In [37]:
## Train the model

# Up to 100 epochs; the test split is passed as validation data, and both
# callbacks (TensorBoard logging, early stopping) are attached.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    validation_data=(X_test, y_test),
    callbacks=[tensorflow_callback, early_stopping_callback],
)

Epoch 1/100
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8581 - loss: 0.3446 - val_accuracy: 0.8636 - val_loss: 0.3527
Epoch 2/100
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8562 - loss: 0.3432 - val_accuracy: 0.8618 - val_loss: 0.3492
Epoch 3/100
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8603 - loss: 0.3402 - val_accuracy: 0.8614 - val_loss: 0.3407
Epoch 4/100
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8610 - loss: 0.3377 - val_accuracy: 0.8641 - val_loss: 0.3481
Epoch 5/100
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8618 - loss: 0.3335 - val_accuracy: 0.8659 - val_loss: 0.3387
Epoch 6/100
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8636 - loss: 0.3311 - val_accuracy: 0.8527 - val_loss: 0.3409
Epoch 7/100
[1m244/24

In [None]:
## Save the trained model to model.h5 in the working directory
# NOTE(review): the ".h5" suffix presumably selects Keras' HDF5 format — confirm this is the intended format.

model.save("model.h5")



In [40]:
## Load the TensorBoard notebook extension
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [None]:
# Launch TensorBoard on logs/fit, the parent of the per-run log directories
%tensorboard --logdir logs/fit