In [27]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
import pickle

In [28]:
# Read the raw churn dataset from the working directory.
data = pd.read_csv("Churn_Modelling.csv")
# Preview the first five rows (head() defaults to n=5).
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [29]:
# Remove the identifier columns so they do not end up in the feature matrix.
# errors='ignore' keeps this cell safe to re-run: `data` is reassigned here, so a
# second execution would otherwise raise KeyError because the columns are gone.
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')
data.head(5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [30]:
# Split the frame positionally: every column except the last is a feature,
# the last column (Exited) is the target.
feature_frame = data.iloc[:, :-1]
target_series = data.iloc[:, -1]
x = feature_frame.to_numpy()
y = target_series.to_numpy()
print(x)
print(y)

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]
[1 0 1 ... 1 1 0]


In [31]:
# Inspect the first sample: the categorical fields are still raw strings here.
x[0]

array([619, 'France', 'Female', 42, 2, 0.0, 1, 1, 1, 101348.88],
      dtype=object)

In [32]:
# Check the array dtype (object, because strings and numbers are mixed).
x.dtype

dtype('O')

### Encoding the independent variables

In [33]:
# One-hot encode the categorical columns (1 = Geography, 2 = Gender) and pass the
# remaining columns through unchanged; the encoded columns come first in the output.
# sparse_threshold=0 forces a dense result: ColumnTransformer may otherwise return
# a SciPy sparse matrix, which np.array() would wrap rather than densify.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [1, 2])],
    remainder='passthrough',
    sparse_threshold=0,
)
x = np.array(ct.fit_transform(x))

In [34]:
# Persist the fitted ColumnTransformer so the same encoding can be reloaded later.
with open('columnTransformer.pkl', 'wb') as encoder_out:
    pickle.dump(ct, encoder_out)

In [35]:
# Show the encoded matrix; the one-hot columns now lead each row.
print(x)

[[1.0 0.0 0.0 ... 1 1 101348.88]
 [0.0 0.0 1.0 ... 0 1 112542.58]
 [1.0 0.0 0.0 ... 1 0 113931.57]
 ...
 [1.0 0.0 0.0 ... 0 1 42085.58]
 [0.0 1.0 0.0 ... 1 0 92888.52]
 [1.0 0.0 0.0 ... 1 0 38190.78]]


In [36]:
# Check the column count after encoding.
x.shape

(10000, 13)

In [37]:
# Convert the object array returned by the encoder step to a numeric float32 array.
x = x.astype('float32')

In [38]:
# Confirm the cast took effect.
print(x.dtype)

float32


In [39]:
# Same first sample as before, now fully numeric (one-hot columns first).
x[0]

array([1.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0000000e+00,
       0.0000000e+00, 6.1900000e+02, 4.2000000e+01, 2.0000000e+00,
       0.0000000e+00, 1.0000000e+00, 1.0000000e+00, 1.0000000e+00,
       1.0134888e+05], dtype=float32)

## Train/test split

In [40]:
# Hold out 20% of the rows as a test set; random_state fixes the shuffle so the
# split is reproducible.
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

In [41]:
# Peek at two training rows before scaling.
x_train[0], x_train[1]

(array([1.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        1.0000000e+00, 6.8600000e+02, 3.2000000e+01, 6.0000000e+00,
        0.0000000e+00, 2.0000000e+00, 1.0000000e+00, 1.0000000e+00,
        1.7909327e+05], dtype=float32),
 array([0.0000000e+00, 1.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        1.0000000e+00, 6.3200000e+02, 4.2000000e+01, 4.0000000e+00,
        1.1962460e+05, 2.0000000e+00, 1.0000000e+00, 1.0000000e+00,
        1.9597886e+05], dtype=float32))

In [42]:
# Print the held-out feature matrix.
print(x_test)

[[0.000000e+00 1.000000e+00 0.000000e+00 ... 0.000000e+00 0.000000e+00
  4.178837e+04]
 [1.000000e+00 0.000000e+00 0.000000e+00 ... 1.000000e+00 1.000000e+00
  1.463793e+05]
 [0.000000e+00 0.000000e+00 1.000000e+00 ... 1.000000e+00 0.000000e+00
  5.856131e+04]
 ...
 [1.000000e+00 0.000000e+00 0.000000e+00 ... 1.000000e+00 0.000000e+00
  3.337326e+04]
 [1.000000e+00 0.000000e+00 0.000000e+00 ... 1.000000e+00 0.000000e+00
  7.675599e+04]
 [0.000000e+00 1.000000e+00 0.000000e+00 ... 1.000000e+00 1.000000e+00
  1.076743e+05]]


In [43]:
# Print the training labels.
print(y_train)

[0 0 1 ... 1 1 0]


In [44]:
# Number of training labels (80% of the rows).
y_train.shape

(8000,)

## Feature Scaling

In [45]:
# Standardise only the passthrough columns; columns 0-4 are the one-hot outputs
# and are left as 0/1 indicators.
# NOTE(review): x_train / x_test are overwritten in place, so re-running this cell
# rescales already-scaled data -- re-run from the split cell instead.
scaled_cols = slice(5, None)
scaler = StandardScaler()
# Fit on the training rows only, then reuse those statistics for the test rows.
x_train[:, scaled_cols] = scaler.fit_transform(x_train[:, scaled_cols])
x_test[:, scaled_cols] = scaler.transform(x_test[:, scaled_cols])

In [46]:
# Persist the fitted scaler so the same scaling statistics can be reloaded later.
with open('scaler.pkl', 'wb') as scaler_out:
    pickle.dump(scaler, scaler_out)

In [47]:
# Same two rows after scaling; the first five (one-hot) entries are unchanged.
x_train[0], x_train[1]

(array([ 1.        ,  0.        ,  0.        ,  0.        ,  1.        ,
         0.3564997 , -0.65578586,  0.34567967, -1.2184706 ,  0.80843616,
         0.64920264,  0.974817  ,  1.3676698 ], dtype=float32),
 array([ 0.        ,  1.        ,  0.        ,  0.        ,  1.        ,
        -0.20389777,  0.29493847, -0.34836912,  0.69683766,  0.80843616,
         0.64920264,  0.974817  ,  1.661254  ], dtype=float32))

In [48]:
# Shape check: training features.
x_train.shape

(8000, 13)

In [130]:
# Shape check: test features.
x_test.shape

(2000, 13)

In [131]:
# Shape check: training labels (repeats the earlier y_train.shape check).
y_train.shape

(8000,)

In [132]:
# Shape check: test labels.
y_test.shape

(2000,)

In [133]:
# One line per split, in the order x_train, y_train, x_test, y_test: shape then dtype.
for split in (x_train, y_train, x_test, y_test):
    print(split.shape, split.dtype)

(8000, 13) float32
(8000,) int64
(2000, 13) float32
(2000,) int64


In [134]:
# Re-cast both feature matrices to float32. They already report float32 in the
# check above, so this only produces fresh copies with the same dtype.
x_train, x_test = (features.astype('float32') for features in (x_train, x_test))

In [135]:
# Re-check shape and dtype of every split after the cast (same order as before).
for arr in [x_train, y_train, x_test, y_test]:
    print(arr.shape, arr.dtype)

(8000, 13) float32
(8000,) int64
(2000, 13) float32
(2000,) int64


# ANN Implementation

In [136]:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

In [137]:
# The input-shape tuple handed to the Keras Input layer: (number of feature columns,).
(x_train.shape[1],)

(13,)

In [138]:
## Build Our ANN Model
# Seed the Python, NumPy and TensorFlow RNGs before any layer is created so the
# initial weights are repeatable across Restart & Run All (the data split is
# already fixed via random_state=42).
tf.keras.utils.set_random_seed(42)

model=Sequential([
    Input(shape=(x_train.shape[1],)),  # one input per feature column
    Dense(12, activation='relu'),      # hidden layer 1
    Dense(6, activation='relu'),       # hidden layer 2
    Dense(1, activation='sigmoid')     # single sigmoid unit for the 0/1 target
]
)

In [139]:
# Print the model's summary table.
model.summary()

In [140]:
## Compile the model
# Adam optimizer with binary cross-entropy loss (the network ends in one sigmoid
# unit and the target is 0/1); accuracy is the metric reported during training.
model.compile(optimizer='adam',loss="binary_crossentropy",metrics=['accuracy'])

In [141]:
## Set up the TensorBoard callback
# These names are also imported in the ANN imports cell; the import is repeated
# here so the cell runs on its own.
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard

# Each run logs into its own timestamped sub-directory under logs/fit/.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp
tensorflow_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [142]:
## Set up Early Stopping
# Stop once val_loss has gone 10 epochs without improving, and roll the model back
# to the weights from its best epoch.
early_stopping_callback=EarlyStopping(monitor='val_loss',patience=10,restore_best_weights=True)

In [146]:
### Train the model
# NOTE(review): x_test / y_test double as the validation set that early stopping
# monitors, so the val_* metrics are not an untouched test-set estimate.
training_callbacks = [tensorflow_callback, early_stopping_callback]
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=100,
    callbacks=training_callbacks,
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8620 - loss: 0.3230 - val_accuracy: 0.8565 - val_loss: 0.3437
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8678 - loss: 0.3235 - val_accuracy: 0.8575 - val_loss: 0.3423
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8687 - loss: 0.3172 - val_accuracy: 0.8550 - val_loss: 0.3425
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8703 - loss: 0.3141 - val_accuracy: 0.8590 - val_loss: 0.3459
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8682 - loss: 0.3258 - val_accuracy: 0.8560 - val_loss: 0.3426
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8688 - loss: 0.3166 - val_accuracy: 0.8555 - val_loss: 0.3430
Epoch 7/100
[1m250/25

In [None]:
### Train the model (alternative variant, disabled)
# Same fit call without the TensorBoard / early-stopping callbacks and with an
# explicit batch_size; left commented out, so this cell does nothing when run.
# history=model.fit(
#     x_train,y_train,validation_data=(x_test,y_test),epochs=100,batch_size=32
# )

In [147]:
# Save the trained model to model.h5 (HDF5 format) in the working directory.
model.save('model.h5')



In [148]:
## Load the TensorBoard notebook extension (provides the %tensorboard magic)
%load_ext tensorboard

In [149]:
# Launch TensorBoard inline, reading every run logged under logs/fit.
%tensorboard --logdir logs/fit