In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from scikeras.wrappers import KerasClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
import pickle

In [5]:
# Read the raw churn dataset; the CSV is expected next to the notebook.
csv_path = 'Churn_Modelling.csv'
df = pd.read_csv(csv_path)


In [6]:
# These columns are dropped before any encoding or modelling is done.
identifier_columns = ['RowNumber', 'CustomerId', 'Surname']
df = df.drop(columns=identifier_columns)
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [7]:
# Encode the binary Gender column as integers (the fitted encoder is pickled later).
label_encoder_gender = LabelEncoder()
df['Gender'] = label_encoder_gender.fit_transform(df['Gender'])

# OHE for Geographical feature
OHE = OneHotEncoder()
GeoEncode = OHE.fit_transform(df[['Geography']])

# Take it to the df.
# The one-hot frame reuses df's index so the column-wise concat below lines up
# row-for-row even when df does not carry a default RangeIndex (otherwise
# pd.concat would align on labels and introduce NaN rows).
df_encoded = pd.DataFrame(
    data=GeoEncode.toarray().astype(int),
    columns=OHE.get_feature_names_out(['Geography']),
    index=df.index,
)
df = pd.concat([df.drop('Geography', axis=1), df_encoded], axis=1)
df.head()


Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1,0,0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0,0,1
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1,0,0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1,0,0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0,0,1


In [8]:
# Training Model
# Target is the 'Exited' flag; every remaining column is a feature.
y = df['Exited']
x = df.drop('Exited', axis=1)

# Hold out 25% of the rows; random_state pins the split.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42
)

# Fit the scaler on the training rows only, then apply the same transform
# to the held-out rows.
scaled = StandardScaler()
x_train = scaled.fit_transform(x_train)
x_test = scaled.transform(x_test)


In [10]:
# Saving of the pickle
# Each fitted preprocessing object is written to its own file:
# the gender label encoder, the geography one-hot encoder, and the scaler.
artifacts = {
    'label_encoder_gender.pkl': label_encoder_gender,
    'OHE_Geography.pkl': OHE,
    'Scaler.pkl': scaled,
}
for filename, fitted_obj in artifacts.items():
    with open(filename, 'wb') as file:
        pickle.dump(fitted_obj, file)

In [20]:
# Function to create a model and try different parameters
def create_model(neurons=32, layers=1, meta=None):
    """Build and compile a fully connected binary classifier.

    Args:
        neurons: Number of units in every hidden Dense layer.
        layers: Number of hidden layers. One hidden layer is always added;
            values below 1 therefore behave like 1.
        meta: Optional dict of fitted wrapper attributes. scikeras passes it
            to model-building functions whose signature accepts ``meta``;
            when it contains ``"n_features_in_"`` that value sets the input
            width. When it is missing, the width falls back to the
            notebook-level ``x_train.shape[1]``.

    Returns:
        A compiled ``Sequential`` model with a single sigmoid output unit,
        binary cross-entropy loss, the Adam optimizer and an accuracy metric.
    """
    # Prefer the feature count reported by the wrapper so the builder does not
    # depend on whichever array happens to be bound to the global `x_train`.
    if meta is not None and "n_features_in_" in meta:
        n_features = meta["n_features_in_"]
    else:
        n_features = x_train.shape[1]

    model = Sequential()
    # Explicit Input object instead of passing `input_shape=` to the first Dense.
    model.add(tf.keras.Input(shape=(n_features,)))
    model.add(Dense(neurons, activation='relu'))

    # Remaining hidden layers (none when layers <= 1).
    for _ in range(layers - 1):
        model.add(Dense(neurons, activation='relu'))

    # Single sigmoid unit for the binary output.
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


In [21]:
# Wrap the Keras builder as a scikit-learn compatible estimator so it can be
# handed to GridSearchCV. `neurons` and `layers` are forwarded to create_model.
# `model=` is used because scikeras deprecates the older `build_fn=` keyword.
model = KerasClassifier(model=create_model, neurons=32, layers=1, verbose=1)

In [22]:

# Define the grid search parameters
# Hidden-layer width, hidden-layer count and number of training epochs.
param_grid = dict(
    neurons=[16, 32, 64, 128],
    layers=[1, 2],
    epochs=[50, 100],
)

In [None]:
# Exhaustive search over param_grid with 3-fold cross-validation;
# n_jobs=-1 lets the fits run in parallel.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
    verbose=1,
)
grid_result = grid.fit(x_train, y_train)

# Report the best mean cross-validated score and the parameters that produced it.
best_summary = (grid_result.best_score_, grid_result.best_params_)
print("Best: %f using %s" % best_summary)

Fitting 3 folds for each of 16 candidates, totalling 48 fits


In [None]:
# Scaler Pickle
with open('Scaler.pkl','wb') as file:
    pickle.dump(scaled,file)
## ANN Implementation
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime
(x_train.shape[1],)
# ANN Model

model = Sequential([
    Dense(64,activation='relu', input_shape=(x_train.shape[1],)), # 1st Hidden Layer, Connected with inputs.
    Dense(32,activation='relu'), # Hidden Layer 2
    Dense(1,activation='sigmoid') # Output Layer
])
model.summary()
## Optimizers
import tensorflow
opt = tensorflow.keras.optimizers.Adam(learning_rate = 0.01)
# opt

loss = tensorflow.keras.losses.BinaryCrossentropy()
loss
# Another way
model.compile(optimizer=opt,loss = 'binary_crossentropy',metrics=['accuracy'])
# Setup of TenserBoard to capture and store the Logs
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorflow_callbacks = TensorBoard(log_dir=log_dir, histogram_freq=1)
tensorflow_callbacks
# Early stopping
early_stopping_callbacks = EarlyStopping(monitor='val_loss',patience=10,restore_best_weights=True)
history = model.fit(x_train,y_train, validation_data=(x_test,y_test),epochs=100,
                    callbacks = [tensorflow_callbacks,early_stopping_callbacks])
model.save('model.h5')
# Load Tensorboard Extension
%load_ext tensorboard
%reload_ext tensorboard
%tensorboard --logdir logs/fit/
%tensorboard !kill 13388