In [62]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
import pickle


In [None]:
# Read the churn CSV from the working directory and preview its first rows.
data = pd.read_csv(filepath_or_buffer="Churn_Modelling.csv")
data.head(n=5)

In [None]:
## preprocessing the data
# Drop the irrelevant identifier columns.
# NOTE: this rebinds `data`, so re-running the cell without reloading the CSV
# raises KeyError (the columns are already gone).
data = data.drop(columns=["RowNumber", "CustomerId", "Surname"])
# Preview a few rows instead of rendering the whole frame.
data.head()

In [None]:
## encode categorical variables
# Fit a LabelEncoder on 'Gender' and overwrite the column with the integer codes.
# NOTE: the column is replaced in place, so re-running this cell would re-fit the
# encoder on the already-encoded values; reload the data before re-running.
label_encoder_gender = LabelEncoder()
data['Gender'] = label_encoder_gender.fit_transform(data["Gender"])
# Preview a few rows instead of rendering the whole frame.
data.head()

In [None]:
## One hot encoding 'geography' column
from sklearn.preprocessing import OneHotEncoder

# Learn the categories of 'Geography', then encode the same column.
# The encoded result is converted with .toarray() in the next cell.
onehot_encoder_geo = OneHotEncoder()
onehot_encoder_geo.fit(data[["Geography"]])
geo_encoder = onehot_encoder_geo.transform(data[["Geography"]])
geo_encoder

In [None]:
# Wrap the one-hot matrix in a DataFrame whose columns are the encoder's
# generated feature names.
# Reuse `data`'s index so the later column-wise concat lines up row-for-row even
# if `data` does not carry a default 0..n-1 index.
geo_encoded_df = pd.DataFrame(
    geo_encoder.toarray(),
    columns=onehot_encoder_geo.get_feature_names_out(["Geography"]),
    index=data.index,
)
# Preview a few rows instead of rendering the whole frame.
geo_encoded_df.head()

In [None]:
# Combine the one-hot encoded columns with the original dataset,
# replacing the raw 'Geography' column.
data = pd.concat(
    objs=[data.drop(columns='Geography'), geo_encoded_df],
    axis='columns',
)
data.head()

In [69]:
# Persist the two fitted encoders, one pickle file each.
for pickle_path, fitted_encoder in (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoder_geo.pkl', onehot_encoder_geo),
):
    with open(pickle_path, 'wb') as file:
        pickle.dump(fitted_encoder, file)

In [70]:
# divide the dataset into independent and dependent features
X = data.drop('Exited', axis=1)
y = data['Exited']

# splitting the data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## scale the features
# Fit the scaler on the training split only and apply those same statistics to
# the test split. Calling fit_transform on X_test would re-fit the scaler on the
# test data: the two splits would be scaled with different means/variances, test
# statistics would leak into preprocessing, and the `scaler` object that is
# pickled afterwards would hold the test-set statistics.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Preview the first few scaled training rows rather than the whole array.
X_train[:5]

In [72]:
# Persist the fitted scaler to 'scaler.pkl'.
with open('scaler.pkl', mode='wb') as file:
    pickle.dump(scaler, file)

##

#### ANN implementation

In [73]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime

In [74]:
# building ANN model
# Layers are stacked one at a time: two ReLU hidden layers followed by a
# single sigmoid output unit.
model = Sequential()
# HL1 connected to input layer; input width is the number of feature columns
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
# HL2
model.add(Dense(32, activation='relu'))
# output layer
model.add(Dense(1, activation='sigmoid'))

In [None]:
# Show the model's layer and parameter summary.
model.summary()

In [76]:
# Adam optimizer with a learning rate of 1e-2.
opt = tf.keras.optimizers.Adam(learning_rate=1e-2)
# Binary cross-entropy loss object with default settings.
loss = tf.keras.losses.BinaryCrossentropy()

In [77]:
# compile the model
# Pass the `opt` and `loss` objects created in the previous cell; the loss was
# previously repeated as a string, leaving the `loss` object unused.
model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])

In [81]:
## set up the Tensorboard

# Give every run its own timestamped sub-directory under logs/fit/.
# The trailing slash matters: without it the timestamp is glued onto the
# directory name ("logs/fit20241002-...") instead of nesting under logs/fit/.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# histogram_freq=1 asks the callback to record histograms every epoch.
tensorflow_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [82]:
# set up early stopping
# Watch the validation loss, allow 10 epochs without improvement, and roll the
# model back to its best weights when training stops.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

In [None]:
# Train the model
# Up to 100 epochs; the test split doubles as validation data, and both the
# TensorBoard and early-stopping callbacks are attached.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    callbacks=[tensorflow_callback, early_stopping_callback],
)

In [None]:
# Save the trained model to 'model.h5'.
# NOTE(review): the .h5 suffix presumably selects the HDF5 format — confirm for the installed Keras version.
model.save('model.h5')

In [None]:
### load tensorboard extension
# Makes the `%tensorboard` line magic used in the next cell available.
%load_ext tensorboard

In [None]:
%tensorboard --logdir logs/fit20241002-010948