In [1]:
from pathlib import Path
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
import pickle

## 1. Load Churn Modelling Dataset

In [2]:
# Fixed seed handed to train_test_split so the train/test partition is repeatable between runs.
RANDOM_STATE = 42

In [None]:
# The project root is taken to be the directory directly above the current
# working directory (i.e. the notebook is expected to run from a sub-folder).
_working_dirpath = Path.cwd()
PROJECT_DIRPATH = _working_dirpath.parent
PROJECT_DIRPATH

In [4]:
# Folder that receives every persisted artifact (encoders, scaler, trained model).
# It is created on first run; an already-existing folder is left untouched.
MODELS_DIRPATH = PROJECT_DIRPATH.joinpath('models')
MODELS_DIRPATH.mkdir(exist_ok=True)

In [None]:
# Build the dataset path as <project root>/data/<DATASET_FILENAME>.
# The filename is defined once and reused, so the constant and the path cannot drift apart.
DATASET_FILENAME = "churn_modelling.csv"
DATASET_FILEPATH = PROJECT_DIRPATH / "data" / DATASET_FILENAME
DATASET_FILEPATH

In [None]:
# Read the raw CSV into a DataFrame and preview its first rows.
data = pd.read_csv(DATASET_FILEPATH)
data.head()

In [None]:
# Column names, dtypes and non-null counts: a quick check for missing values.
data.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

## 2. Data Preprocessing

### 2.1 Drop Irrelevant Columns

In [None]:
# Columns treated as irrelevant for modelling are removed from the frame.
# NOTE: this rebinds `data`, so the cell fails if executed a second time
# without reloading the CSV.
irrelevant_columns = ['RowNumber', 'CustomerId', 'Surname']
data = data.drop(columns=irrelevant_columns)
data

### 2.2 Encode Categorical Values

In [None]:
# Replace each distinct 'Gender' value with an integer code. The fitted encoder
# is kept in `label_encoder_gender` so the same mapping can be reapplied later.
label_encoder_gender = LabelEncoder().fit(data['Gender'])
data['Gender'] = label_encoder_gender.transform(data['Gender'])
data

In [None]:
# One-hot encode 'Geography'. The double brackets keep the selection 2-D, as the
# encoder requires. `geo_encoded` is a sparse matrix, hence `.toarray()` for display.
geography_frame = data[['Geography']]
onehot_encoder_geo = OneHotEncoder().fit(geography_frame)
geo_encoded = onehot_encoder_geo.transform(geography_frame)
geo_encoded.toarray()

In [None]:
# Wrap the one-hot matrix in a DataFrame whose column names come from the fitted
# encoder (one column per category, prefixed with 'Geography').
geo_encoded_columns = onehot_encoder_geo.get_feature_names_out(['Geography'])
geo_encoded_df = pd.DataFrame(
    geo_encoded.toarray(),
    columns=geo_encoded_columns,
    # Reuse the source frame's index so a column-wise concat lines rows up by
    # label; a fresh RangeIndex would silently produce NaN rows if `data` ever
    # carried a non-default index (e.g. after filtering).
    index=data.index,
)
geo_encoded_df

In [None]:
# Swap the raw 'Geography' column for its one-hot columns, placed side by side
# with the remaining features. Rows are matched on index labels.
data_without_geo = data.drop(columns='Geography')
data = pd.concat([data_without_geo, geo_encoded_df], axis='columns')
data.head()

### 2.3 Save the Encoders for Future Use

In [14]:
# Pickle both fitted encoders into the models folder so the identical category
# mappings can be reloaded later.
encoders_to_save = (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoder_geo.pkl', onehot_encoder_geo),
)
for pickle_filename, fitted_encoder in encoders_to_save:
    with (MODELS_DIRPATH / pickle_filename).open('wb') as file:
        pickle.dump(fitted_encoder, file)

### 2.4 Split the Data

In [None]:
# Preview the fully encoded frame before separating features from the target.
data.head()

In [16]:
# 'Exited' is the target; every other column is an input feature.
TARGET_COLUMN = 'Exited'
X = data.drop(columns=TARGET_COLUMN)
y = data[TARGET_COLUMN]

# Hold out 20% of the rows for testing; the fixed seed keeps the partition stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_STATE,
)


### 2.5 Scale Features

In [None]:
# Standardise the features. The scaler learns its statistics from the training
# split only and then applies them to both splits, so test rows never influence
# the scaling. X_train / X_test are rebound to the transformed values here.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
X_train, X_test

### 2.6 Save the Scaler for Future Use

In [18]:
# Pickle the fitted scaler next to the encoders so the same scaling can be reapplied later.
scaler_filepath = MODELS_DIRPATH / 'scaler.pkl'
with scaler_filepath.open('wb') as file:
    pickle.dump(scaler, file)

## 3. Implement ANN

In [19]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping, TensorBoard
import datetime

### 3.1 Build ANN Model

In [20]:
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation and per-epoch shuffling repeat between runs.
# (Bit-exact results may still vary across hardware/backends.)
tf.keras.utils.set_random_seed(RANDOM_STATE)

# Feed-forward binary classifier: two ReLU hidden layers followed by a single
# sigmoid unit. The input width is taken from the number of feature columns.
model = Sequential([
    Dense(64, activation="relu", input_shape=(X_train.shape[1],)),  # hidden layer 1, fed by the input features
    Dense(32, activation="relu"),                                   # hidden layer 2
    Dense(1, activation="sigmoid"),                                 # output layer
])

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

### 3.2 Compile the Model (Configure Optimizer, Loss & Metrics)

In [22]:
# Binary cross-entropy pairs with the single sigmoid output unit.
loss = tf.losses.BinaryCrossentropy()

# Adam with an explicit learning rate of 0.01.
# NOTE(review): the `legacy` optimizer namespace is not present in every
# TensorFlow release; confirm it exists in the version this project pins.
optimizer = tf.optimizers.legacy.Adam(learning_rate=0.01)

# Attach optimizer, loss and the accuracy metric to the model; no training happens here.
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=["accuracy"],
)


### 3.3 Set Up TensorBoard

In [23]:
# Every run logs into its own timestamped folder: <cwd>/logs/fit/<YYYYmmdd-HHMMSS>.
LOG_DIRNAME = f"{datetime.datetime.now():%Y%m%d-%H%M%S}"
LOG_DIRPATH = Path.cwd().joinpath("logs", "fit", LOG_DIRNAME)

# histogram_freq=1 records weight histograms after every epoch.
tensorboard_callback = TensorBoard(
    log_dir=LOG_DIRPATH,
    histogram_freq=1,
)

### 3.4 Set Up Early Stopping

In [24]:
# Number of consecutive epochs without a validation-loss improvement that is
# tolerated before training is halted.
EARLY_STOPPING_PATIENCE = 11

# Watch validation loss; once it has stalled for EARLY_STOPPING_PATIENCE epochs,
# stop training and roll the model back to the weights of its best epoch.
early_stopping_callback = EarlyStopping(
    monitor="val_loss",
    patience=EARLY_STOPPING_PATIENCE,
    restore_best_weights=True,
)


### 3.5 Train the ANN Model

In [None]:
# Train for at most 100 epochs; early stopping may end the run sooner, and
# TensorBoard logs are written along the way.
# NOTE(review): the held-out test split doubles as the validation set here, so
# early stopping and best-weight restoration are tuned on it. Validation metrics
# are therefore optimistic as an estimate of performance on unseen data.
training_callbacks = [tensorboard_callback, early_stopping_callback]
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    validation_data=(X_test, y_test),
    callbacks=training_callbacks,
)

### 3.6 Save the ANN Model for Future Use

In [26]:
# Persist the trained network in the models folder, alongside the pickled
# encoders and scaler, as 'model.keras'.
ANN_MODEL_FILEPATH = MODELS_DIRPATH.joinpath('model.keras')
model.save(ANN_MODEL_FILEPATH)

### 3.7 Load TensorBoard Extension

In [27]:
# Register the TensorBoard notebook extension so the %tensorboard magic becomes available.
%load_ext tensorboard

In [None]:
# Open TensorBoard inline on the parent folder of the per-run log directories
# (path is relative to the current working directory).
%tensorboard --logdir logs/fit