#### **Model Training**

In [None]:
import pandas as pd

# Load the churn dataset from the working directory and preview the first rows.
csv_path = 'Churn_Modelling.csv'
data = pd.read_csv(csv_path)
data.head()

In [None]:
### Count missing values per column (the original author notes that none are present)
data.isna().sum()

In [None]:
# Print column dtypes and non-null counts for the frame as loaded.
data.info()

In [None]:
# Drop the identifier columns listed below; they are excluded from the features.
# Rebinding `data` (instead of inplace=True) together with errors='ignore'
# keeps this cell idempotent: re-running it no longer raises KeyError because
# the columns were already removed by a previous execution.
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')
data.head()

In [None]:
## Check for duplicated records: number of rows that exactly repeat an earlier row
data.duplicated().sum()

In [None]:
## Separate the predictors (X) from the target (Y)
# Y is selected with a list so it stays a single-column DataFrame, not a Series.
X = data.drop(columns=['Exited'])
Y = data.loc[:, ['Exited']]

In [None]:
# Re-inspect dtypes and non-null counts now that the identifier columns are gone.
data.info()

In [None]:
# Split the feature columns by dtype: object columns are treated as
# categorical, everything else as numerical.
categorical_cols = X.select_dtypes(include=['object']).columns
numerical_cols = X.select_dtypes(exclude=['object']).columns
for label, cols in (
    ("Categorical Columns:", categorical_cols),
    ("Numerical Columns:", numerical_cols),
):
    print(label, cols)

In [None]:
# Distinct values present in the Gender column.
data['Gender'].unique()

In [None]:
# Distinct values present in the Geography column.
data['Geography'].unique()

In [None]:
# Category lists handed to the OrdinalEncoder; a value's position in its list
# becomes its integer code, so the order here is significant.
gender_categories, geo_categories = (
    ['Male', 'Female'],
    ['Germany', 'Spain', 'France'],
)

In [None]:
# Number of rows per Geography value.
data['Geography'].value_counts()

In [None]:
# Number of rows per Gender value.
data['Gender'].value_counts()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Histogram (with KDE overlay) for every numerical feature, one figure each.
# The figure is created inside the loop so every plot gets the 8x6 size;
# a single plt.figure() before the loop only sized the first plot, because
# plt.show() ends that figure and later plots fell back to the default size.
for col in numerical_cols:
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.histplot(data=data, x=col, kde=True, ax=ax)
    ax.set_title(f"Distribution of {col}")
    plt.show()
    plt.close(fig)  # release the figure so the loop does not accumulate them

In [None]:
## Do the same for categorical data
# Count plot for every categorical feature, one figure each.
# sns.catplot is figure-level: it always builds its own figure, so the
# plt.figure(figsize=(8,6)) that preceded the loop was left empty and the
# requested size was never applied. The axes-level countplot draws on the
# axes we create here instead.
for col in categorical_cols:
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.countplot(data=data, x=col, ax=ax)
    ax.set_title(f"Count of {col}")
    plt.show()
    plt.close(fig)  # release the figure so the loop does not accumulate them

In [None]:
## Correlation Heatmap
import seaborn as sns
import matplotlib.pyplot as plt

# Pairwise correlation between the numerical feature columns.
corr_matrix = data[numerical_cols].corr()

fig, ax = plt.subplots(figsize=(14, 10))
heatmap_style = dict(
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    linewidths=0.5,
    annot_kws={"size": 10},
)
sns.heatmap(corr_matrix, ax=ax, **heatmap_style)

ax.set_title("Correlation Heatmap", fontsize=16, fontweight="bold")
fig.tight_layout()
plt.show()

In [None]:
# Preview the frame once more before building the preprocessing pipelines.
data.head()

In [None]:
from sklearn.impute import SimpleImputer ## HAndling Missing Values
from sklearn.preprocessing import StandardScaler # HAndling Feature Scaling
from sklearn.preprocessing import OrdinalEncoder # Ordinal Encoding
## pipelines
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [None]:
## Numerical pipeline: median imputation, then standardisation
num_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical pipeline: most-frequent imputation, ordinal encoding, then scaling.
# OrdinalEncoder pairs categories[i] with the i-th column it receives, so the
# list is built by column NAME from categorical_cols rather than hard-coding
# [geo, gender]. A reordered or newly added categorical column now fails right
# here with a KeyError instead of being paired with the wrong category list.
category_orders = {
    'Geography': geo_categories,
    'Gender': gender_categories,
}
ordered_categories = [category_orders[col] for col in categorical_cols]

# NOTE(review): Geography is nominal; ordinal codes impose an arbitrary order
# on it. Consider one-hot encoding if downstream consumers can take the extra
# columns - confirm before changing, since it alters the feature count.
cat_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('ordinalencoder', OrdinalEncoder(categories=ordered_categories)),
    ('scaler', StandardScaler()),
])

# Route each column group through its own pipeline.
preprocessor = ColumnTransformer([
    ('num_pipeline', num_pipeline, numerical_cols),
    ('cat_pipeline', cat_pipeline, categorical_cols),
])

In [None]:
## Train test split
from sklearn.model_selection import train_test_split

# 70/30 split; the fixed random_state makes the partition reproducible.
split_parts = train_test_split(X, Y, test_size=0.30, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Fit the preprocessor on the training features only, then apply the fitted
# transform to the test features (no test-set statistics leak into the fit).
train_matrix = preprocessor.fit_transform(X_train)
test_matrix = preprocessor.transform(X_test)

# Feature names are only available after fitting; fetch them once and reuse.
feature_names = preprocessor.get_feature_names_out()

# Keep the original row index so the transformed frames stay aligned with
# y_train / y_test (a fresh 0..n-1 index would no longer match the targets).
# NOTE: this cell overwrites X_train / X_test, so re-run the train/test split
# cell before re-running this one.
X_train = pd.DataFrame(train_matrix, columns=feature_names, index=X_train.index)
X_test = pd.DataFrame(test_matrix, columns=feature_names, index=X_test.index)

In [None]:
# Preview the preprocessed training features.
X_train.head()

In [None]:
# Preview the preprocessed test features.
X_test.head()

In [None]:
import pickle

# Serialise the fitted preprocessor to preprocessor.pkl so it can be reloaded later.
with open("preprocessor.pkl", "wb") as pkl_file:
    pickle.dump(preprocessor, pkl_file)

#### **ANN IMPLEMENTATION**

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime

In [None]:
# Input shape for the network: a 1-tuple holding the number of feature columns in X_train.
(X_train.shape[1], )

In [None]:
# dropout layer to prevent overfitting
from tensorflow.keras.layers import Dropout

In [None]:
# Feed-forward binary classifier: two ReLU hidden layers (64, 32 units) and a
# single sigmoid output unit.
# The input shape is declared with an explicit Input layer; passing
# `input_shape=` to the first Dense layer is deprecated for Sequential models
# in Keras 3 and emits a warning there.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    # Dropout(0.5),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

In [None]:
# Print the layer-by-layer summary of the network.
model.summary()

In [None]:
import tensorflow

# Adam optimizer; the learning rate is named so it is easy to find and tune.
LEARNING_RATE = 0.001
opt = tensorflow.keras.optimizers.Adam(learning_rate=LEARNING_RATE)

In [None]:
## Compile the model (binary cross-entropy loss)
# Only accuracy is tracked at the moment; the recall / precision metrics are
# left commented out below.
tracked_metrics = [
    'accuracy',
    # tf.keras.metrics.Recall(name='recall'),
    # tf.keras.metrics.Precision(name='precision')
]
model.compile(
    loss="binary_crossentropy",
    optimizer=opt,
    metrics=tracked_metrics,
)

In [None]:
## TensorBoard setup
# Each run logs into its own timestamped sub-directory of logs/fit/.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp
tensorflow_callback = TensorBoard(histogram_freq=1, log_dir=log_dir)

In [None]:
## Early Stopping
# Stop once val_loss has not improved for 5 consecutive epochs and roll the
# model back to the best weights seen.
early_stopping_options = dict(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
early_stopping_callback = EarlyStopping(**early_stopping_options)

In [None]:
### Train the model
# Validation data is carved out of the TRAINING set (the last 20% of rows)
# rather than reusing X_test / y_test. The validation loss drives
# EarlyStopping and restore_best_weights, so validating on the test set let
# it influence model selection and made the later test-set evaluation
# optimistically biased. The history keys (val_loss, val_accuracy) are
# unchanged, so the plotting cells keep working.
# Arrays are passed explicitly so validation_split slices plain NumPy data.
history = model.fit(
    X_train.to_numpy(dtype="float32"),
    y_train.to_numpy(dtype="float32"),
    validation_split=0.2,
    epochs=200,  # upper bound; EarlyStopping normally ends training sooner
    callbacks=[tensorflow_callback, early_stopping_callback]
)

In [None]:
# Persist the trained network to model.h5 in the working directory.
# NOTE(review): the .h5 extension selects the legacy HDF5 format; newer Keras
# releases recommend the native .keras format - confirm what the code that
# loads this file expects before renaming.
model.save('model.h5')

In [None]:
# Pull the per-epoch training / validation curves out of the fit history.
training_log = history.history
loss, val_loss = training_log['loss'], training_log['val_loss']
acc, val_acc = training_log['accuracy'], training_log['val_accuracy']

In [None]:
# Plot loss and accuracy curves side by side.
# `epochs` was previously computed but never used, and the curves were drawn
# against a 0-based index under an "Epochs" label; they are now plotted
# against epoch numbers 1..N.
epochs = len(loss)
epoch_axis = range(1, epochs + 1)

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(14, 5))

loss_ax.plot(epoch_axis, loss, label='Training Loss')
loss_ax.plot(epoch_axis, val_loss, label='Validation Loss')
loss_ax.set_title('Loss over Epochs')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

acc_ax.plot(epoch_axis, acc, label='Training Accuracy')
acc_ax.plot(epoch_axis, val_acc, label='Validation Accuracy')
acc_ax.set_title('Accuracy over Epochs')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

plt.show()

In [None]:
## 🔴 CONFUSION MATRIX & CLASSIFICATION REPORT
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Predicted probabilities from the sigmoid output, thresholded at 0.5 into
# hard 0/1 labels.
y_pred_prob = model.predict(X_test)
above_threshold = y_pred_prob >= 0.5
y_pred = above_threshold.astype(int)

In [None]:
# Confusion matrix of true vs. predicted labels, drawn as an annotated heatmap.
cm = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix - Churn Prediction")
plt.show()

# Per-class precision / recall / F1 summary.
report_text = classification_report(y_test, y_pred)
print("Classification Report:\n")
print(report_text)

In [None]:
## Load Tensorboard Extension
# %load_ext tensorboard

In [None]:
# %tensorboard --logdir logs/fit/20260112-195914/

DON'T USE THIS CODE