In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle



In [2]:
## Load the raw churn dataset from the CSV file in the working directory
data = pd.read_csv("Churn_Modelling.csv")

## Preview the first five records
data.head(n=5)


Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
## Spot-check a single numeric column; the result is a float64 Series
data['Balance']

0            0.00
1        83807.86
2       159660.80
3            0.00
4       125510.82
          ...    
9995         0.00
9996     57369.61
9997         0.00
9998     75075.31
9999    130142.79
Name: Balance, Length: 10000, dtype: float64

In [4]:
## Preprocess the data: drop the identifier columns, which are irrelevant as features
## errors='ignore' keeps this cell idempotent: when the cell is re-run the columns are
## already gone, and without it pandas raises a KeyError on the second run
data=data.drop(columns=['RowNumber','CustomerId','Surname'],errors='ignore')

In [5]:
## Inspect the frame after dropping the identifier columns (pandas truncates the display)
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [6]:
## Label-encode the two-valued 'Gender' column: Female-0, Male-1
label_encoder_gender = LabelEncoder()
data = data.assign(
    Gender=label_encoder_gender.fit_transform(data['Gender'])
)
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.80,3,1,0,113931.57,1
3,699,France,0,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,1,39,5,0.00,2,1,0,96270.64,0
9996,516,France,1,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,0,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,1,42,3,75075.31,2,1,0,92888.52,1


In [7]:
## One-hot encode 'Geography'
## LabelEncoder is unsuitable here: with more than two categories it would assign the
## integer codes 0,1,2 (France=0, Germany=1, Spain=2, since classes are sorted), and the
## model would read them as an ordering France<Germany<Spain, which is meaningless.
## One-hot encoding gives each country its own 0/1 column instead.

from sklearn.preprocessing import OneHotEncoder
onehotgeo=OneHotEncoder(sparse_output=False) ## Return a dense array instead of a sparse matrix (a sparse matrix stores only the positions of the non-zero values to save memory)
geoenc=onehotgeo.fit_transform(data[['Geography']]) ## Double brackets: the encoder requires 2D input
col=onehotgeo.get_feature_names_out()
## Reuse data's index so that the column-wise concat in the next step aligns row-for-row
## even when data does not carry the default 0..n-1 index
geo_df=pd.DataFrame(geoenc,columns=col,index=data.index)
print(geoenc)
print(geo_df)

[[1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 ...
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]]
      Geography_France  Geography_Germany  Geography_Spain
0                  1.0                0.0              0.0
1                  0.0                0.0              1.0
2                  1.0                0.0              0.0
3                  1.0                0.0              0.0
4                  0.0                0.0              1.0
...                ...                ...              ...
9995               1.0                0.0              0.0
9996               1.0                0.0              0.0
9997               1.0                0.0              0.0
9998               0.0                1.0              0.0
9999               1.0                0.0              0.0

[10000 rows x 3 columns]


In [8]:
## Replace the raw 'Geography' column with its one-hot encoded columns
data = pd.concat(
    [data.drop(columns='Geography'), geo_df],
    axis='columns',
)

In [9]:
## Verify the result: 'Geography' is gone and the three one-hot columns are appended
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [10]:
## Persist the fitted encoders (the scaler is pickled further down, once it has been fitted)
for pkl_path, fitted_encoder in (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoder_geo.pkl', onehotgeo),
):
    with open(pkl_path, 'wb') as file:
        pickle.dump(fitted_encoder, file)

In [11]:
"""
📦 WHY WE USE PICKLE FOR ENCODERS IN MACHINE LEARNING

In machine learning, categorical columns (like 'Gender' or 'Geography') must be converted into numerical form
because models can only process numbers. This is typically done using encoders like:

- LabelEncoder: Converts categories into numeric labels (e.g., Male → 1, Female → 0)
- OneHotEncoder: Converts categories into binary vectors (e.g., France → [1,0,0], Germany → [0,1,0], etc.)

Once these encoders are trained (i.e., "fitted" on the dataset), we should save them for future use so that:
- We don’t need to retrain them every time
- We prevent mismatched encodings that could break predictions

This saving process is called **Pickling**, and it's done using Python’s built-in `pickle` module.
It serializes (saves) Python objects to a file in binary format.

--------------------------------------------------------------------------------
💾 SAVING (PICKLING) THE ENCODERS AFTER FITTING (AS COMMENTS)
--------------------------------------------------------------------------------

# import pickle

# Save the trained LabelEncoder for 'Gender'
# with open('label_encoder_gender.pkl', 'wb') as file:
#     pickle.dump(label_encoder_gender, file)

# Save the trained OneHotEncoder for 'Geography'
# with open('onehot_encoder_geo.pkl', 'wb') as file:
#     pickle.dump(onehotgeo, file)

→ 'wb' stands for "write binary"
→ `pickle.dump()` stores the object to a file

--------------------------------------------------------------------------------
📥 LOADING (UNPICKLING) THE ENCODERS LATER DURING INFERENCE (AS COMMENTS)
--------------------------------------------------------------------------------

# import pickle

# Load the saved LabelEncoder for 'Gender'
# with open('label_encoder_gender.pkl', 'rb') as file:
#     label_encoder_gender = pickle.load(file)

# Load the saved OneHotEncoder for 'Geography'
# with open('onehot_encoder_geo.pkl', 'rb') as file:
#     onehotgeo = pickle.load(file)

→ 'rb' stands for "read binary"
→ `pickle.load()` reads the file and brings the object back into memory

--------------------------------------------------------------------------------
✅ WHY THIS IS IMPORTANT IN REAL-WORLD ML PIPELINES
--------------------------------------------------------------------------------

- Ensures that training and inference pipelines use the SAME transformation logic
- Avoids retraining or errors due to different category mappings
- Makes your code production-ready, reliable, and scalable
- Especially useful when deploying models as APIs, microservices, or apps

Conclusion: Always pickle your encoders, scalers, or any transformation logic after fitting,
and load them back before making predictions on new data. It ensures repeatability, efficiency,
and reliability in your ML workflow.
"""


'\n📦 WHY WE USE PICKLE FOR ENCODERS IN MACHINE LEARNING\n\nIn machine learning, categorical columns (like \'Gender\' or \'Geography\') must be converted into numerical form\nbecause models can only process numbers. This is typically done using encoders like:\n\n- LabelEncoder: Converts categories into numeric labels (e.g., Male → 1, Female → 0)\n- OneHotEncoder: Converts categories into binary vectors (e.g., France → [1,0,0], Germany → [0,1,0], etc.)\n\nOnce these encoders are trained (i.e., "fitted" on the dataset), we should save them for future use so that:\n- We don’t need to retrain them every time\n- We prevent mismatched encodings that could break predictions\n\nThis saving process is called **Pickling**, and it\'s done using Python’s built-in `pickle` module.\nIt serializes (saves) Python objects to a file in binary format.\n\n--------------------------------------------------------------------------------\n💾 SAVING (PICKLING) THE ENCODERS AFTER FITTING (AS COMMENTS)\n---------

In [12]:
## Re-check the fully encoded frame before it is split into features and target
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [13]:
# ✅ Libraries needed for splitting and scaling
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# ✅ Separate the target from the predictors
Y = data['Exited']                  # dependent variable: whether the customer exited or not
X = data.drop(columns='Exited')     # independent variables: every remaining column

# ✅ Hold out part of the data for testing
# test_size=0.2   → 20% of the rows go to the test set
# random_state=42 → fixed seed, so the same split is produced on every run
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

# ✅ Feature scaling
# ⚠️ Why scale? The features live on very different numeric ranges (e.g. 'Age' is in the
#    tens while 'Balance' reaches the hundreds of thousands). Scale-sensitive models such
#    as logistic regression, SVM, KNN and neural networks would otherwise give the
#    large-valued features undue importance.
# 🎯 Intuition: comparing a swimmer, a runner and a cyclist is only fair once their results
#    are expressed in the same unit. StandardScaler does that for the features by removing
#    each feature's mean and scaling it to unit variance (mean = 0, std = 1).
scaler = StandardScaler()

# Learn the mean/std from the training rows only, then apply that same transformation
# to both splits.
# ⚠️ Never fit the scaler on test data — that would leak test information into training.
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [14]:
## Persist the fitted StandardScaler so the identical scaling can be reapplied to new data later
with open('scaler.pkl','wb') as file:
    pickle.dump(scaler,file)

In [15]:
import pickle
# Reload the encoders that were pickled earlier in this notebook, the same way an
# inference script would. Only unpickle files you created or trust: pickle.load can
# execute arbitrary code from a malicious file.

# Load the saved LabelEncoder for 'Gender' ('rb' = read binary)
with open('label_encoder_gender.pkl', 'rb') as file:
    label_encoder_gender = pickle.load(file)

# Load the saved OneHotEncoder for 'Geography'
with open('onehot_encoder_geo.pkl', 'rb') as file:
    onehotgeo = pickle.load(file)

ANN Implementation

In [16]:
import tensorflow as ts
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime




In [17]:
## Input shape handed to the first Dense layer: a one-element tuple holding the feature count
(X_train.shape[1],) ## X_train.shape[0] -- no. of rows, X_train.shape[1] -- no. of columns

## The trailing comma makes (n,) a one-element tuple rather than a parenthesised number

(12,)

In [18]:
## Build our ANN model one layer at a time
model = Sequential()

## HL1 (hidden layer 1, 64 neurons): connected to the input, so it declares input_shape
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))

## HL2 (hidden layer 2, 32 neurons): fed by HL1, so no input_shape is needed
model.add(Dense(32, activation='relu'))

## Output layer (1 neuron): sigmoid, because this is binary classification (0 or 1)
model.add(Dense(1, activation='sigmoid'))




In [19]:
## Print the layer-by-layer architecture and parameter counts
## Params of a Dense layer = inputs*units + units, e.g. 12*64 + 64 = 832 for the first layer
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                832       
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 1)                 33        
                                                                 
Total params: 2945 (11.50 KB)
Trainable params: 2945 (11.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [20]:
"""
📌 MODEL COMPILATION IN TENSORFLOW / KERAS

🔹 General Syntax:
model.compile(
    optimizer=OPTIMIZER,
    loss=LOSS_FUNCTION,
    metrics=[LIST_OF_METRICS]
)

🔸 Parameters:
- optimizer: Algorithm to update model weights (e.g., "adam", "sgd", or a custom optimizer like Adam(learning_rate=0.01))
- loss: Function to calculate how wrong the model is (e.g., "binary_crossentropy", "mse", "sparse_categorical_crossentropy")
- metrics: List of metrics to monitor during training (e.g., ["accuracy"], ["mae"])

-------------------------------------------------------
✅ Examples:
-------------------------------------------------------

1. 🔹 Binary Classification (e.g., predict 0 or 1):
Used when the target variable has 2 classes.
"""
import tensorflow
## Optimizer and loss created as explicit objects (instead of strings) so that
## hyperparameters such as the learning rate can be set
opt=tensorflow.keras.optimizers.Adam(learning_rate=0.01) ## many optimizers are there apart from Adam
lo=tensorflow.keras.losses.BinaryCrossentropy() ## binary target, matching the single sigmoid output neuron

In [21]:
## Compile the model with the optimizer and loss objects created above
## (binary classification -> binary cross-entropy; multi-class -> sparse categorical cross-entropy)
model.compile(
    loss=lo,
    optimizer=opt,
    metrics=['accuracy'],
)

In [22]:
## Compile the model again, this time using Keras' string shortcuts
## NOTE: this call replaces the configuration set by the previous compile, so training
## runs with a default-configured "adam" optimizer, not the Adam(learning_rate=0.01)
## object created earlier. Keep only the compile call you actually want.
## (binary classification -> binary cross-entropy; multi-class -> sparse categorical cross-entropy)
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=['accuracy'],
)




In [23]:
"""
📌 SETTING UP TENSORBOARD IN KERAS / TENSORFLOW — FULL INTUITION WITH CODE AND COMMENTS

-------------------------------------------------------
🔹 PURPOSE:
TensorBoard is a visualization tool that lets us monitor the training of our model.
It provides graphs for:
- Loss & accuracy over epochs
- Histograms of weights and biases
- Model graph structure
- Learning rate trends
-------------------------------------------------------
"""

# 🔸 STEP 1: Import necessary modules
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping
import datetime

"""
We import:
- TensorBoard: for logging training stats
- EarlyStopping: (optional) stops training early if no improvement
- datetime: to create unique folder names based on current date & time
"""

# 🔸 STEP 2: Build a per-run TensorBoard log directory from the current date and time
log_dir = "logs/fit/{}".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

"""
📍 Intuition Behind datetime:
- datetime.datetime.now() gives the current date and time.
- strftime("%Y%m%d-%H%M%S") formats it as:
    %Y → Year   (e.g., 2025)
    %m → Month  (e.g., 08)
    %d → Day    (e.g., 03)
    %H → Hour   (24hr format)
    %M → Minute
    %S → Second
- So, datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
  might return something like "20250803-225612"
- This makes your log folder: logs/fit/20250803-225612
- Each training run has a unique folder → logs don’t overwrite
"""

# 🔸 STEP 3: Configure the TensorBoard callback that is later passed to model.fit()
tensorboard_callback = TensorBoard(
    log_dir=log_dir,       # directory the logs of this run are written to
    write_graph=True,      # include the computational graph
    write_images=False,    # do not log model weights as images
    histogram_freq=1,      # log histograms every epoch
)

In [24]:
"""
📌 EARLY STOPPING IN KERAS / TENSORFLOW — FULL EXPLANATION WITH COMMENTS & GENERAL SYNTAX

🔹 WHAT IS EARLY STOPPING?
EarlyStopping is a built-in Keras callback that:
→ Monitors model performance during training
→ Stops training when it detects no further improvement
→ Optionally restores the model weights from the best epoch

This is useful for:
✅ Preventing overfitting
✅ Saving time and compute
✅ Automatically picking the best version of the model

-------------------------------------------------------
🔹 GENERAL SYNTAX:

EarlyStopping(
    monitor='val_loss',          # Metric to monitor ('val_loss', 'val_accuracy', etc.)
    patience=5,                  # Number of epochs to wait before stopping if no improvement
    restore_best_weights=True,  # Restore model weights from the best epoch
    min_delta=0,                # Minimum change to qualify as an improvement (optional)
    mode='auto',                # 'auto', 'min', or 'max' — auto figures out based on monitored metric
    verbose=1                   # Show logs when training stops (optional)
)

-------------------------------------------------------
🔹 EXAMPLE SETUP:
"""

# 🔸 Import EarlyStopping
from tensorflow.keras.callbacks import EarlyStopping

# 🔸 Define the callback
early_stopping_callback = EarlyStopping(
    monitor='val_loss',           # watch the validation loss
    patience=10,                  # wait 10 epochs for an improvement before stopping
    verbose=1,                    # print a message when training is stopped
    restore_best_weights=True,    # revert to the weights of the best epoch after stopping
)

"""
-------------------------------------------------------
🔹 USAGE IN TRAINING:
Just pass it as a callback to model.fit()

model.fit(
    X_train, Y_train,
    epochs=100,
    validation_data=(X_test, Y_test),
    callbacks=[early_stopping_callback]
)

-------------------------------------------------------
🔸 MONITOR OPTIONS:
- 'val_loss': Most common, looks at validation loss
- 'val_accuracy': For classification tasks

🔸 PATIENCE:
- Number of epochs to allow for no improvement
- E.g., if patience=5, training stops if no better result in 5 rounds

🔸 RESTORE_BEST_WEIGHTS:
- True = after stopping, model weights will revert to the best epoch
- False = model keeps the weights from the last epoch (not recommended)
"""


"\n-------------------------------------------------------\n🔹 USAGE IN TRAINING:\nJust pass it as a callback to model.fit()\n\nmodel.fit(\n    X_train, Y_train,\n    epochs=100,\n    validation_data=(X_test, Y_test),\n    callbacks=[early_stopping_callback]\n)\n\n-------------------------------------------------------\n🔸 MONITOR OPTIONS:\n- 'val_loss': Most common, looks at validation loss\n- 'val_accuracy': For classification tasks\n\n🔸 PATIENCE:\n- Number of epochs to allow for no improvement\n- E.g., if patience=5, training stops if no better result in 5 rounds\n\n🔸 RESTORE_BEST_WEIGHTS:\n- True = after stopping, model weights will revert to the best epoch\n- False = model keeps the weights from the last epoch (not recommended)\n"

In [25]:
## Train for at most 100 epochs; TensorBoard logs the run and early stopping ends it
## once val_loss stops improving
## NOTE(review): the test split doubles as the validation set here, so early stopping and
## best-weight selection see the test data — consider a separate validation split if
## X_test is meant to give a final unbiased evaluation
history = model.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_test, Y_test),
    epochs=100,
    callbacks=[tensorboard_callback, early_stopping_callback],
)

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 24: early stopping


In [26]:
"""
📦 SAVING MODELS: .h5 VS .pkl FILES — FULL INTUITION & USAGE EXPLAINED

----------------------------------------------------------
🔹 WHAT IS A `.h5` FILE?
----------------------------------------------------------
- `.h5` stands for Hierarchical Data Format version 5
- It is used in TensorFlow/Keras to save an entire deep learning model
- This includes:
  ✅ Model architecture (layers, activations)
  ✅ Trained weights
  ✅ Training configuration (optimizer, loss)
  ✅ Optimizer state (to resume training seamlessly)

📌 Usage:
- Save a model:      model.save("model_name.h5")
- Load a model:      model = load_model("model_name.h5")

----------------------------------------------------------
🔹 WHAT IS A `.pkl` FILE?
----------------------------------------------------------
- `.pkl` is a Python Pickle file — used to serialize and save general Python objects
- Commonly used for:
  ✅ Scikit-learn models (RandomForest, XGBoost, etc.)
  ✅ Preprocessors like LabelEncoder, OneHotEncoder
  ✅ Scalers like StandardScaler, MinMaxScaler
  ✅ Python lists, dicts, etc.

📌 Usage:
- Save an object:    pickle.dump(object, file)
- Load an object:    object = pickle.load(file)

----------------------------------------------------------
🔸 COMPARISON TABLE

| Feature               | .h5 (HDF5 Format)                  | .pkl (Pickle Format)                   |
|-----------------------|------------------------------------|----------------------------------------|
| Purpose               | Save deep learning models          | Save any Python object                 |
| Format Type           | Hierarchical data structure        | Python-specific serialization          |
| Usage Library         | TensorFlow / Keras                 | Python / sklearn / preprocessing       |
| Language Independent  | ✅ Yes (readable in other tools)   | ❌ No (Python-specific)                |
| Contents              | Model + weights + optimizer state  | Any Python object (model, encoder, etc.) |

----------------------------------------------------------
🔹 WHEN TO USE WHAT?

✅ Use `.h5` when working with deep learning (Keras/TensorFlow) models  
✅ Use `.pkl` when saving preprocessing objects, scikit-learn models, or general Python structures

"""


'\n📦 SAVING MODELS: .h5 VS .pkl FILES — FULL INTUITION & USAGE EXPLAINED\n\n----------------------------------------------------------\n🔹 WHAT IS A `.h5` FILE?\n----------------------------------------------------------\n- `.h5` stands for Hierarchical Data Format version 5\n- It is used in TensorFlow/Keras to save an entire deep learning model\n- This includes:\n  ✅ Model architecture (layers, activations)\n  ✅ Trained weights\n  ✅ Training configuration (optimizer, loss)\n  ✅ Optimizer state (to resume training seamlessly)\n\n📌 Usage:\n- Save a model:      model.save("model_name.h5")\n- Load a model:      model = load_model("model_name.h5")\n\n----------------------------------------------------------\n🔹 WHAT IS A `.pkl` FILE?\n----------------------------------------------------------\n- `.pkl` is a Python Pickle file — used to serialize and save general Python objects\n- Commonly used for:\n  ✅ Scikit-learn models (RandomForest, XGBoost, etc.)\n  ✅ Preprocessors like LabelEncoder, 

In [27]:
## Save the trained network to a single HDF5 file
## NOTE(review): the warning printed below comes from Keras' saving API — presumably the
## legacy-HDF5 notice; confirm, and check what consumers of model.h5 expect before
## switching to another format
model.save('model.h5')

  saving_api.save_model(


In [28]:
## Load the TensorBoard notebook extension, which registers the %tensorboard magic used in the next cell

%load_ext tensorboard

In [29]:
## Launch TensorBoard inline on the parent log folder, so every timestamped run under logs/fit is listed
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6006 (pid 16716), started 1 day, 14:23:10 ago. (Use '!kill 16716' to kill it.)