<div style="text-align:center;font-size:22pt; font-weight:bold;color:white;border:solid black 1.5pt;background-color:#1e7263;">
    Understanding Model History Object: Classification Task
</div>

In [1]:
# ======================================================================= #
# Course: Deep Learning Complete Course (CS-501)
# Author: Dr. Saad Laouadi
# Institution: Quant Coding Versity Academy
# Date: December 25, 2024
#
# ==========================================================
# Lesson: Understanding Model History Object in Keras
#         Analyzing and Visualizing Training Progress
# ==========================================================
# ## Learning Objectives
# This guide will enable you to:
# 1. Access and interpret the model.fit() history object
# 2. Extract and analyze training metrics over epochs
# 3. Visualize training and validation metrics
# 4. Identify optimal training epochs and model performance
# 5. Detect overfitting through history analysis
# =======================================================================
#          Copyright © Dr. Saad Laouadi 2024
# =======================================================================

In [16]:
# ==================================================== #
#        Load Required Libraries
# ==================================================== #

import os  
import shutil
from datetime import datetime

# Disable Metal API Validation
os.environ["METAL_DEVICE_WRAPPER_TYPE"] = "0"  


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# import tensorflow
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Set styling for better visualization
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

print("="*72)

%reload_ext watermark
%watermark -a "Dr. Saad Laouadi" -u -d -m

print("="*72)
print("Imported Packages and Their Versions:")
print("="*72)

%watermark -iv
print("="*72)

# Global Config
RANDOM_STATE = 101

Author: Dr. Saad Laouadi

Last updated: 2024-12-30

Compiler    : Clang 14.0.6 
OS          : Darwin
Release     : 24.1.0
Machine     : arm64
Processor   : arm
CPU cores   : 16
Architecture: 64bit

Imported Packages and Their Versions:
tensorflow: 2.16.2
sklearn   : 1.5.1
matplotlib: 3.9.2
pandas    : 2.2.2
keras     : 3.6.0
seaborn   : 0.13.2
numpy     : 1.26.4



In [17]:
def cleanup_directory(directory_path):
    """
    Deletes the specified directory and all its contents.

    Args:
        directory_path (str): Path to the directory to delete.
    """
    if os.path.exists(directory_path) and os.path.isdir(directory_path):
        shutil.rmtree(directory_path)
        print(f"Directory '{os.path.basename(directory_path)}' deleted successfully.")
    else:
        print(f"Directory '{os.path.basename(directory_path)}' does not exist or is not a directory.")

### ML Model Raodmap
1. Read the data
2. Explore the data

3. Processing
    - Numerical features (scaling)
    
4. Splitting the data

In [6]:
# ==================================================== #
#        Implementing ModelCheckpoint 
#        Callback with Synthetic data
# ==================================================== #

In [18]:
# Create synthetic dataset (same as before)
def create_synthetic_data(n_samples=1000, random_state = 0):
    X, y = make_classification(
        n_samples=n_samples,
        n_features=20,
        n_informative=15,
        n_redundant=5,
        random_state=42
    )
    
    y = tf.keras.utils.to_categorical(y)
    
    return train_test_split(X, y, test_size=0.2, random_state=random_state)

# Create a simple neural network model (same as before)
def create_model(input_shape):
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape = input_shape),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(2, activation='softmax')
    ])
    
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

In [19]:
# Main training function with early stopping

# Generate synthetic data
X_train, X_test, y_train, y_test = create_synthetic_data()

# Create model
model = create_model((X_train.shape[1],))

# Create checkpoint directory if it doesn't exist
checkpoint_dir = 'model_checkpoints'
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)

# Define different types of EarlyStopping callbacks

# 1. Basic early stopping monitoring validation loss
early_stopping_basic = EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1
)

# Train the model with early stopping callback
history = model.fit(
    X_train, y_train,
    epochs=100,  # Set a high number of epochs, early stopping will prevent overfitting
    batch_size=32,
    validation_split=0.2,
    callbacks=[
        early_stopping_basic
    ],
    verbose=1
)

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.5971 - loss: 0.7338 - val_accuracy: 0.7500 - val_loss: 0.5350
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7660 - loss: 0.4692 - val_accuracy: 0.8000 - val_loss: 0.4316
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8503 - loss: 0.3463 - val_accuracy: 0.8625 - val_loss: 0.3693
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8914 - loss: 0.2717 - val_accuracy: 0.8500 - val_loss: 0.3319
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9123 - loss: 0.2296 - val_accuracy: 0.8687 - val_loss: 0.3023
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9205 - loss: 0.2037 - val_accuracy: 0.8750 - val_loss: 0.2958
Epoch 7/100
[1m20/20[0m [32m━━

In [7]:
# 2. Early stopping with restoration of best weights
early_stopping_with_restore = EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1,
    restore_best_weights=True
)

# Train the model with early stopping callback
history = model.fit(
    X_train, y_train,
    epochs=100,  # Set a high number of epochs, early stopping will prevent overfitting
    batch_size=32,
    validation_split=0.2,
    callbacks=[
        early_stopping_with_restore
    ],
    verbose=1
)

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 1.0000 - loss: 0.0156 - val_accuracy: 0.9375 - val_loss: 0.1971
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0159 - val_accuracy: 0.9375 - val_loss: 0.1971
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0140 - val_accuracy: 0.9375 - val_loss: 0.2033
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0113 - val_accuracy: 0.9375 - val_loss: 0.1992
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0110 - val_accuracy: 0.9375 - val_loss: 0.2000
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0125 - val_accuracy: 0.9375 - val_loss: 0.2013
Epoch 7/100
[1m20/20[0m [32m━━

In [8]:
# 3. Early stopping with minimum change threshold
early_stopping_min_delta = EarlyStopping(
    monitor='val_loss',
    patience=5,
    min_delta=0.01,  # Minimum change to qualify as an improvement
    verbose=1
)


# Train the model with early stopping callback
history = model.fit(
    X_train, y_train,
    epochs=100,  
    batch_size=32,
    validation_split=0.2,
    callbacks=[
        early_stopping_min_delta
    ],
    verbose=1
)

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 1.0000 - loss: 0.0149 - val_accuracy: 0.9375 - val_loss: 0.2007
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0118 - val_accuracy: 0.9438 - val_loss: 0.2012
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0143 - val_accuracy: 0.9375 - val_loss: 0.2003
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0103 - val_accuracy: 0.9375 - val_loss: 0.1996
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0095 - val_accuracy: 0.9375 - val_loss: 0.2006
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0112 - val_accuracy: 0.9375 - val_loss: 0.2023
Epoch 6: early stopping


In [9]:
# 4. Early stopping monitoring validation accuracy
early_stopping_accuracy = EarlyStopping(
    monitor='val_accuracy',
    mode='max',  # For accuracy, we want to maximize
    patience=5,
    verbose=1
)

# Train the model with early stopping callback
history = model.fit(
    X_train, y_train,
    epochs=100,  # Set a high number of epochs, early stopping will prevent overfitting
    batch_size=32,
    validation_split=0.2,
    callbacks=[
        early_stopping_accuracy
    ],
    verbose=1
)

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 1.0000 - loss: 0.0104 - val_accuracy: 0.9375 - val_loss: 0.2056
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0080 - val_accuracy: 0.9375 - val_loss: 0.2026
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0081 - val_accuracy: 0.9375 - val_loss: 0.2042
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0065 - val_accuracy: 0.9375 - val_loss: 0.2017
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0065 - val_accuracy: 0.9375 - val_loss: 0.2048
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0074 - val_accuracy: 0.9375 - val_loss: 0.2035
Epoch 6: early stopping


In [10]:
# Combine with ModelCheckpoint for best practice
checkpoint_best = ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, 'best_model_early_stopping.keras'),
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1
)

# Train the model with callbacks
history = model.fit(
    X_train, y_train,
    epochs=100,  # Set a high number of epochs, early stopping will prevent overfitting
    batch_size=32,
    validation_split=0.2,
    callbacks=[
        early_stopping_basic,
        early_stopping_with_restore,
        early_stopping_min_delta,
        early_stopping_accuracy,
        checkpoint_best
    ],
    verbose=1
)

Epoch 1/100
[1m14/20[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m0s[0m 8ms/step - accuracy: 1.0000 - loss: 0.0060
Epoch 1: val_loss improved from inf to 0.20224, saving model to model_checkpoints/best_model_early_stopping.keras
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 1.0000 - loss: 0.0060 - val_accuracy: 0.9375 - val_loss: 0.2022
Epoch 2/100
[1m15/20[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m0s[0m 8ms/step - accuracy: 1.0000 - loss: 0.0053
Epoch 2: val_loss did not improve from 0.20224
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0055 - val_accuracy: 0.9375 - val_loss: 0.2045
Epoch 3/100
[1m15/20[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m0s[0m 8ms/step - accuracy: 1.0000 - loss: 0.0063
Epoch 3: val_loss did not improve from 0.20224
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0061 - val_accuracy: 0.9375 - val_

In [22]:
def demonstrate_early_stopping_scenarios(X_train, y_train, X_test, y_test):
    """
    Demonstrates different early stopping scenarios and their effects
    """
    # Create checkpoint directory
    checkpoint_dir = 'model_checkpoints'
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # Scenario 1: Basic early stopping
    print("\nScenario 1: Basic Early Stopping")
    model1 = create_model((X_train.shape[1],))
    early_stopping1 = EarlyStopping(
        monitor='val_loss',
        patience=3,
        verbose=1
    )
    history1 = model1.fit(
        X_train, y_train,
        epochs=100,
        batch_size=32,
        validation_split=0.2,
        callbacks=[early_stopping1],
        verbose=1
    )

    # Scenario 2: Early stopping with baseline
    print("\nScenario 2: Early Stopping with Baseline Target")
    model2 = create_model((X_train.shape[1],))
    early_stopping2 = EarlyStopping(
        monitor='val_loss',
        baseline=0.3,  # Stop if we achieve this target loss
        patience=3,
        verbose=1
    )
    history2 = model2.fit(
        X_train, y_train,
        epochs=100,
        batch_size=32,
        validation_split=0.2,
        callbacks=[early_stopping2],
        verbose=1
    )

    # Scenario 3: Early stopping with best weights restoration
    print("\nScenario 3: Early Stopping with Best Weights Restoration")
    model3 = create_model((X_train.shape[1],))
    early_stopping3 = EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True,
        verbose=1
    )
    checkpoint3 = ModelCheckpoint(
        filepath=os.path.join(checkpoint_dir, 'scenario3_best_model.keras'),
        monitor='val_loss',
        save_best_only=True,
        verbose=1
    )
    history3 = model3.fit(
        X_train, y_train,
        epochs=100,
        batch_size=32,
        validation_split=0.2,
        callbacks=[early_stopping3, checkpoint3],
        verbose=1
    )

    return {
        'basic': (model1, history1),
        'baseline': (model2, history2),
        'restore_best': (model3, history3)
    }


# Run different scenarios
results = demonstrate_early_stopping_scenarios(X_train, y_train, X_test, y_test)

# You can now compare the results
for scenario, (model, history) in results.items():
    val_loss = min(history.history['val_loss'])
    epochs_run = len(history.history['loss'])
    print(f"\nScenario: {scenario}")
    print(f"Best validation loss: {val_loss:.4f}")
    print(f"Training stopped after {epochs_run} epochs")


Scenario 1: Basic Early Stopping
Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.6752 - loss: 0.7237 - val_accuracy: 0.8062 - val_loss: 0.4603
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8016 - loss: 0.4364 - val_accuracy: 0.8687 - val_loss: 0.3593
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8617 - loss: 0.3414 - val_accuracy: 0.8687 - val_loss: 0.3299
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8990 - loss: 0.2680 - val_accuracy: 0.8875 - val_loss: 0.3057
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9033 - loss: 0.2518 - val_accuracy: 0.8875 - val_loss: 0.2863
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9288 - loss: 0.2184 - val_accuracy: 0.8938 - val_loss: 0.2799

In [20]:
# Cleanup 
cleanup_directory(checkpoint_dir)

Directory 'model_checkpoints' deleted successfully.


## Key differences from the ModelCheckpoint example:

 - EarlyStopping is focused on preventing overfitting by monitoring training metrics
 - It includes parameters like patience and min_delta for fine-tuning stopping conditions
 - The restore_best_weights option can automatically restore the model to its best state
 - It can monitor different metrics (loss, accuracy) with different modes (min, max)
 - We set a higher number of epochs (100) since early stopping will prevent unnecessary training