<div style="text-align:center;font-size:22pt; font-weight:bold;color:white;border:solid black 1.5pt;background-color:#1e7263;">
    Understanding Model History Object: Classification Task
</div>

In [None]:
# ======================================================================= #
# Course: Deep Learning Complete Course (CS-501)
# Author: Dr. Saad Laouadi
# Institution: Quant Coding Versity Academy
# Date: December 25, 2024
#
# ==========================================================
# Lesson: Understanding Model History Object in Keras
#         Analyzing and Visualizing Training Progress
# ==========================================================
# ## Learning Objectives
# This guide will enable you to:
# 1. Access and interpret the model.fit() history object
# 2. Extract and analyze training metrics over epochs
# 3. Visualize training and validation metrics
# 4. Identify optimal training epochs and model performance
# 5. Detect overfitting through history analysis
# =======================================================================
#          Copyright © Dr. Saad Laouadi 2024
# =======================================================================

In [None]:
# ==================================================== #
#        Load Required Libraries
# ==================================================== #

import os  
import json

# Disable Metal API Validation
os.environ["METAL_DEVICE_WRAPPER_TYPE"] = "0"  


# Load the following libraries
# numpy, pandas, matplotlib, seaborn
 



# From sklearn, import the following:
#   sklearn.datasets        --> load_breast_cancer
#   sklearn.model_selection --> train_test_split
#   sklearn.preprocessing   --> StandardScaler
# From sklearn.metrics, import the following metrics:
#                              confusion_matrix,
#                              classification_report,
#                              roc_curve,
#                              auc,
#                              precision_recall_curve
#

# import tensorflow
import tensorflow as tf

# Set styling for better visualization
# NOTE(review): `plt` and `sns` are not bound anywhere in the visible code of
# this cell; the import exercise above must define them (presumably
# matplotlib.pyplot and seaborn -- confirm) before these two lines can run.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

print("="*72)

# Load (or reload) the `watermark` IPython extension and print a provenance
# banner. Flag meanings (author / last-updated / date / machine info) are as
# documented by the watermark extension -- confirm against its docs.
%reload_ext watermark
%watermark -a "Dr. Saad Laouadi" -u -d -m

print("="*72)
print("Imported Packages and Their Versions:")
print("="*72)

# Report the versions of the packages imported in this session.
%watermark -iv
print("="*72)

# Global Config
# SAVE gates the `training_metrics.json` export later in the notebook;
# leave it False to skip writing that file.
SAVE = False

In [1]:
# ==================================================== #
#        Load and Explore the data
# ==================================================== #
# Load Breast Cancer dataset from sklearn

# Create X dataframe from the loaded data

# Create y target

# Take a quick look at the data
# Check the shape
# Get the statistics of the features



In [None]:
# ==================================================== #
#        Prepare the data for model
# ==================================================== #
# Split the data



In [None]:
# Scale the features



In [None]:
# ==================================================== #
#        Model Definition and Compilation
# ==================================================== #
# Define a simple model with two hidden layers, the first with 
# 64 nodes and the second with 32
# add output layer




# Compile with the `adam` optimizer, a binary cross-entropy loss, and
# multiple metrics: 'accuracy', 'Precision', 'Recall', 'AUC'



In [None]:
# ==================================================== #
#        Model Training with History
# ==================================================== #
# Train the model and store history
# use 50 epochs and validation split of 0.2




In [None]:
# ==================================================== #
#        Detailed History Object Analysis
# ==================================================== #

# Understanding the basic structure
# Print the type of the history object


# List history methods and attributes (properties)


In [2]:
# Get available metrics


In [None]:
# ==================================================== #
#        Analysis of Training History
# ==================================================== #
# Convert history to DataFrame and print the head of it



In [None]:
# print the epochs


In [None]:
# Add epoch numbers to the dataframe



In [None]:
# Find best epochs for different metrics.
# idxmax()/idxmin() return the row label of the best score; adding 1 turns it
# into a 1-based epoch number (assumes history_df keeps a 0-based index).
val_accuracy_series = history_df['val_accuracy']
val_loss_series = history_df['val_loss']

best_acc_epoch = val_accuracy_series.idxmax() + 1   # highest validation accuracy
best_loss_epoch = val_loss_series.idxmin() + 1      # lowest validation loss

print(
    "\nBest Performance:",
    f"Best validation accuracy at epoch {best_acc_epoch}",
    f"Best validation loss at epoch {best_loss_epoch}",
    sep="\n",
)

In [None]:
# ==================================================== #
#        Advanced Metrics Visualization
# ==================================================== #
# Create one subplot per metric, each showing the training and validation curve.
# NOTE(review): these names must match the keys stored in the training history,
# which depend on how the metrics were named at compile time -- confirm.
metrics = ['loss', 'accuracy', 'Precision', 'Recall', 'AUC']
n_metrics = len(metrics)

# Size the grid from the number of metrics instead of hard-coding 2x3.
n_cols = 3
n_rows = max(1, -(-n_metrics // n_cols))    # ceiling division, at least one row
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)
axes = axes.ravel()

for idx, metric in enumerate(metrics):
    ax = axes[idx]
    val_col = f'val_{metric}'
    # Keep acronyms such as 'AUC' intact; str.capitalize() would give 'Auc'.
    display_name = metric if metric.isupper() else metric.capitalize()

    ax.plot(history_df['epoch'], history_df[metric],
            label=f'Training {metric}', linewidth=2)
    ax.plot(history_df['epoch'], history_df[val_col],
            label=f'Validation {metric}', linewidth=2)
    ax.set_title(f'Model {display_name} over Time')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(display_name)
    ax.legend()

    # Mark best epoch: loss is minimised, every other metric is maximised.
    if metric == 'loss':
        best_row = history_df[val_col].idxmin()
    else:
        best_row = history_df[val_col].idxmax()

    # Read the marker position from the same 'epoch' column that is plotted on
    # the x-axis, so the marker sits on the curve whether that column is
    # 0-based or 1-based.
    best_epoch = history_df.loc[best_row, 'epoch']
    best_value = history_df.loc[best_row, val_col]

    ax.axvline(x=best_epoch, color='r', linestyle='--', alpha=0.3)
    ax.plot(best_epoch, best_value, 'ro')
    ax.text(best_epoch, best_value, f'Best: {best_value:.3f}',
            horizontalalignment='right', verticalalignment='bottom')

# Hide grid cells that did not receive a metric (e.g. the 6th panel of a 2x3 grid).
for unused_ax in axes[n_metrics:]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# ==================================================== #
#        Training-Validation Gap Analysis
# ==================================================== #
# Absolute difference between each training metric and its validation
# counterpart, stored on history_df as '<metric>_gap' columns.
gap_columns = {
    f'{name}_gap': history_df[name].sub(history_df[f'val_{name}']).abs()
    for name in metrics
}
for gap_name, gap_values in gap_columns.items():
    history_df[gap_name] = gap_values

# Draw every gap curve on one shared figure.
plt.figure(figsize=(12, 6))
for metric in metrics:
    gap_name = f'{metric}_gap'
    plt.plot(history_df['epoch'], history_df[gap_name], label=f'{metric} gap')

plt.title('Training-Validation Gaps Over Time')
plt.xlabel('Epoch')
plt.ylabel('Absolute Gap')
plt.grid(True)
plt.legend()
plt.show()

In [None]:
# ==================================================== #
#        Learning Rate Analysis
# ==================================================== #
# Plot the epoch-to-epoch change in loss. This shows how quickly the model is
# still learning; it is not the optimizer's learning rate (no learning-rate
# value is read in this cell).
# The first point of each curve is NaN because diff() has no previous epoch.
plt.figure(figsize=(10, 6))
# Use the 'epoch' column on the x-axis so this chart lines up with the other
# plots in the notebook instead of relying on the DataFrame's implicit index.
plt.plot(history_df['epoch'], history_df['loss'].diff(), label='Loss change')
plt.plot(history_df['epoch'], history_df['val_loss'].diff(), label='Val loss change')
plt.title('Rate of Loss Change')
plt.xlabel('Epoch')
plt.ylabel('Loss Difference')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# ==================================================== #
#        Export Training History
# ==================================================== #
# Save history to CSV for further analysis.
# Honour the notebook-wide SAVE switch (the JSON export is guarded the same
# way) so that running the notebook does not write files unless requested.
if SAVE:
    history_df.to_csv('training_history.csv', index=False)

# Create training summary: best and final validation scores plus the epochs at
# which the best scores occurred (computed in the "best epochs" cell).
training_summary = {
    'total_epochs': len(history_df),
    'best_val_accuracy': history_df['val_accuracy'].max(),
    'best_val_loss': history_df['val_loss'].min(),
    'final_val_accuracy': history_df['val_accuracy'].iloc[-1],
    'final_val_loss': history_df['val_loss'].iloc[-1],
    'best_accuracy_epoch': best_acc_epoch,
    'best_loss_epoch': best_loss_epoch
}

print("\nTraining Summary:")
# `summary_key` rather than `metric`: these are summary field names, not
# entries of the `metrics` list used by the plotting cells.
for summary_key, value in training_summary.items():
    print(f"{summary_key}: {value}")

In [None]:
# ============================================================== #
#          Advanced Analysis of History Object
# ============================================================== #


# ==================================================== #
#        Epoch Information
# ==================================================== #
# Number of epochs trained, read from the list of epoch indices kept on the
# history object.
epochs_run = history.epoch
print("\nNumber of epochs trained:", len(epochs_run))
print("Epoch numbers:", epochs_run)  # 0-based indexing

In [None]:
# ==================================================== #
#        Model Configuration
# ==================================================== #
# The history object keeps a reference to the model it was recorded for;
# use it to show the model, its parameter count and its layer summary.
print("\nModel Configuration:")
trained_model = history.model
print("Model used:", trained_model)
print("\nModel Parameters:", trained_model.count_params())
print("\nModel Architecture:")
trained_model.summary()

In [None]:
# ==================================================== #
#        Training Params
# ==================================================== #
# Show which fit() parameters the history recorded, then print a fixed set of
# them. dict.get() is used so that an absent key prints None instead of raising.
fit_params = history.params
print("\nTraining Parameters:", fit_params.keys())
print()

param_labels = [
    ("Batch Size", 'batch_size'),
    ("Number of Epochs", 'epochs'),
    ("Steps per Epoch", 'steps'),
    ("Samples", 'samples'),
    ("Validation Steps", 'validation_steps'),
]
for label, key in param_labels:
    print(f"{label}:", fit_params.get(key))

In [None]:
# ==================================================== #
#        Metric Trends Analysis
# ==================================================== #
# Calculate trend statistics for the training metrics only.
# Metrics whose name contains one of these tags get better as they DECREASE;
# every other metric is treated as higher-is-better.
lower_is_better_tags = ('loss', 'error')

for metric, values in history.history.items():
    # Validation metrics are stored under a 'val_' prefix; test the prefix
    # rather than the substring so names merely containing "val" are kept.
    if metric.startswith('val_'):
        continue
    if not values:                        # Nothing recorded for this metric
        continue

    start_value, end_value = values[0], values[-1]
    minimise = any(tag in metric.lower() for tag in lower_is_better_tags)
    if minimise:
        # For loss-like metrics the smallest value is the best one and a
        # decrease counts as a positive improvement.
        best_value, worst_value = min(values), max(values)
        improvement = start_value - end_value
    else:
        best_value, worst_value = max(values), min(values)
        improvement = end_value - start_value

    print(f"\n{metric.capitalize()} Trends:")
    print(f"\t{'Starting value:':<20} {start_value:.4f}")
    print(f"\t{'Ending value:':<20} {end_value:.4f}")
    print(f"\t{'Improvement:':<20} {improvement:.4f}")
    print(f"\t{'Best value:':<20} {best_value:.4f}")
    print(f"\t{'Worst value:':<20} {worst_value:.4f}")

In [None]:
# ==================================================== #
#        Learning Rate Tracking
# ==================================================== #
# If learning rate was tracked, plot it per epoch.
# The log key differs between Keras generations ('lr' in older releases,
# 'learning_rate' in Keras 3), so accept either instead of silently skipping
# the plot when only the newer name is present.
lr_key = next(
    (key for key in ('lr', 'learning_rate') if key in history.history),
    None,
)
if lr_key is not None:
    plt.figure(figsize=(10, 5))
    plt.plot(history.history[lr_key])
    plt.title('Learning Rate over Time')
    plt.xlabel('Epoch')
    plt.ylabel('Learning Rate')
    plt.grid(True)
    plt.show()

In [None]:
# ==================================================== #
#        Stop Reason Analysis
# ==================================================== #
# If early stopping was used.
# The `stop_training` flag is an attribute of the model, not of the History
# callback, so checking the history object itself never matches. Read the flag
# through the model reference that the history carries.
fitted_model = getattr(history, 'model', None)
if fitted_model is not None and hasattr(fitted_model, 'stop_training'):
    print("\nEarly Stopping Information:")
    print("Training stopped early:", fitted_model.stop_training)

In [None]:
# ==================================================== #
#        Custom Metrics Storage
# ==================================================== #
# Collect the headline numbers (best and final accuracy/loss for training and
# validation) into one dict for later use.
acc_history = history.history['accuracy']
val_acc_history = history.history['val_accuracy']
loss_history = history.history['loss']
val_loss_history = history.history['val_loss']

best_performance = {
    'best_accuracy': max(acc_history),
    'best_val_accuracy': max(val_acc_history),
    'best_loss': min(loss_history),
    'best_val_loss': min(val_loss_history),
    'final_accuracy': acc_history[-1],
    'final_val_accuracy': val_acc_history[-1],
}

In [None]:
# Write best_performance to disk as indented JSON, but only when the global
# SAVE switch is enabled.
if SAVE:
    with open('training_metrics.json', 'w') as metrics_file:
        metrics_file.write(json.dumps(best_performance, indent=4))

In [None]:
# ==================================================== #
#        Convergence Analysis
# ==================================================== #
# Check if model converged
def check_convergence(history, threshold=0.001, window=5):
    """Report whether the training loss has plateaued.

    The loss counts as converged when, over the last ``window`` epochs, the
    spread between the highest and the lowest training loss is below
    ``threshold``. Inspecting the whole window (rather than only its first and
    last value) prevents an oscillating loss that happens to return to an
    earlier value from being reported as converged.

    Args:
        history: Object exposing a ``history`` mapping that contains a
            ``'loss'`` sequence (e.g. the object returned by ``model.fit``).
        threshold: Maximum loss spread allowed inside the window.
        window: Number of most recent epochs to inspect (at least 2).

    Returns:
        bool: True if the loss varied by less than ``threshold`` across the
        window. False otherwise, including when fewer than ``window`` epochs
        were recorded or when any loss in the window is NaN.

    Raises:
        ValueError: If ``window`` is smaller than 2.
    """
    if window < 2:
        raise ValueError("window must be at least 2")

    losses = history.history['loss']
    if len(losses) < window:  # Not enough epochs to judge convergence
        return False

    recent = list(losses[-window:])
    # NaN never equals itself; a NaN loss means training broke down, which is
    # not convergence (and max()/min() are unreliable in the presence of NaN).
    if any(value != value for value in recent):
        return False

    return bool((max(recent) - min(recent)) < threshold)

# Run the convergence check on the recorded history and report the verdict.
converged = check_convergence(history)
print("\nConvergence Analysis:", f"Model converged: {converged}", sep="\n")