In [1]:
import os

In [2]:
%pwd

'c:\\Users\\gupta\\Documents\\GitHub\\Brain_Tumor_Detection\\research'

In [3]:
# Step up one directory level (in the recorded run: from research/ to the project root).
# NOTE: this is relative to the current working directory, so re-running this
# cell moves up one more level each time -- run it once per kernel session.
os.chdir(os.pardir)
         

In [4]:
%pwd

'c:\\Users\\gupta\\Documents\\GitHub\\Brain_Tumor_Detection'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class PrepareCallbacksConfig:
    """Read-only bundle of the filesystem paths needed to build training callbacks.

    The dataclass is frozen: attributes cannot be reassigned once the
    instance has been created.
    """

    # Set from `config.root_dir`; not otherwise read by the code in this notebook.
    root_dir: Path
    # Parent directory under which a timestamped TensorBoard run directory is placed.
    tensorboard_root_log_dir: Path
    # Passed (as a string) to ModelCheckpoint as its `filepath`.
    checkpoint_model_filepath: Path

In [6]:
from CNN_Classifier.constants import *
from CNN_Classifier.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    """Reads the project's YAML config/params files and derives per-stage configs."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the configuration YAML handed to `read_yaml`.
            params_filepath: Path of the parameters YAML handed to `read_yaml`.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])

    def get_prepare_callback_config(self) -> PrepareCallbacksConfig:
        """Build the callback path configuration from the `prepare_callbacks` section.

        Side effect: calls `create_directories` for the checkpoint file's
        parent directory and for the TensorBoard root log directory.

        Returns:
            A `PrepareCallbacksConfig` whose fields are `Path` objects.
        """
        section = self.config.prepare_callbacks

        # Only the directory part of the checkpoint path has to exist up front;
        # the file itself is written later by the checkpoint callback.
        checkpoint_file = section.checkpoint_model_filepath
        checkpoint_dir = Path(os.path.dirname(checkpoint_file))
        tensorboard_dir = Path(section.tensorboard_root_log_dir)
        create_directories([checkpoint_dir, tensorboard_dir])

        return PrepareCallbacksConfig(
            root_dir=Path(section.root_dir),
            tensorboard_root_log_dir=tensorboard_dir,
            checkpoint_model_filepath=Path(checkpoint_file),
        )




In [8]:
import os
import urllib.request as request
from zipfile import ZipFile
import tensorflow as tf
import time

In [15]:
class PrepareCallback:
    """Factory for the Keras callbacks (TensorBoard + ModelCheckpoint) used in training."""

    def __init__(self, config: PrepareCallbacksConfig):
        """Keep the path configuration that the callback factories read from."""
        self.config = config

    @property
    def _create_tb_callbacks(self):
        """Build a TensorBoard callback that logs into a timestamped sub-directory.

        This is a property, so each attribute access creates a new callback
        with a freshly computed timestamp.
        """
        run_name = "tb_logs_at_" + time.strftime("%Y-%m-%d-%H-%M-%S")
        run_log_dir = self.config.tensorboard_root_log_dir / run_name
        # The Path is converted to a plain string before it is passed to Keras.
        return tf.keras.callbacks.TensorBoard(log_dir=str(run_log_dir))

    @property
    def _create_ckpt_callbacks(self):
        """Build a ModelCheckpoint callback targeting the configured checkpoint file.

        `save_best_only=True` is passed and no `monitor` is given, so the
        monitored metric is whatever ModelCheckpoint uses by default.
        """
        checkpoint_target = str(self.config.checkpoint_model_filepath)
        return tf.keras.callbacks.ModelCheckpoint(
            filepath=checkpoint_target,
            save_best_only=True,
        )

    def get_tb_ckpt_callbacks(self):
        """Return `[tensorboard_callback, checkpoint_callback]`, in that order."""
        tensorboard_cb = self._create_tb_callbacks
        checkpoint_cb = self._create_ckpt_callbacks
        return [tensorboard_cb, checkpoint_cb]


In [16]:
# Build the callback list end to end:
#   load YAML config -> derive callback paths (creating directories) -> create callbacks.
# No try/except wrapper: an error in any step should surface directly with its
# original traceback rather than being caught and re-raised unchanged.
config = ConfigurationManager()
prepare_callbacks_config = config.get_prepare_callback_config()
prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)
callback_list = prepare_callbacks.get_tb_ckpt_callbacks()

[2024-09-05 18:46:31,279: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-09-05 18:46:31,289: INFO: common: yaml file: params.yaml loaded successfully]
[2024-09-05 18:46:31,294: INFO: common: created directory at: artifacts]
[2024-09-05 18:46:31,298: INFO: common: created directory at: artifacts\prepare_callbacks\checkpoint_dir]
[2024-09-05 18:46:31,302: INFO: common: created directory at: artifacts\prepare_callbacks\tensorboard_log_dir]


Callback in Real Life: A Kitchen Timer
Imagine you're baking a cake, and you use a kitchen timer. You set the timer for 30 minutes, and once the timer goes off, you check if the cake is done. If it's not done, you may decide to keep baking for another 5 minutes. If it's perfect, you take it out of the oven.

In this example, the kitchen timer is a type of callback. It's a helper that:

Keeps track of time (like logging the progress).
Alerts you when it's time to do something (like saving a model or checking accuracy).
Decides what to do next based on the state of the cake (e.g., stop baking, keep baking).
Callback in Deep Learning
In deep learning, callbacks work similarly to the kitchen timer. They automatically do something for you during training at specific moments without you having to intervene manually.

Here’s a simple breakdown:

Logging Callback (TensorBoard):

Example: While the model is training, a callback can log the accuracy and loss values every epoch (round of training). This is like writing down how well the cake is rising every 5 minutes to track the progress.
Why it's useful: You can later look at these logs (using TensorBoard) to see how the model is performing and decide if you need to change anything.
Model Checkpoint Callback (ModelCheckpoint):

Example: Let's say you're training the model for 50 epochs (rounds). Every time the model improves (e.g., higher accuracy or lower loss), the callback saves the model. This is like setting an automatic rule: "If the cake smells better, take a snapshot of it."
Why it's useful: You don’t have to manually check the model after each epoch and decide when to save it. The callback saves the best version for you, so you don’t lose progress if something goes wrong.

Simple Example of Callbacks
Let’s say you’re training a model to recognize handwritten digits (like the famous MNIST dataset). You want to do three things:

Track training progress using logs (to see if the model is improving).
Save the best model during training (so you don’t lose the best version).
Stop training early if the model stops improving (to save time and avoid overfitting).
Here’s how callbacks handle that:

Without Callbacks:
You'd have to manually check the training process after every epoch to log the accuracy and loss.
You'd need to save the model manually if you notice that it has improved.
You’d have to stop training manually if you see it’s not improving.
With Callbacks:
Logging Callback: After every epoch, the TensorBoard callback automatically logs accuracy and loss so you can review them later in TensorBoard.
Model Checkpoint Callback: The callback automatically saves the model every time it performs better than before (based on a condition you define).
Early Stopping Callback: The callback automatically stops training early if the validation accuracy hasn’t improved for a few epochs.

### How Model Checkpoint Works:

1. Checkpoint Based on Performance Improvement:

If you set save_best_only=True in the ModelCheckpoint callback, the model will only save when its performance improves based on a specific metric (e.g., validation accuracy or validation loss).
Suppose you're training for 50 epochs:
At epoch 10, if the validation accuracy improves, the model is saved.
At epoch 20, if the model performs better than it did at epoch 10, the previous checkpoint (from epoch 10) is replaced with the updated, better model.
At epoch 30, if the model’s performance worsens (e.g., higher loss, lower accuracy), the model from epoch 20 is kept, and the model isn't saved again.

2. Checkpoint Based on Performance at Each Epoch:

The ModelCheckpoint checks the model’s performance after every epoch. However, it will only save the model if it performs better than the previous best performance (when save_best_only=True).
If the model’s performance worsens after a certain epoch (higher loss, lower accuracy), the checkpoint will not save the model.
If save_best_only=False, it will save the model after every epoch, regardless of whether the performance improves or not.

### So, What's Right?

Option 2 is closer to the actual behavior:

The ModelCheckpoint callback checks the model’s performance after every epoch and saves the model if the performance improves based on a specified metric (like accuracy or loss).
If the model starts performing worse in later epochs, it won’t save the model, keeping the best version saved so far.
How Does ModelCheckpoint Know When the Model Is Performing Best?
The ModelCheckpoint knows the model is performing better by tracking a specific metric over the epochs. You can define which metric to monitor when creating the checkpoint callback, such as:

Validation Loss: If the validation loss decreases, the model is considered to be improving.
Validation Accuracy: If the validation accuracy increases, the model is considered to be improving.
You specify which metric to track using the monitor parameter of the ModelCheckpoint callback (if you don't set it, it defaults to val_loss). Commonly used metrics are:

val_loss: Validation loss
val_accuracy: Validation accuracy

Example:

from tensorflow.keras.callbacks import ModelCheckpoint

******* Create a ModelCheckpoint callback

checkpoint_cb = ModelCheckpoint(

    filepath='best_model.h5',  # Where to save the model

    monitor='val_accuracy',    # Metric to monitor

    save_best_only=True,       # Save only if the model improves

    mode='max'                 # 'max' because higher accuracy is better

)

********* Train the model using the callback

history = model.fit(

    X_train, y_train,

    epochs=50,

    validation_data=(X_val, y_val),

    callbacks=[checkpoint_cb]  # Include the callback in training
    
)

### Key Points:

The checkpoint is saved every time the model performs better based on the monitored metric (val_accuracy or val_loss), which is checked after every epoch.
If the model performs worse in subsequent epochs, the previously saved best model will not be replaced.
The model is saved in the location specified by the filepath (e.g., 'best_model.h5').
mode='max' or mode='min':
Use mode='max' if you’re monitoring accuracy or other metrics where higher is better.
Use mode='min' if you’re monitoring loss, where lower is better.
