In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
except Exception:
    pass

import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import io
from PIL import Image

from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, LearningRateScheduler, ModelCheckpoint, CSVLogger, ReduceLROnPlateau
## یعنی می خوام همینجا بیارش بالا
%load_ext tensorboard
%tensorboard --logdir logs


import os
import matplotlib.pylab as plt
import numpy as np
import math
import datetime
import pandas as pd

print("Version: ", tf.__version__)
tf.get_logger().setLevel('INFO')

Reusing TensorBoard on port 6006 (pid 13596), started 0:30:53 ago. (Use '!kill 13596' to kill it.)

Version:  2.19.0


In [2]:
ds , info = tfds.load('horses_or_humans', as_supervised=True, with_info=True,split=['train[:80%]','train[80%:]','test'])

(train_ds, validation_ds, test_ds) = ds

In [3]:
num_examples = info.splits['train'].num_examples
num_classes = info.features['label'].num_classes

In [4]:
print(num_examples)
print(num_classes)

1027
2


In [5]:
IMAGE_SIZE =(150,150)

In [6]:
def preprocecing (image , label):
    return tf.divide(tf.cast(tf.image.resize(image,IMAGE_SIZE),tf.float32),255.0) ,label

In [7]:
BATCH_SIZE = 32

In [8]:
train_batches = train_ds.shuffle(num_examples//4).map(preprocecing).batch(BATCH_SIZE).prefetch(1)
validation_batches = validation_ds.map(preprocecing).batch(BATCH_SIZE).prefetch(1)
test_batches = test_ds.map(preprocecing).batch(BATCH_SIZE)

In [9]:
image_batch , label_batch = next(iter(train_batches))

In [10]:
print(image_batch.shape ,'&', label_batch.shape)

(32, 150, 150, 3) & (32,)


***

The expression `input_shape=IMAGE_SIZE + (3,)` in Python is a combination of **tuples**, used to specify the **input dimensions of images** for the first layer of a neural network (here, `Conv2D`). Let’s break it down step by step:

***

### 1. The value of `IMAGE_SIZE`

```python
IMAGE_SIZE = (150, 150)
```

This means the input images should be 150 pixels wide and 150 pixels tall.

***

### 2. The expression `(3,)`

This is a **single-element tuple** with the value 3. The number 3 represents the **number of color channels**:

- For a color image: `3` (RGB)
- For a grayscale image: `1`

***

### 3. Adding Tuples

```python
IMAGE_SIZE + (3,)
```

In Python, when you add two tuples with the `+` operator, they are **concatenated**. The result:

```python
(150, 150) + (3,) → (150, 150, 3)
```

***

### 4. Final Result: `input_shape=(150, 150, 3)`

This means that each input image to the network should have the shape:

- **150 pixels height**
- **150 pixels width**
- **3 color channels (RGB)**

This value is passed as a parameter to `Conv2D` so it knows what shape of data to expect.

***

### Summary

```python
input_shape=IMAGE_SIZE + (3,)
```

is a Pythonic way to write `input_shape=(150, 150, 3)` dynamically and flexibly. For example, if you change the image size later, you won't have to manually alter `input_shape`.



In [12]:
def build_model(dense_units, input_shape=IMAGE_SIZE + (3,)):
  model = tf.keras.models.Sequential([
      tf.keras.layers.Input(shape=input_shape),  # ✅ لایه Input به صورت جداگانه
      tf.keras.layers.Conv2D(16, (3, 3), activation='relu'),
      tf.keras.layers.MaxPooling2D(2, 2),
      tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
      tf.keras.layers.MaxPooling2D(2, 2),
      tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
      tf.keras.layers.MaxPooling2D(2, 2),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(dense_units, activation='relu'),
      tf.keras.layers.Dense(2, activation='softmax')
  ])
  return model

## TensorBoard Callback

First, we remove the folder if it exists.

In [15]:
import shutil
import datetime
import tensorflow as tf

# 1. اگر موجودیتی به نام "logs" هست (فایل یا پوشه)، آن را حذف کن
if os.path.exists("logs"):
    if os.path.isdir("logs"):
        shutil.rmtree("logs")
    else:
        os.remove("logs")

In [16]:
# 2. یک پوشه‌ی خالی به نام "logs" بساز
os.makedirs("logs", exist_ok=True)

In [17]:
# 3. مسیر لاگ جدید با Timestamp
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
# (اختیاری) مطمئن شو پوشه‌ی زیرمجموعه هم ساخته می‌شود
os.makedirs(logdir, exist_ok=True)

In [18]:
# قبل از حذف
print("Before:", os.listdir("."))      # لیست فایل و پوشه‌های جاری
# (پس از حذف/ساخت)
print("After:", os.listdir("logs"))     # باید خالی باشد یا فقط timestamp زیرمجموعه


Before: ['.anaconda', '.cache', '.conda', '.condarc', '.continuum', '.ipynb_checkpoints', '.ipython', '.jupyter', '.keras', '.matplotlib', '.virtual_documents', '.vscode', 'anaconda_projects', 'ansel', 'AppData', 'Application Data', 'callbacks_me.ipynb', 'Contacts', 'Cookies', 'Desktop', 'Documents', 'Downloads', 'Favorites', 'Intel', 'Links', 'Local Settings', 'logs', 'Music', 'My Documents', 'NetHood', 'NTUSER.DAT', 'ntuser.dat.LOG1', 'ntuser.dat.LOG2', 'NTUSER.DAT{a2332f18-cdbf-11ec-8680-002248483d79}.TM.blf', 'NTUSER.DAT{a2332f18-cdbf-11ec-8680-002248483d79}.TMContainer00000000000000000001.regtrans-ms', 'NTUSER.DAT{a2332f18-cdbf-11ec-8680-002248483d79}.TMContainer00000000000000000002.regtrans-ms', 'ntuser.ini', 'OneDrive', 'Pictures', 'PrintHood', 'Recent', 'Saved Games', 'Searches', 'SendTo', 'Start Menu', 'Templates', 'tensorflow_datasets', 'Videos']
After: ['20250518-151721']


In [19]:
# 4. تعریف و کامپایل مدل
model = build_model(dense_units=256)
model.compile(
    optimizer='sgd',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [20]:
# 5. ساخت Callback با نام آرگومان صحیح
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir, 
                                                      histogram_freq=1,
                                                      update_freq=1,
                                                      )

In [21]:
# 6. آموزش
model.fit(train_batches,
          epochs=10,
          validation_data=validation_batches,
          callbacks=[tensorboard_callback])

Epoch 1/10
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 57ms/step - accuracy: 0.5045 - loss: 0.6910 - val_accuracy: 0.5171 - val_loss: 0.6779
Epoch 2/10
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step - accuracy: 0.5854 - loss: 0.6625 - val_accuracy: 0.7366 - val_loss: 0.6314
Epoch 3/10
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step - accuracy: 0.6587 - loss: 0.6242 - val_accuracy: 0.7268 - val_loss: 0.5801
Epoch 4/10
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step - accuracy: 0.7371 - loss: 0.5622 - val_accuracy: 0.7268 - val_loss: 0.5314
Epoch 5/10
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step - accuracy: 0.7079 - loss: 0.5637 - val_accuracy: 0.7659 - val_loss: 0.4934
Epoch 6/10
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step - accuracy: 0.7719 - loss: 0.4882 - val_accuracy: 0.7659 - val_loss: 0.4552
Epoch 7/10
[1m26/26[0m [32m━━━━

<keras.src.callbacks.history.History at 0x15663306860>

In [49]:
%tensorboard --logdir logs --port 6007 --bind_all

## Saving Checkpoints

In [82]:
from tensorflow.keras.callbacks import ModelCheckpoint

# ساخت پوشه checkpoints اگر وجود نداشته باشد
checkpoint_dir = 'checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)

# تنظیم ModelCheckpoint برای ذخیره در پوشه checkpoints
checkpoint_callback = ModelCheckpoint(
    # در اینجا فقط ویت
    filepath=os.path.join(checkpoint_dir, 'weights.{epoch:02d}-{val_loss:.2f}.keras'),
    verbose=1,
    save_best_only=False,  # یا True اگر فقط بهترین مدل را می‌خواهید
    save_weights_only=False,  # برای ذخیره کل مدل
    monitor="val_loss",
    mode="min"
)

In [84]:
model = build_model(dense_units=256)
model.compile(
    optimizer='sgd',
    loss='sparse_categorical_crossentropy', 
    metrics=['accuracy'])
  
model.fit(
    train_batches,
    epochs=5,
    validation_data=validation_batches,
    verbose=2,
    callbacks=[checkpoint_callback]
)

Epoch 1/5

Epoch 1: saving model to checkpoints\weights.01-0.66.keras
26/26 - 1s - 51ms/step - accuracy: 0.6204 - loss: 0.6737 - val_accuracy: 0.6244 - val_loss: 0.6618
Epoch 2/5

Epoch 2: saving model to checkpoints\weights.02-0.61.keras
26/26 - 1s - 37ms/step - accuracy: 0.6898 - loss: 0.6326 - val_accuracy: 0.6537 - val_loss: 0.6105
Epoch 3/5

Epoch 3: saving model to checkpoints\weights.03-0.56.keras
26/26 - 1s - 38ms/step - accuracy: 0.6861 - loss: 0.5912 - val_accuracy: 0.7220 - val_loss: 0.5621
Epoch 4/5

Epoch 4: saving model to checkpoints\weights.04-0.54.keras
26/26 - 1s - 36ms/step - accuracy: 0.7251 - loss: 0.5548 - val_accuracy: 0.7561 - val_loss: 0.5425
Epoch 5/5

Epoch 5: saving model to checkpoints\weights.05-0.63.keras
26/26 - 1s - 38ms/step - accuracy: 0.8017 - loss: 0.4784 - val_accuracy: 0.6049 - val_loss: 0.6321


<keras.src.callbacks.history.History at 0x15644465bd0>

## EarlyStopping

In [95]:
early_stop = EarlyStopping(patience=3,
              min_delta=0.05,
              baseline=0.8,
              mode='min',
              monitor='val_loss',
              restore_best_weights=True,
              verbose=1)

In [97]:
model = build_model(dense_units=256)
model.compile(
    optimizer='sgd',
    loss='sparse_categorical_crossentropy', 
    metrics=['accuracy'])
  
model.fit(train_batches, 
          epochs=50, 
          validation_data=validation_batches, 
          verbose=2,
          callbacks=[early_stop])

Epoch 1/50
26/26 - 1s - 47ms/step - accuracy: 0.6058 - loss: 0.6705 - val_accuracy: 0.5756 - val_loss: 0.6592
Epoch 2/50
26/26 - 1s - 35ms/step - accuracy: 0.6983 - loss: 0.6245 - val_accuracy: 0.6098 - val_loss: 0.6158
Epoch 3/50
26/26 - 1s - 35ms/step - accuracy: 0.7129 - loss: 0.5883 - val_accuracy: 0.5610 - val_loss: 0.6325
Epoch 4/50
26/26 - 1s - 38ms/step - accuracy: 0.7470 - loss: 0.5450 - val_accuracy: 0.7122 - val_loss: 0.5461
Epoch 5/50
26/26 - 1s - 35ms/step - accuracy: 0.7579 - loss: 0.5075 - val_accuracy: 0.8927 - val_loss: 0.4358
Epoch 6/50
26/26 - 1s - 36ms/step - accuracy: 0.8273 - loss: 0.4301 - val_accuracy: 0.8683 - val_loss: 0.3832
Epoch 7/50
26/26 - 1s - 36ms/step - accuracy: 0.8382 - loss: 0.3919 - val_accuracy: 0.8780 - val_loss: 0.3366
Epoch 8/50
26/26 - 1s - 37ms/step - accuracy: 0.8771 - loss: 0.3353 - val_accuracy: 0.9317 - val_loss: 0.2553
Epoch 9/50
26/26 - 1s - 36ms/step - accuracy: 0.9221 - loss: 0.2570 - val_accuracy: 0.7220 - val_loss: 0.4351
Epoch 10/5

<keras.src.callbacks.history.History at 0x15646949db0>

## Learning Rate Scheduler

In [102]:
model = build_model(dense_units=256)
model.compile(
    optimizer='sgd',
    loss='sparse_categorical_crossentropy', 
    metrics=['accuracy'])

This line of code reduces the learning rate (`lr`) based on a formula:
- `initial_lr`: The initial learning rate.
- `drop`: The reduction factor (e.g., 0.5 for halving).
- `epoch`: The current epoch number.
- `epochs_drop`: The number of epochs after which the learning rate is reduced.

Formula:
- Every `epochs_drop` epochs, the learning rate is multiplied by `drop` (in a power-based manner).
- `math.floor((1+epoch)/epochs_drop)` determines how many times the reduction should be applied.

Example: If `initial_lr=0.1`, `drop=0.5`, `epochs_drop=10`:
- Up to epoch 9: `lr = 0.1`
- Epochs 10–19: `lr = 0.05`
- Epochs 20–29: `lr = 0.025`

Summary: The learning rate is decreased stepwise at specified intervals.

In [118]:
def step_decay(epoch):
	initial_lr = 0.01
	drop = 0.5  
	epochs_drop = 1
	lr = initial_lr * math.pow(drop, math.floor((1+epoch)/epochs_drop))
	return lr

In Python, `math.pow` and `math.floor` are functions from the `math` module used for mathematical calculations. Brief explanation:

- **`math.pow(x, y)`**:
  - **Function**: Calculates `x` raised to the power of `y` (`x^y`).
  - **Output**: Always returns a floating-point number (float).
  - **Example**:
    ```python
    import math
    print(math.pow(2, 3))  # Output: 8.0
    print(math.pow(5, 2))  # Output: 25.0
    ```

- **`math.floor(x)`**:
  - **Function**: Returns the largest integer less than or equal to `x` (rounds down).
  - **Output**: Returns an integer (`int`).
  - **Example**:
    ```python
    import math
    print(math.floor(3.7))   # Output: 3
    print(math.floor(3.2))   # Output: 3
    print(math.floor(-3.2))  # Output: -4
    ```

### Notes:
- To use these, you must import the `math` module with `import math`.
- `math.pow` is similar to the `**` operator, but always returns a float.
- `math.floor` is used to remove the fractional part and get the lower integer.

These functions are commonly used in calculations such as setting the learning rate (like in the previous code).

In [120]:
model.fit(train_batches, 
          epochs=5, 
          validation_data=validation_batches, 
          callbacks=[LearningRateScheduler(step_decay, verbose=1),
                    TensorBoard(log_dir=logdir)])


Epoch 1: LearningRateScheduler setting learning rate to 0.005.
Epoch 1/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - accuracy: 0.4868 - loss: 0.6932 - val_accuracy: 0.6146 - val_loss: 0.6823 - learning_rate: 0.0050

Epoch 2: LearningRateScheduler setting learning rate to 0.0025.
Epoch 2/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - accuracy: 0.6427 - loss: 0.6742 - val_accuracy: 0.5268 - val_loss: 0.6763 - learning_rate: 0.0025

Epoch 3: LearningRateScheduler setting learning rate to 0.00125.
Epoch 3/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.6288 - loss: 0.6640 - val_accuracy: 0.5854 - val_loss: 0.6721 - learning_rate: 0.0012

Epoch 4: LearningRateScheduler setting learning rate to 0.000625.
Epoch 4/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.6429 - loss: 0.6610 - val_accuracy: 0.5805 - val_loss: 0.6705 - learning_rate: 6.2500e-0

<keras.src.callbacks.history.History at 0x15645931c30>

This line of code adjusts the learning rate (`lr`) using exponential decay. Summary:

- **`lr`**: The current learning rate (a numeric value).
- **`tf.math.exp(-0.1)`**: Computes the exponential of `-0.1` (i.e., $$ e^{-0.1} \approx 0.9048 $$).
- **Function**: The learning rate (`lr`) is multiplied by the constant $$ e^{-0.1} $$ (approximately 0.9048).
- **Result**: The learning rate decreases exponentially (about 9.52% decrease with each execution).

### Example:

If `lr = 0.1`:

```python
import tensorflow as tf
lr = 0.1
new_lr = lr * tf.math.exp(-0.1)
print(new_lr)  # Output: ~0.09048
```

### Usage:

- This formula is commonly used in machine learning model optimization for gradual learning rate reduction, helping the model converge more precisely over time.
- The value `-0.1` controls the decay rate (more negative values result in faster decay).

Summary: It decreases the learning rate by a fixed exponential coefficient ($$ e^{-0.1} $$).

In [128]:
model = build_model(dense_units=256)
model.compile(
    optimizer='sgd',
    loss='sparse_categorical_crossentropy', 
    metrics=['accuracy'])
  
def exp_decay_scheduler(epoch, lr):
    if epoch < 10:
        return lr
    else:
        return lr * tf.math.exp(-0.1).numpy()

model.fit(train_batches, 
          epochs=15, 
          validation_data=validation_batches, 
          callbacks=[LearningRateScheduler(exp_decay_scheduler, verbose=1),
                    TensorBoard(log_dir='./log_dir')])


Epoch 1: LearningRateScheduler setting learning rate to 0.009999999776482582.
Epoch 1/15
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 50ms/step - accuracy: 0.5431 - loss: 0.6823 - val_accuracy: 0.4341 - val_loss: 0.7528 - learning_rate: 0.0100

Epoch 2: LearningRateScheduler setting learning rate to 0.009999999776482582.
Epoch 2/15
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 43ms/step - accuracy: 0.6183 - loss: 0.6425 - val_accuracy: 0.6634 - val_loss: 0.6162 - learning_rate: 0.0100

Epoch 3: LearningRateScheduler setting learning rate to 0.009999999776482582.
Epoch 3/15
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 43ms/step - accuracy: 0.7590 - loss: 0.5668 - val_accuracy: 0.6927 - val_loss: 0.5782 - learning_rate: 0.0100

Epoch 4: LearningRateScheduler setting learning rate to 0.009999999776482582.
Epoch 4/15
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step - accuracy: 0.7426 - loss: 0.5115 - val_accu

<keras.src.callbacks.history.History at 0x15649b727d0>

## custom callback

How all types of custom callback formats function

In [158]:
class CustomCallback(tf.keras.callbacks.Callback):
    def on_train_begin(self,logs=None):
        keys = list(logs.keys())
        print("Starting training; got log keys: {}".format(keys))
    def on_train_end(self,logs=None):
        keys = list(logs.keys())
        print(f'stoping training : got log keys: {keys}')
    def on_batch_begin (self,epoch, logs =None):
        keys = list(logs.keys())
        print(f"Start epoch {epoch} of training; got log keys: {keys}")     
    def on_epoch_end(self, epoch, logs=None):
        keys = list(logs.keys())
        print("End epoch {} of training; got log keys: {}".format(epoch, keys))

    def on_test_begin(self, logs=None):
        keys = list(logs.keys())
        print("Start testing; got log keys: {}".format(keys))

    def on_test_end(self, logs=None):
        keys = list(logs.keys())
        print("Stop testing; got log keys: {}".format(keys))

    def on_predict_begin(self, logs=None):
        keys = list(logs.keys())
        print("Start predicting; got log keys: {}".format(keys))

    def on_predict_end(self, logs=None):
        keys = list(logs.keys())
        print("Stop predicting; got log keys: {}".format(keys))

    def on_train_batch_begin(self, batch, logs=None):
        keys = list(logs.keys())
        print("...Training: start of batch {}; got log keys: {}".format(batch, keys))

    def on_train_batch_end(self, batch, logs=None):
        keys = list(logs.keys())
        print("...Training: end of batch {}; got log keys: {}".format(batch, keys))

    def on_test_batch_begin(self, batch, logs=None):
        keys = list(logs.keys())
        print("...Evaluating: start of batch {}; got log keys: {}".format(batch, keys))

    def on_test_batch_end(self, batch, logs=None):
        keys = list(logs.keys())
        print("...Evaluating: end of batch {}; got log keys: {}".format(batch, keys))

    def on_predict_batch_begin(self, batch, logs=None):
        keys = list(logs.keys())
        print("...Predicting: start of batch {}; got log keys: {}".format(batch, keys))

    def on_predict_batch_end(self, batch, logs=None):
        keys = list(logs.keys())
        print("...Predicting: end of batch {}; got log keys: {}".format(batch, keys))


In [160]:
model = build_model(dense_units=256)
model.compile(
    optimizer='sgd',
    loss='sparse_categorical_crossentropy', 
    metrics=['accuracy'])
model.fit(train_batches, 
          epochs=2, 
          validation_data=validation_batches, 
          verbose=0,
          callbacks=[CustomCallback()])

Starting training; got log keys: []
...Training: start of batch 0; got log keys: []
...Training: end of batch 0; got log keys: ['accuracy', 'loss']
...Training: start of batch 1; got log keys: []
...Training: end of batch 1; got log keys: ['accuracy', 'loss']
...Training: start of batch 2; got log keys: []
...Training: end of batch 2; got log keys: ['accuracy', 'loss']
...Training: start of batch 3; got log keys: []
...Training: end of batch 3; got log keys: ['accuracy', 'loss']
...Training: start of batch 4; got log keys: []
...Training: end of batch 4; got log keys: ['accuracy', 'loss']
...Training: start of batch 5; got log keys: []
...Training: end of batch 5; got log keys: ['accuracy', 'loss']
...Training: start of batch 6; got log keys: []
...Training: end of batch 6; got log keys: ['accuracy', 'loss']
...Training: start of batch 7; got log keys: []
...Training: end of batch 7; got log keys: ['accuracy', 'loss']
...Training: start of batch 8; got log keys: []
...Training: end of 

<keras.src.callbacks.history.History at 0x1564d695540>

In [163]:
res = model.evaluate(
    test_batches, verbose=0, callbacks=[CustomCallback()]
)

res = model.predict(test_batches, batch_size=128, callbacks=[CustomCallback()])

Start testing; got log keys: []
...Evaluating: start of batch 0; got log keys: []
...Evaluating: end of batch 0; got log keys: ['accuracy', 'loss']
...Evaluating: start of batch 1; got log keys: []
...Evaluating: end of batch 1; got log keys: ['accuracy', 'loss']
...Evaluating: start of batch 2; got log keys: []
...Evaluating: end of batch 2; got log keys: ['accuracy', 'loss']
...Evaluating: start of batch 3; got log keys: []
...Evaluating: end of batch 3; got log keys: ['accuracy', 'loss']
...Evaluating: start of batch 4; got log keys: []
...Evaluating: end of batch 4; got log keys: ['accuracy', 'loss']
...Evaluating: start of batch 5; got log keys: []
...Evaluating: end of batch 5; got log keys: ['accuracy', 'loss']
...Evaluating: start of batch 6; got log keys: []
...Evaluating: end of batch 6; got log keys: ['accuracy', 'loss']
...Evaluating: start of batch 7; got log keys: []
...Evaluating: end of batch 7; got log keys: ['accuracy', 'loss']
Stop testing; got log keys: ['accuracy',

### Usage of `logs` dict
The `logs` dict contains the loss value, and all the metrics at the end of a batch or
epoch. Example includes the loss and mean absolute error.

In [199]:
class LossAndErrorPrintingCallback(tf.keras.callbacks.Callback):
    def on_train_batch_end(self, batch, logs=None):
        print(f'Up to batch {batch}, the average loss is {logs["loss"]:7.2f}')

    def on_test_batch_end(self, batch, logs=None):
        print(f'Up to batch {batch}, the average loss is {logs["loss"]:.2f}')

    def on_epoch_end(self, epoch, logs=None):
        print(f'The average loss for epoch {epoch} is {logs["loss"]:7.2f} '
              f'and accuracy is {logs["accuracy"]:7.2f}')

***

Let’s examine this topic in detail:

### Main Line of Code:
```python
"Up to batch {}, the average loss is {:7.2f}.".format(batch, logs["loss"])
```
Here, `:7.2f` is a **format specifier** in Python’s `.format()` method that controls how `logs["loss"]` is displayed. Let’s break it down:

- **`:7`**: The number 7 sets the minimum field width. The value will occupy at least 7 characters. If the number is shorter, it is padded with spaces on the left.
- **`.2f`**: This part means display the number as a floating point value with exactly 2 decimal places.
- **Combination `:7.2f`**: So, the number is shown as a float with 2 decimal digits, and the whole string will be at least 7 characters long (including the decimal point and digits). For example:
    - If `logs["loss"] = 12.345`, the output for `:7.2f` will be: `  12.34` (2 spaces + "12.34" = 7 characters).
    - If `logs["loss"] = 5.6`, it will be: `   5.60` (3 spaces + "5.60" = 7 characters).

***

### Provided Line of Code (f-string):
In the version I suggested:
```python
print(f'Up to batch {batch}, the average loss is {logs["loss"]:.2f}')
```
- **`:2f`**: In this f-string, this only specifies 2 digits after the decimal point but does not set the minimum field width.
- **Result**: The field width is dynamic (just as long as the number needs). For example:
    - If `logs["loss"] = 12.345`, output is `12.34` (no extra spaces).
    - If `logs["loss"] = 5.6`, output is `5.60`.

***

### Difference & How to Match:
The `:7` specifier in the original code ensures a fixed minimum width of 7 characters, but the initial f-string version did not include this. To make the f-string behave identically, set the field width with `:7.2f`.

***

### Fully Equivalent f-string Version:
```python
class LossAndErrorPrintingCallback(tf.keras.callbacks.Callback):
    def on_train_batch_end(self, batch, logs=None):
        print(f'Up to batch {batch}, the average loss is {logs["loss"]:7.2f}')

    def on_test_batch_end(self, batch, logs=None):
        print(f'Up to batch {batch}, the average loss is {logs["loss"]:7.2f}')

    def on_epoch_end(self, epoch, logs=None):
        print(f'The average loss for epoch {epoch} is {logs["loss"]:7.2f} '
              f'and accuracy is {logs["accuracy"]:7.2f}')
```

***

### Additional Notes:
- **Adding `:7.2f` in the f-string**: Now, the field width is 7 characters, exactly like the original version. If the number is shorter, it will be padded with spaces from the left.
- **Example outputs**:
    - If `logs["loss"] = 12.345`, the output is: `  12.34` (exactly like the original).
    - If `logs["accuracy"] = 0.98`, the output is: `   0.98`.

***

### Summary:
- `:7` sets the minimum field width to 7 characters; numbers shorter than that are left-padded with spaces.
- The initial f-string version missed this, but using `{logs["loss"]:7.2f}` (in the f-string) makes it fully equivalent to the original.

In [202]:
model.fit(
    train_batches,
    epochs=2,
    verbose=0,
    callbacks=[LossAndErrorPrintingCallback()],
)

Up to batch 0, the average loss is    0.22
Up to batch 1, the average loss is    0.24
Up to batch 2, the average loss is    0.24
Up to batch 3, the average loss is    0.24
Up to batch 4, the average loss is    0.24
Up to batch 5, the average loss is    0.25
Up to batch 6, the average loss is    0.24
Up to batch 7, the average loss is    0.25
Up to batch 8, the average loss is    0.24
Up to batch 9, the average loss is    0.24
Up to batch 10, the average loss is    0.24
Up to batch 11, the average loss is    0.24
Up to batch 12, the average loss is    0.24
Up to batch 13, the average loss is    0.24
Up to batch 14, the average loss is    0.24
Up to batch 15, the average loss is    0.24
Up to batch 16, the average loss is    0.23
Up to batch 17, the average loss is    0.23
Up to batch 18, the average loss is    0.23
Up to batch 19, the average loss is    0.23
Up to batch 20, the average loss is    0.23
Up to batch 21, the average loss is    0.22
Up to batch 22, the average loss is    0.2

<keras.src.callbacks.history.History at 0x1564ea54730>

In [206]:
res = model.evaluate(
    test_batches,
    verbose=0,
    callbacks=[LossAndErrorPrintingCallback()],
)

Up to batch 0, the average loss is 0.88
Up to batch 1, the average loss is 0.87
Up to batch 2, the average loss is 1.06
Up to batch 3, the average loss is 0.99
Up to batch 4, the average loss is 1.02
Up to batch 5, the average loss is 0.98
Up to batch 6, the average loss is 1.11
Up to batch 7, the average loss is 1.11


***

## Using the `self.model` Attribute

In addition to receiving log information during the invocation of a Callback method, these Callbacks also have access to the model associated with the current training, evaluation, or inference round via `self.model`.

Here are some things you can do with `self.model` in a Callback:

- Set `self.model.stop_training = True` to immediately stop the training process.
- Change optimizer parameters (such as `self.model.optimizer.learning_rate`) through `self.model.optimizer`.
- Save the model at certain intervals.
- Log the output of `model.predict()` on some test samples at the end of each epoch to monitor accuracy during training.
- Extract and visualize intermediate feature representations at the end of each epoch to observe what the model is learning over time.
- Etc.

Let’s look at these concepts in practice with a few examples.

***

### A Callback for Detecting Overfitting

Let’s review a Callback that measures the ratio between the validation loss and the training loss. If this ratio gets too high, it might signal overfitting—since the validation loss is no longer decreasing while the training loss keeps dropping, causing the ratio to rise. In this case, training should be stopped to prevent further overfitting.

#### Practical Example:

To implement this idea, we can write a custom Callback that calculates this ratio and stops training if it exceeds a certain threshold:

```python
import tensorflow as tf

class OverfittingDetectorCallback(tf.keras.callbacks.Callback):
    def __init__(self, threshold=1.5):
        super(OverfittingDetectorCallback, self).__init__()
        self.threshold = threshold  # The loss ratio threshold

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        val_loss = logs.get('val_loss')
        train_loss = logs.get('loss')

        if val_loss is not None and train_loss is not None:
            ratio = val_loss / train_loss
            print(f'Ratio of val_loss to train_loss at epoch {epoch}: {ratio:.2f}')
            if ratio > self.threshold:
                print(f'Overfitting detected! Ratio {ratio:.2f} exceeds threshold {self.threshold}. Stopping training...')
                self.model.stop_training = True

# Using the callback in your model
model = tf.keras.Sequential([...])  # Define your model layers
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=50,
          callbacks=[OverfittingDetectorCallback(threshold=1.5)])
```

***

#### Explanation:

- **Calculating the Ratio**: At the end of each epoch, the ratio of validation loss to training loss is computed.
- **Overfitting Detection**: If this ratio exceeds the threshold (e.g., 1.5), overfitting is assumed and training is stopped.
- **Access to `self.model`**: The training loop is halted by setting `self.model.stop_training = True`.
- **Flexibility**: The threshold is adjustable and can be fine-tuned for the project’s needs.

This Callback helps prevent wasting computation resources by stopping training when overfitting is detected.

In [22]:
class DetectOverfittingCallback(tf.keras.callbacks.Callback):
    def __init__(self,threshold=0.7):
        super(DetectOverfittingCallback,self).__init__()
        self.threshold = threshold
        
    def on_epoch_end(self,epoch,logs=None):
        ratio = logs['val_loss']/logs['loss']
        print(f"Epoch: {epoch}, Val/Train loss ratio: {ratio:.2f}")

        if ratio >self.threshold:
            print("Stopping training...")
            self.model.stop_training = True

In [None]:
model = build_model(dense_units=256)
model.compile(
    optimizer='sgd',
    loss='sparse_categorical_crossentropy', 
    metrics=['accuracy'])
model.fit(train_batches, 
          epochs=10, 
          validation_data=validation_batches, 
          verbose=0,
          callbacks=[DetectOverfittingCallback()])

# Visualizing at the end of each epoch

We do this to make sure that our accuracy is not high only for a single class, and that a high accuracy score is not just because of one class.

In fact, our model should not remain unprocessed for a particular class.

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
# گام 1: آماده‌سازی داده‌ها
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # نرمال‌سازی داده‌ها

# انتخاب 5 نمونه برای ویژوال‌سازی
num_samples = 5
test_samples = x_test[:num_samples]
true_labels = y_test[:num_samples]

# گام 2: تعریف مدل
model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax')
])

In [None]:
# گام 3: کامپایل مدل
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# گام 4: تعریف Callback سفارشی
class VisualizePredictionsCallback(Callback):
    def __init__(self, test_samples, true_labels, num_samples):
        super(VisualizePredictionsCallback, self).__init__()
        self.test_samples = test_samples
        self.true_labels = true_labels
        self.num_samples = num_samples

    def on_epoch_end(self, epoch, logs=None):
        # گام 5: پیش‌بینی مدل
        predictions = self.model.predict(self.test_samples, verbose=0)
        predicted_labels = np.argmax(predictions, axis=1)

        # گام 6: ویژوال‌سازی
        plt.figure(figsize=(15, 3))
        for i in range(self.num_samples):
            plt.subplot(1, num_samples, i + 1)
            plt.imshow(test_samples[i], cmap='gray')
            plt.title(f'True: {true_labels[i]}\nPred: {predicted_labels[i]}')
            plt.axis('off')
        plt.suptitle(f'Epoch {epoch + 1} Predictions')
        plt.show()

In [None]:
# گام 7: آموزش مدل با Callback
model.fit(x_train, y_train,
          epochs=5,
          validation_data=(x_test, y_test),
          callbacks=[VisualizePredictionsCallback(test_samples, true_labels, num_samples)])