# Training & Evaluation 
* In this notebook we are going to train a simple object detection CNN from scratch. 
* We'll reuse a CNN similar to the one we used for image classification. 
* We'll use the optimized data generation script to generate the training data. 

In [9]:
import pandas as pd
import numpy as np
from pathlib import Path
import tensorflow as tf
import matplotlib.pyplot as plt
from keras.datasets import mnist
import matplotlib.pyplot as plt
import matplotlib.patches as patches



## Sanity check: report how many GPU devices TensorFlow can see.
visible_gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(visible_gpus))

Num GPUs Available:  1


## Constants

In [10]:
# Project directories, expressed relative to the notebook's working directory.
data_dir = Path("..") / "data"
models_dir = Path("..") / "models"

In [11]:
import os
import sys
# Absolute path of the directory one level above the current working
# directory, so the packages living there can be imported.
module_path = os.path.abspath(os.pardir)

# Register it on sys.path only once, so re-running the cell adds no duplicates.
already_registered = module_path in sys.path
if not already_registered:
    sys.path.append(module_path)
    
from src import graph_compatible_data_generator,training_utils

## Enable IPython's autoreload extension so that edits to the imported
## project scripts are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Initialize Pipeline

In [12]:
# Load MNIST; this cell only uses the training split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Both the images and the labels are converted to float32 tensors.
X_tensor = tf.convert_to_tensor(x_train, dtype=tf.float32)
# X_tensor = tf.reshape(X_tensor, shape=(-1, 28, 28, 1))
y_tensor = tf.convert_to_tensor(y_train, dtype=tf.float32)

batch_size = 32
raw_dataset = tf.data.Dataset.from_tensor_slices((X_tensor, y_tensor))

# create a generator for 2 digits
data_gen_2_digits = graph_compatible_data_generator.create_data_generator(2)

# The model's Input layer is declared with a fixed batch_size, so every batch
# must contain exactly `batch_size` samples. drop_remainder=True discards a
# short trailing batch instead of feeding it to the model. With 60000 samples
# and batch_size=32 nothing is dropped; this only matters if batch_size is
# changed to a value that does not divide the dataset evenly.
processed_dataset_2 = (
    raw_dataset
    .map(data_gen_2_digits)
    .batch(batch_size=batch_size, drop_remainder=True)
    .prefetch(tf.data.AUTOTUNE)
)

### Validating Pipeline

In [13]:
def visualize_generated_data(batch):
    """Plot the first canvas of a batch with its first two bounding boxes.

    Args:
        batch: A ``(batched_canvases, predictions)`` pair. Only element 0 of
            each is drawn. Each of the first two rows of the selected
            prediction is read as ``flag, x_center, y_center, width, height``
            and multiplied by 100 before plotting (assumes the values are
            normalised to a 100x100 canvas -- TODO confirm against the data
            generator).

    Returns:
        None. Shape information is printed and the figure is shown with
        ``plt.show()``.
    """
    batched_canvases, predictions = batch
    print(f"batched_canvases shape: {batched_canvases.shape}")
    print("predictions shape ", tf.shape(predictions))

    # Take the first canvas of the batch and convert it from an EagerTensor
    # to a NumPy array so matplotlib can draw it.
    canvas_to_show = batched_canvases[0].numpy()
    prediction = predictions[0]

    print(f"Canvas shape: {canvas_to_show.shape}")
    print(f"Single Prediction shape: {prediction.shape}")

    fig, ax = plt.subplots(1, figsize=(8, 8))

    # Draw the 2 boxes stored for this canvas.
    for i in range(2):
        bbox = (prediction[i]).numpy() * 100

        # Row layout: flag, x_center, y_center, width, height.
        # The flag (index 0) is not needed for drawing.
        x_center = bbox[1]
        y_center = bbox[2]
        width = bbox[3]
        height = bbox[4]

        # Rectangle is anchored at its (x, y) corner, so convert the centre
        # to that corner: half the width horizontally and half the HEIGHT
        # vertically (the vertical offset previously used the width, which
        # misplaced non-square boxes).
        x_min = x_center - (width / 2)
        y_min = y_center - (height / 2)

        rect = patches.Rectangle(
            (x_min, y_min),
            width,
            height,
            linewidth=2,
            edgecolor='r',
            facecolor='none'
        )

        ax.add_patch(rect)

    # Display the image
    ax.imshow(canvas_to_show, cmap='gray')

    plt.title("Generated 100x100 Canvas (Test 1)")
    plt.show()


# Get one batch
# The dataset is batched, so .take(1) yields one full batch
# for batch in processed_dataset_2.take(1):
#     visualize_generated_data(batch=batch)

## Initialize The Model

In [17]:
def _double_conv_pool(tensor, filters, kernel_size):
    """Apply two identical same-padded ReLU convolutions, then max pooling."""
    for _ in range(2):
        tensor = tf.keras.layers.Conv2D(
            filters=filters,
            kernel_size=kernel_size,
            padding='same',
            activation='relu',
        )(tensor)
    return tf.keras.layers.MaxPooling2D()(tensor)


# Single-channel 100x100 canvases with a fixed batch dimension.
inputs = tf.keras.Input(shape=(100, 100, 1), batch_size=batch_size, name="input_layer")

# Scale the inputs by 1/255 before the first convolution.
x = tf.keras.layers.Rescaling(scale=1./255, name="rescaling")(inputs)

# Four conv-conv-pool stages, given as (filters, kernel_size).
for stage_filters, stage_kernel in ((8, 5), (8, 3), (16, 3), (32, 3)):
    x = _double_conv_pool(x, stage_filters, stage_kernel)

# 1x1 convolution head with a linear activation, producing 45 channels per
# spatial cell.
outputs = tf.keras.layers.Conv2D(
    filters=45,
    kernel_size=1,
    padding='same',
    activation='linear',
)(x)

# Assemble the functional model from its input and output tensors.
model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.summary()

In [18]:
# step 4: Define the callbacks
# Build the checkpoint path from the shared `models_dir` constant rather than
# repeating the directory as a string literal. The `{epoch:02d}` placeholder
# is left in the string for Keras to fill in with the epoch number.
checkpoint_filepath = str(models_dir / 'experiment_1_{epoch:02d}.keras')

# Track the training loss and, at the end of each epoch, write a checkpoint
# only when that loss is lower than the best value seen so far.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    save_freq="epoch",
    verbose=1,
    )

In [19]:
# Adam optimizer with a small learning rate and gradient-norm clipping.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001, clipnorm=1.0)

# Metric functions reported next to the loss during training.
tracked_metrics = [
    training_utils.objectness_metrics,
    training_utils.bounding_box_metrics,
    training_utils.classification_metrics,
]

model.compile(
    optimizer=optimizer,
    loss=training_utils.calculate_model_loss,
    metrics=tracked_metrics,
)

## step 5: Fit the model
epochs = 5

# Train on the generated two-digit dataset, checkpointing through the callback.
history = model.fit(
    processed_dataset_2,
    epochs=epochs,
    callbacks=[model_checkpoint_callback],
)

Epoch 1/5


I0000 00:00:1761774056.556280   58537 service.cc:152] XLA service 0x7fdd8c017470 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1761774056.556377   58537 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 2080 SUPER, Compute Capability 7.5
2025-10-29 14:40:56.861519: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1761774058.046962   58537 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-10-29 14:40:59.132885: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:549] Omitted potentially buggy algorithm eng14{k25=0} for conv %cudnn-conv-bias-activation.29 = (f32[32,8,50,50]{3,2,1,0}, u8[0]{0}) custom-call(f32[32,8,50,50]{3,2,1,0} %bitcast.10271, f32[8,8,3,3]{3,2,1,0} %bitcast.8136, f32[8]{0} %bitcast.10365), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, custom_call_targe

[1m   3/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:05[0m 35ms/step - bounding_box_metrics: 0.1765 - classification_metrics: 2.3033 - loss: 3.6908 - objectness_metrics: 0.6939  

I0000 00:00:1761774063.159836   58537 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step - bounding_box_metrics: 2.2975 - classification_metrics: 2.0403 - loss: 2.5250 - objectness_metrics: 0.1694
Epoch 1: loss improved from inf to 1.99304, saving model to ../models/experiment_1_01.keras
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m245s[0m 123ms/step - bounding_box_metrics: 2.2975 - classification_metrics: 2.0401 - loss: 2.5247 - objectness_metrics: 0.1694
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step - bounding_box_metrics: 2.0900 - classification_metrics: 1.1457 - loss: 1.2946 - objectness_metrics: 0.0449
Epoch 2: loss improved from 1.99304 to 1.18336, saving model to ../models/experiment_1_02.keras
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m238s[0m 124ms/step - bounding_box_metrics: 2.0899 - classification_metrics: 1.1457 - loss: 1.2945 - objectness_metrics: 0.0449
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━