In [1]:
import pandas as pd
import numpy as np
import os
import sys
import math
import gc
from PIL import Image
import cv2
import ast
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, TensorBoard
import keras_cv
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import random
import warnings
warnings.filterwarnings("ignore")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["TF_CPP_MIN_VLOG_LEVEL"] = "0"
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"

2025-10-24 20:50:44.251161: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1761339044.482143      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1761339044.548777      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
print("Available devices: \n")
for device in tf.config.list_logical_devices():
    print(device.name, device.device_type)

Available devices: 

/device:CPU:0 CPU
/device:GPU:0 GPU


I0000 00:00:1761339064.079120      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


In [3]:
def get_strategy():
    """
    Detects and returns the best TensorFlow distribution strategy.
    - TPUStrategy for TPU(s)
    - MirroredStrategy for GPU(s)
    - Default strategy for CPU
    """
    try:
        # Try TPU first
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='local')
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        strategy = tf.distribute.TPUStrategy(tpu)
        print("Using TPU strategy:", type(strategy).__name__)
    except Exception:
        # If TPU not available, try GPU
        gpus = tf.config.list_physical_devices('GPU')
        if gpus:
            strategy = tf.distribute.MirroredStrategy()
            print("Using GPU strategy:", type(strategy).__name__)
        else:
            # Fallback CPU
            strategy = tf.distribute.get_strategy()
            print("No TPU/GPU found. Using CPU strategy:", type(strategy).__name__)

    print("REPLICAS:", strategy.num_replicas_in_sync)
    return strategy

# Call it
strategy = get_strategy()

Using GPU strategy: MirroredStrategy
REPLICAS: 1


In [4]:
print("REPLICAS:", strategy.num_replicas_in_sync)
print("TensorFlow version:", tf.__version__)

REPLICAS: 1
TensorFlow version: 2.18.0


In [5]:
SEED = 28
def seed_everything(SEED):
    random.seed(SEED)
    tf.random.set_seed(SEED)
    np.random.seed(SEED)
    print('For reproducing purposes, everything seeded !')

seed_everything(SEED)

For reproducing purposes, everything seeded !


In [6]:
DATA_DIR = '/kaggle/input/global-wheat-detection'
TRAIN_DIR = os.path.join(DATA_DIR, 'train')
TEST_DIR = os.path.join(DATA_DIR, 'test')
CSV_PATH = os.path.join(DATA_DIR, 'train.csv')

In [11]:
num_train_images = len(os.listdir(TRAIN_DIR))
num_test_images = len(os.listdir(TEST_DIR))
print(f'Number of total images on Train directory: {num_train_images}')
print(f'Number of test images on Test directory: {num_test_images}')

Number of total images on Train directory: 3422
Number of test images on Test directory: 10


In [12]:
img_path = os.path.join(TRAIN_DIR, os.listdir(TRAIN_DIR)[0])
img = cv2.imread(img_path, cv2.IMREAD_COLOR)
print(img.shape)

(1024, 1024, 3)


In [7]:
df = pd.read_csv(CSV_PATH)
df.head()

Unnamed: 0,image_id,width,height,bbox,source
0,b6ab77fd7,1024,1024,"[834.0, 222.0, 56.0, 36.0]",usask_1
1,b6ab77fd7,1024,1024,"[226.0, 548.0, 130.0, 58.0]",usask_1
2,b6ab77fd7,1024,1024,"[377.0, 504.0, 74.0, 160.0]",usask_1
3,b6ab77fd7,1024,1024,"[834.0, 95.0, 109.0, 107.0]",usask_1
4,b6ab77fd7,1024,1024,"[26.0, 144.0, 124.0, 117.0]",usask_1


In [39]:
df.shape

(147793, 5)

In [11]:
averaged_bbox_per_img = df.groupby('image_id').size().mean()
print(f'Average Bounding boxes exists in an image: {int(averaged_bbox_per_img)}')

Average Bounding boxes exists in an image: 43


In [None]:
bbox_counts = df.groupby('image_id').size()
print('Statistics of wheat head per image:')
print(bbox_counts.describe().T)

In [None]:
plt.figure(figsize= (12, 6))
sns.histplot(bbox_counts, bins= 30, kde= True, color= 'purple')
plt.title('Number of Bounding Boxes per Image')
plt.xlabel('Number of Bounding Boxes')
plt.ylabel('Number of images')

plt.show()

In [8]:
annonated_ids = set(df['image_id'].unique())
print(f'Number of images with Wheat: {len(annonated_ids)}')

Number of images with Wheat: 3373


In [9]:
all_images = [f.replace('.jpg', '') for f in os.listdir(TRAIN_DIR)]
empty_images = [f for f in all_images if f not in annonated_ids]
print(f'Number of images without annonation(Wheat): {len(empty_images)}')
print(f'Example of empty image: {empty_images[0]}')

Number of images without annonation(Wheat): 49
Example of empty image: dec23c826


In [None]:
empty_img_frac = len(empty_images) / len(os.listdir(TRAIN_DIR))
annonated_img_frac = len(annonated_ids) / len(os.listdir(TRAIN_DIR))

print(f'Empty images percentage: {empty_img_frac:.4f}')
print(f'Annonated images percentage: {annonated_img_frac:.4f}')
print("Empty images aren't dominated, no problem with them at all!")

In [None]:
img_path = os.path.join(TRAIN_DIR, empty_images[0] + '.jpg')
img = Image.open(img_path)

plt.imshow(img)
plt.axis('off')
plt.title(f'Example of empty: {empty_images[0]}.jpg')
plt.show()

In [None]:
def show_images(num_images= 6, cols= 3):
    files = os.listdir(TRAIN_DIR)[:num_images]
    rows = (num_images + cols - 1) // cols

    fig = plt.figure(figsize= (cols* 4, rows* 4))
    
    for i, fname in enumerate(files):
        img_path = os.path.join(TRAIN_DIR, fname)
        img = Image.open(img_path)
        img = img.resize((256, 256))

        plt.subplot(rows, cols, i+1)
        plt.imshow(img)
        plt.axis('off')
        plt.title(fname)
        
    plt.tight_layout()
    plt.show()

In [None]:
show_images(num_images= 6, cols= 3)

In [10]:
df['bbox'] = df['bbox'].apply(ast.literal_eval)
df['x_min'] = df['bbox'].apply(lambda b: b[0])
df['y_min'] = df['bbox'].apply(lambda b: b[1])
df['x_max'] = df['bbox'].apply(lambda b: b[0] + b[2])
df['y_max'] = df['bbox'].apply(lambda b: b[1] + b[3])

In [None]:
df.head()

In [15]:
df['width'] = df['x_max'] - df['x_min']
df['height'] = df['y_max'] - df['y_min']
print(df[['width' ,'height']].describe().T)

           count       mean        std  min   25%   50%    75%    max
width   147793.0  84.435060  35.553450  1.0  62.0  78.0  100.0  987.0
height  147793.0  76.927306  33.853186  1.0  55.0  71.0   91.0  714.0


In [None]:
df.head()

In [None]:
fig, ax = plt.subplots(1, 2, figsize= (12, 6))
for i, col in enumerate(['width', 'height']):
    sns.histplot(df[col], bins= 50, kde= True, ax= ax[i])
    ax[i].set_title(f'Bounding Boxes {col} distribution')
    ax[i].set_xlim((0, 250))
    ax[i].set_xlabel(f'{col} pixels')
    ax[i].set_ylabel('Count')

In [None]:
def show_images_with_bboxes(df, image_dir, nrows, ncols):
    # Pick random images from the train dir
    files = os.listdir(image_dir)
    selected_files = random.sample(files, nrows * ncols)

    fig, axs = plt.subplots(nrows, ncols, figsize=(4*ncols, 4*nrows))

    for ax, fname in zip(axs.flatten(), selected_files):
        image_id = fname.replace('.jpg', '')

        # Load image
        img_path = os.path.join(image_dir, fname)
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Get bboxes if exists
        if image_id in df['image_id'].values:
            bboxes = df[df['image_id'] == image_id][['x_min', 'y_min', 'x_max', 'y_max']].values
            for (x_min, y_min, x_max, y_max) in bboxes:
                start_point = (int(x_min), int(y_min))
                end_point = (int(x_max), int(y_max))
                color = (255, 0, 0)
                thickness = 2
                cv2.rectangle(img, start_point, end_point, color, thickness)

        # Show image
        ax.imshow(img)
        ax.axis('off')
        ax.set_title(fname, fontsize=8)

    plt.tight_layout()
    plt.show()

In [None]:
show_images_with_bboxes(df, TRAIN_DIR, 2, 2)

In [11]:
grouped = df.groupby('image_id')[['x_min', 'y_min', 'x_max', 'y_max']].apply(
    lambda x: x.values.tolist()
)

In [12]:
data_dicts = []
for image_id, bboxes in grouped.items():
    img_path = os.path.join(TRAIN_DIR, f'{image_id}.jpg')
    bboxes = np.array(bboxes, dtype=np.float32).reshape(-1, 4)
    data_dicts .append({
        'image_path': img_path,
         'bboxes': bboxes
    })

print(data_dicts[:2])

[{'image_path': '/kaggle/input/global-wheat-detection/train/00333207f.jpg', 'bboxes': array([[   0.,  654.,   37.,  765.],
       [   0.,  817.,  135.,  915.],
       [   0.,  192.,   22.,  273.],
       [   4.,  342.,   67.,  380.],
       [  82.,  334.,  164.,  415.],
       [  30.,  296.,   78.,  345.],
       [ 176.,  316.,  246.,  370.],
       [ 176.,  126.,  245.,  177.],
       [ 203.,   38.,  245.,  123.],
       [   3.,  142.,   92.,  200.],
       [ 236.,    0.,  296.,   25.],
       [ 329.,    0.,  404.,   57.],
       [ 796.,    0.,  865.,   96.],
       [ 659.,   24.,  718.,  114.],
       [ 540.,   81.,  680.,  161.],
       [ 233.,  152.,  322.,  203.],
       [ 422.,  159.,  480.,  209.],
       [ 462.,  153.,  667.,  217.],
       [ 468.,  210.,  576.,  263.],
       [ 417.,  235.,  553.,  323.],
       [ 287.,  257.,  343.,  308.],
       [ 283.,  322.,  400.,  398.],
       [ 393.,  329.,  567.,  429.],
       [ 606.,  346.,  653.,  403.],
       [ 611.,  286.,  681

In [13]:
train_dicts, val_dicts = train_test_split(
    data_dicts,
    test_size= 0.2,
    random_state= SEED,
    shuffle= True
)
print('Train and Validation dicts created successfully! 20% of data stored for validation')

Train and Validation dicts created successfully! 20% of data stored for validation


In [14]:
for fname in empty_images:
    img_path = os.path.join(TRAIN_DIR, f'{fname}.jpg')
    bboxes = np.zeros((0, 4), dtype=np.float32)
    train_dicts.append({
        'image_path': img_path,
        'bboxes': bboxes
    })

random.shuffle(train_dicts)

In [15]:
IMG_SIZE = (1024, 1024)
NUM_CLASSES = 1
GLOBAL_CLIPNORM = 10.0
WARMUP_LR= 1e-3
FINE_TUNE_BB_LR = 5e-5
FINE_TUNE_MODEL_LR = 1e-5
WARMUP_EPOCH = 10
INTERMEDIATE_EPOCH = WARMUP_EPOCH + 20
FINAL_EPOCH = INTERMEDIATE_EPOCH + 50
MAX_BOXES = 120
AUTO = tf.data.AUTOTUNE
BATCH_SIZE_PER_REPLICA = 4
BUFFER_SHUFFLE_SIZE = 512
BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync
print(f'Global Batch size: {BATCH_SIZE}')

Global Batch size: 4


In [36]:
# This generator will read one image and its boxes at a time
def data_generator(dict_list):
    for sample in dict_list:
        image = tf.io.read_file(sample['image_path'])
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.convert_image_dtype(image, tf.float32)
        
        b = np.array(sample['bboxes'], dtype=np.float32).reshape(-1, 4)
        n = b.shape[0]
        bounding_boxes = {
            'boxes': tf.convert_to_tensor(b, dtype=tf.float32),          # (N,4)
            'classes': tf.zeros((n,), dtype=tf.float32)                  # (N,)
        }
        yield {'images': image, 'bounding_boxes': bounding_boxes}

In [17]:
random_flip = keras_cv.layers.RandomFlip(
    mode="horizontal", 
    bounding_box_format="xyxy"
)
# Use a gentler scaling factor to avoid making small wheat heads disappear
train_resizing = keras_cv.layers.JitteredResize(
    target_size=IMG_SIZE, 
    scale_factor=(0.9, 1.1), 
    bounding_box_format="xyxy"
)

val_resizing = keras_cv.layers.JitteredResize(
    target_size=IMG_SIZE, 
    scale_factor=(1.0, 1.0), # Reduced range
    bounding_box_format="xyxy"
)

random_color_jitter = keras_cv.layers.RandomColorJitter(
    value_range= (0.0, 1.0),
    brightness_factor= 0.2,
    contrast_factor= 0.2,
    saturation_factor= 0.2,
    hue_factor= 0.1
)

random_color_deg = keras_cv.layers.RandomColorDegeneration(
    factor= (0.2, 0.7),
    seed= SEED
)


In [18]:
# --- LIGHTER AUGMENTATIONS for Phase 2 and 3 ---

# Keep flip and a gentler resize
random_flip_light = keras_cv.layers.RandomFlip(mode="horizontal", bounding_box_format="xyxy")
train_resizing_light = keras_cv.layers.JitteredResize(
    target_size=IMG_SIZE, 
    scale_factor=(0.95, 1.05), # Reduced range
    bounding_box_format="xyxy"
)

# Reduce the intensity of color jitter
random_color_jitter_light = keras_cv.layers.RandomColorJitter(
    value_range=(0.0, 1.0),
    brightness_factor=0.1, # Reduced from 0.2
    contrast_factor=0.1,   # Reduced from 0.2
    saturation_factor=0.1, # Reduced from 0.2
    hue_factor=0.05        # Reduced from 0.1
)

# Reduce the intensity of color degeneration
random_color_deg_light = keras_cv.layers.RandomColorDegeneration(
    factor=(0.1, 0.4), # Tighter, weaker range than (0.2, 0.7)
    seed=SEED
)

In [47]:
# Strong augmentations
augmenter_strong = tf.keras.Sequential([
    keras_cv.layers.JitteredResize(target_size=IMG_SIZE, scale_factor=(0.9, 1.1), bounding_box_format="xyxy"),
    keras_cv.layers.RandomFlip(mode="horizontal", bounding_box_format="xyxy"),
    keras_cv.layers.RandomColorJitter(value_range=(0.0, 1.0),
                                      brightness_factor=0.2, contrast_factor=0.2,
                                      saturation_factor=0.2, hue_factor=0.1),
    keras_cv.layers.RandomColorDegeneration(factor=(0.2, 0.7), seed=SEED),
])

# Lighter augmentations
augmenter_light = tf.keras.Sequential([
    keras_cv.layers.JitteredResize(target_size=IMG_SIZE, scale_factor=(0.95, 1.05), bounding_box_format="xyxy"),
    keras_cv.layers.RandomFlip(mode="horizontal", bounding_box_format="xyxy"),
    keras_cv.layers.RandomColorJitter(value_range=(0.0, 1.0),
                                      brightness_factor=0.1, contrast_factor=0.1,
                                      saturation_factor=0.1, hue_factor=0.05),
    keras_cv.layers.RandomColorDegeneration(factor=(0.1, 0.4), seed=SEED),
])

In [16]:
def prepare_ragged_inputs(dicts):
    image_paths = tf.ragged.constant(
        [s["image_path"] for s in dicts], dtype=tf.string
    )

    bbox_list = [
        np.array(s["bboxes"], dtype=np.float32).reshape(-1, 4)
        for s in dicts
    ]

    # Replace with real class IDs if you have them
    classes_list = [
        np.zeros((len(b)), dtype=np.float32) for b in bbox_list
    ]

    bboxes  = tf.ragged.constant(bbox_list, ragged_rank=1, dtype=tf.float32)
    classes = tf.ragged.constant(classes_list, ragged_rank=1, dtype=tf.float32)

    return image_paths, classes, bboxes


In [17]:
def load_image(image_path):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    return tf.image.convert_image_dtype(image, tf.float32)

def load_dataset(image_path, classes_rt, boxes_rt):
    image = load_image(image_path)
    bounding_boxes = {"boxes": boxes_rt, "classes": classes_rt}
    return {"images": image, "bounding_boxes": bounding_boxes}

In [18]:
# Strong augmentations
augmenter_strong = tf.keras.Sequential([
    keras_cv.layers.JitteredResize(
        target_size=IMG_SIZE, scale_factor=(0.9, 1.1), bounding_box_format="xyxy"
    ),
    keras_cv.layers.Mosaic(bounding_box_format="xyxy", name= 'mosaic'),
    keras_cv.layers.RandomFlip(
        mode="horizontal", bounding_box_format="xyxy"
    ),
    keras_cv.layers.RandomColorJitter(
        value_range=(0.0, 1.0),
        brightness_factor=0.2, contrast_factor=0.2,
        saturation_factor=0.2, hue_factor=0.1
    ),
    keras_cv.layers.RandomColorDegeneration(
        factor=(0.2, 0.7), seed=SEED
    ),
])

# Light augmentations (for later phases)
augmenter_light = tf.keras.Sequential([
    keras_cv.layers.JitteredResize(
        target_size=IMG_SIZE, scale_factor=(0.95, 1.05), bounding_box_format="xyxy"
    ),
    keras_cv.layers.RandomFlip(
        mode="horizontal", bounding_box_format="xyxy"
    ),
    keras_cv.layers.RandomColorJitter(
        value_range=(0.0, 1.0),
        brightness_factor=0.1, contrast_factor=0.1,
        saturation_factor=0.1, hue_factor=0.05
    ),
    keras_cv.layers.RandomColorDegeneration(
        factor=(0.1, 0.4), seed=SEED
    ),
])

# Validation (deterministic) resizing
augmenter_val = tf.keras.Sequential([
    keras_cv.layers.JitteredResize(
        target_size=IMG_SIZE, scale_factor=(1.0, 1.0), bounding_box_format="xyxy"
    )
])


In [19]:
def augment_strong(inputs):
    inputs = train_resizing(inputs)
    inputs = random_flip(inputs)
    inputs = random_color_jitter(inputs)
    inputs = random_color_deg(inputs)

    return inputs

In [20]:
# New augmentation pipeline function
def augment_light(inputs):
    inputs = train_resizing_light(inputs)
    inputs = random_flip_light(inputs)
    inputs = random_color_jitter_light(inputs)
    inputs = random_color_deg_light(inputs)
    return inputs

In [19]:
def dict_to_tuple(inputs):
    return inputs['images'], inputs['bounding_boxes']

In [20]:
def create_strong_dataset(dict_list, batch_size=BATCH_SIZE):
    
    image_paths, classes, bboxes = prepare_ragged_inputs(dict_list)

    ds = tf.data.Dataset.from_tensor_slices((image_paths, classes, bboxes))
    ds = ds.shuffle(BUFFER_SHUFFLE_SIZE)
    ds = ds.map(load_dataset, num_parallel_calls=AUTO)
    ds = ds.ragged_batch(batch_size, drop_remainder=True)
    ds = ds.map(augmenter_strong, num_parallel_calls=AUTO)
    ds = ds.map(dict_to_tuple, num_parallel_calls=AUTO)
    
    return ds.prefetch(AUTO)

In [21]:
def augment_val(inputs):
    # Only applies resizing for validation stability
    return val_resizing(inputs)

In [22]:
def create_light_dataset(dict_list, batch_size=BATCH_SIZE, is_training= False):

    image_paths, classes, bboxes = prepare_ragged_inputs(dict_list)
    
    ds = tf.data.Dataset.from_tensor_slices((image_paths, classes, bboxes))

    ds = ds.map(load_dataset, num_parallel_calls=AUTO)

    if is_training:
        ds = ds.shuffle(BUFFER_SHUFFLE_SIZE)
        ds = ds.ragged_batch(batch_size, drop_remainder=True)
        ds = ds.map(augmenter_light, num_parallel_calls=AUTO)
    else:
        ds = ds.ragged_batch(batch_size, drop_remainder=True)
        ds = ds.map(augmenter_val, num_parallel_calls=AUTO)
    
    ds = ds.map(dict_to_tuple, num_parallel_calls=AUTO)
    
    
    return ds.prefetch(AUTO)

In [23]:
# --- Find this existing code in your notebook ---
train_strong_dataset = create_strong_dataset(train_dicts)
val_dataset = create_light_dataset(val_dicts, is_training= False)
train_light_dataset = create_light_dataset(train_dicts, is_training= True)

print('✅ Train and Validation and light augmented Train datasets are ready !')
print('Light Augmented dataset for Mid-Tune and Fine-Tune phases created !')

✅ Train and Validation and light augmented Train datasets are ready!
Light Augmented dataset for Mid-Tune and Fine-Tune phases created !


In [36]:
for images, bounding_boxes in train_light_dataset.take(3):
    bboxes = bounding_boxes["boxes"]
    classes = bounding_boxes["classes"]

    print("Images shape:", images.shape)
    print("Boxes shape:", bboxes.shape)
    print("Classes shape:", classes.shape)

Images shape: (4, 1024, 1024, 3)
Boxes shape: (4, None, 4)
Classes shape: (4, None)
Images shape: (4, 1024, 1024, 3)
Boxes shape: (4, None, 4)
Classes shape: (4, None)
Images shape: (4, 1024, 1024, 3)
Boxes shape: (4, None, 4)
Classes shape: (4, None)


In [26]:
def count_empty(ds, name):
    empty = 0
    total = 0
    for images, bboxes in ds.unbatch().take(3000):  # adjust limit if needed
        total += 1
        if tf.shape(bboxes["boxes"])[0] == 0:
            empty += 1
    print(f"{name}: {empty}/{total} samples have no boxes")

count_empty(train_light_dataset, "train_light_dataset")
count_empty(train_strong_dataset, "train_strong_dataset")
count_empty(val_dataset, "val_dataset")

train_light_dataset: 0/2744 samples have no boxes


InvalidArgumentError: {{function_node __wrapped__IteratorGetNext_output_types_3_device_/job:localhost/replica:0/task:0/device:CPU:0}} TypeError: `generator` yielded an element of shape (120, 4) where an element of shape (None,) was expected.
Traceback (most recent call last):

  File "/usr/local/lib/python3.11/dist-packages/tensorflow/python/ops/script_ops.py", line 269, in __call__
    ret = func(*args)
          ^^^^^^^^^^^

  File "/usr/local/lib/python3.11/dist-packages/tensorflow/python/autograph/impl/api.py", line 643, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^

  File "/usr/local/lib/python3.11/dist-packages/tensorflow/python/data/ops/from_generator_op.py", line 235, in generator_py_func
    raise TypeError(

TypeError: `generator` yielded an element of shape (120, 4) where an element of shape (None,) was expected.


	 [[{{node PyFunc}}]] [Op:IteratorGetNext] name: 

In [24]:
NUM_TRAIN_IMAGES = len(train_dicts)
NUM_VAL_IMAGES   = len(val_dicts)

steps_per_epoch  = math.ceil(NUM_TRAIN_IMAGES / BATCH_SIZE)
validation_steps = math.ceil(NUM_VAL_IMAGES / BATCH_SIZE)

print(f"Steps per Epoch: {steps_per_epoch}")
print(f"Validation Steps: {validation_steps}")

Steps per Epoch: 687
Validation Steps: 169


In [None]:
# After creating your datasets...
del train_dicts, val_dicts, data_dicts, annonated_ids, all_images, empty_images
import gc
gc.collect() # Force garbage collection

In [None]:
def visualize_dataset(dataset, rows=2, cols=2, bounding_box_format="xyxy"):
    images, bounding_boxes = next(iter(dataset.take(1)))
    num_images = rows * cols

    # First grid
    fig, axs = plt.subplots(rows, cols, figsize=(4*cols, 4*rows))
    axs = axs.flatten()
    for i in range(num_images):
        axs[i].imshow(images[i].numpy())
        axs[i].set_title('Raw Image')
        axs[i].axis('off')
    plt.tight_layout()
    plt.show()

    # Rescale images to [0,255] for keras_cv plotting
    images_255 = tf.cast(images * 255.0, tf.uint8)

    keras_cv.visualization.plot_bounding_box_gallery(
        images_255,
        y_true=bounding_boxes,
        value_range=(0, 255),   # <-- important
        rows=rows,
        cols=cols,
        scale=5,
        font_scale=0.7,
        bounding_box_format=bounding_box_format,
    )
    plt.tight_layout()
    plt.show()
# Usage
visualize_dataset(train_strong_dataset, rows=2, cols=2)
visualize_dataset(val_dataset, rows=2, cols=2)

In [25]:
def create_model():
    backbone = keras_cv.models.YOLOV8Backbone.from_preset(
        'yolo_v8_m_backbone_coco',
        name= 'yolov8_backbone'
    )

    model = keras_cv.models.YOLOV8Detector(
        num_classes= NUM_CLASSES,
        bounding_box_format= 'xyxy',
        fpn_depth= 3,
        backbone= backbone,
        name= 'yolov8_detector'
    )
    model.summary()
    return model

In [28]:
with strategy.scope():
    
    model = create_model()
    for layer in model.backbone.layers:
        layer.trainable = False

    # Freeze BN stats explicitly
    for layer in model.backbone.layers:
        if isinstance(layer, tf.keras.layers.BatchNormalization):
            layer.trainable = False

    for layer in model.layers: # Iterate through all layers of the detector model
    # Note: We re-check for BN to catch those in the Neck and Head
        if isinstance(layer, tf.keras.layers.BatchNormalization):
            layer.trainable = False

    
    optimizer = tf.keras.optimizers.AdamW(
    learning_rate= WARMUP_LR,
    weight_decay= 1e-4,
    beta_1= 0.9,
    beta_2= 0.999,
    global_clipnorm= GLOBAL_CLIPNORM)

    classification_loss = keras_cv.losses.FocalLoss()
    model.compile(
        optimizer= optimizer,
        classification_loss= classification_loss,
        box_loss= 'ciou',
        steps_per_execution= 32 if isinstance(strategy, tf.distribute.TPUStrategy) else 1
    )

In [80]:
# Take one batch
example_batch = next(iter(train_strong_dataset.take(1)))
images, bounding_boxes = example_batch  # unpack tuple

# Evaluate
try:
    model.evaluate(train_strong_dataset.take(1), verbose=True)
except Exception as e:
    print("Your model is not compatible with the dataset you defined earlier.")
    print("Error:", e)
else:
    # Predict using the images dict
    predictions = model.predict(images, verbose=True)


W0000 00:00:1761324149.581559      36 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "CropAndResize" attr { key: "T" value { type: DT_FLOAT } } attr { key: "extrapolation_value" value { f: 0 } } attr { key: "method" value { s: "bilinear" } } inputs { dtype: DT_FLOAT shape { dim { size: -8 } dim { size: 2048 } dim { size: 2048 } dim { size: 3 } } } inputs { dtype: DT_FLOAT shape { dim { size: -7 } dim { size: 4 } } } inputs { dtype: DT_INT32 shape { dim { size: -7 } } } inputs { dtype: DT_INT32 shape { dim { size: 2 } } value { dtype: DT_INT32 tensor_shape { dim { size: 2 } } int_val: 1024 } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2000 num_cores: 4 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 40370176 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { dim { size: -7 } dim { 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21s/step - box_loss: 3.5738 - class_loss: 4.8318e-05 - loss: 3.5739
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step


In [29]:
class EvaluateCOCOMetricsCallback(tf.keras.callbacks.Callback):
    def __init__(self, data, save_path):
        super().__init__()
        self.data = data
        self.metrics = keras_cv.metrics.BoxCOCOMetrics(
            bounding_box_format="xyxy",
            evaluate_freq=1e9,  # We will control evaluation timing manually
        )
        self.save_path = save_path
        self.best_map = -1.0

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        self.metrics.reset_state()

        # ---- START: MODIFIED SECTION ----
        # 1. Create lists to hold all ground truth and prediction data
        y_true_list = []
        y_pred_list = []

        # 2. Iterate through the entire validation dataset to collect data
        for images, y_true in self.data:
            y_pred = self.model.predict(images, verbose=0)
            y_true_list.append(y_true)
            y_pred_list.append(y_pred)

        # 3. Concatenate all batches into single, large ragged tensors
        y_true_concat = {
            'boxes': tf.concat([item['boxes'] for item in y_true_list], axis=0),
            'classes': tf.concat([item['classes'] for item in y_true_list], axis=0)
        }
        # Note: model prediction includes 'confidence', which we also need to concatenate
        y_pred_concat = {
            'boxes': tf.concat([item['boxes'] for item in y_pred_list], axis=0),
            'classes': tf.concat([item['classes'] for item in y_pred_list], axis=0),
            'confidence': tf.concat([item['confidence'] for item in y_pred_list], axis=0)
        }
        # ---- END: MODIFIED SECTION ----

        # 4. Update the metric's state ONCE with the full dataset
        self.metrics.update_state(y_true_concat, y_pred_concat)

        # 5. Get the final results
        metrics = self.metrics.result(force=True)
        logs.update(metrics)

        current_map = metrics["MaP"]
        
        # Manually print the validation metrics
        print(f"\nEpoch {epoch+1}: Validation Metrics")
        for key, value in metrics.items():
            print(f"  {key}: {value:.4f}")
            
        if current_map > self.best_map:
            self.best_map = current_map
            self.model.save(self.save_path)
            print(f"✅ Validation MaP improved to {current_map:.4f}. Model saved to {self.save_path}")

        return logs

In [30]:
phase1_saved_path = "/kaggle/working/warmup_best_model.keras"
coco_cb = EvaluateCOCOMetricsCallback(val_dataset, 
                                      save_path= phase1_saved_path,
                                      )
early_stopping_cb = EarlyStopping(
    monitor= 'MaP',
    patience= 3,
    restore_best_weights= True,
    mode= 'max'
)

reduce_lr_cb = ReduceLROnPlateau(
    monitor= 'MaP',
    patience= 3,
    factor= 0.66,
    min_lr= WARMUP_LR * 0.1,
    verbose= 1
)

tb_cb = TensorBoard(
    log_dir= '/kaggle/working/logs',
    histogram_freq= 1
)

callbacks = [
    coco_cb,
    early_stopping_cb,
    reduce_lr_cb,
    tb_cb
]

In [31]:
# Set the number of epochs for this training phase
print("--- Starting Phase 1: Warmup Training ---")
# Fit the model to the training data
history = model.fit(train_strong_dataset.repeat(), 
                    validation_data= val_dataset.repeat(),
                    epochs= WARMUP_EPOCH,
                    callbacks= [callbacks],
                    steps_per_epoch= steps_per_epoch,
                    validation_steps= validation_steps)

--- Starting Phase 1: Warmup Training ---


W0000 00:00:1761339200.270994      36 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "CropAndResize" attr { key: "T" value { type: DT_FLOAT } } attr { key: "extrapolation_value" value { f: 0 } } attr { key: "method" value { s: "bilinear" } } inputs { dtype: DT_FLOAT shape { dim { size: -67 } dim { size: -68 } dim { size: -69 } dim { size: -70 } } } inputs { dtype: DT_FLOAT shape { dim { size: -8 } dim { size: 4 } } } inputs { dtype: DT_INT32 shape { dim { size: -8 } } } inputs { dtype: DT_INT32 shape { dim { size: 2 } } value { dtype: DT_INT32 tensor_shape { dim { size: 2 } } int_val: 1024 } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2000 num_cores: 4 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 40370176 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { dim { size: -8 } dim {

Epoch 1/10


W0000 00:00:1761339215.681492      36 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "CropAndResize" attr { key: "T" value { type: DT_FLOAT } } attr { key: "extrapolation_value" value { f: 0 } } attr { key: "method" value { s: "bilinear" } } inputs { dtype: DT_FLOAT shape { dim { size: -67 } dim { size: -68 } dim { size: -69 } dim { size: -70 } } } inputs { dtype: DT_FLOAT shape { dim { size: -8 } dim { size: 4 } } } inputs { dtype: DT_INT32 shape { dim { size: -8 } } } inputs { dtype: DT_INT32 shape { dim { size: 2 } } value { dtype: DT_INT32 tensor_shape { dim { size: 2 } } int_val: 1024 } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2000 num_cores: 4 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 40370176 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { dim { size: -8 } dim {

[1m687/687[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 880ms/step - box_loss: 0.9615 - class_loss: 9.0402e-06 - loss: 0.9615
Epoch 1: Validation Metrics
  MaP: 0.0000
  MaP@[IoU=50]: 0.0000
  MaP@[IoU=75]: 0.0000
  MaP@[area=small]: 0.0000
  MaP@[area=medium]: 0.0000
  MaP@[area=large]: 0.0000
  Recall@[max_detections=1]: 0.0000
  Recall@[max_detections=10]: 0.0000
  Recall@[max_detections=100]: 0.0000
  Recall@[area=small]: 0.0000
  Recall@[area=medium]: 0.0000
  Recall@[area=large]: 0.0000
✅ Validation MaP improved to 0.0000. Model saved to /kaggle/working/warmup_best_model.keras
[1m687/687[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m850s[0m 1s/step - box_loss: 0.9605 - class_loss: 9.0305e-06 - loss: 0.9605 - val_box_loss: 0.0000e+00 - val_class_loss: 0.0000e+00 - val_loss: 0.0000e+00 - MaP: 0.0000e+00 - MaP@[IoU=50]: 0.0000e+00 - MaP@[IoU=75]: 0.0000e+00 - MaP@[area=small]: 0.0000e+00 - MaP@[area=medium]: 0.0000e+00 - MaP@[area=large]: 0.0000e+00 - Recall@[max_detect

KeyboardInterrupt: 

In [29]:
START_UNFREEZE_LAYER_NAME = 'stack4_downsample_conv'
with strategy.scope():
    print("Loading model from warmup phase...")
    model = tf.keras.models.load_model(
        '/kaggle/input/wheat-detection/keras/default/1/phase1_best_model.keras',
            custom_objects = {
                'YOLOV8Detector': keras_cv.models.YOLOV8Detector,
                'YOLOV8Backbone': keras_cv.models.YOLOV8Backbone
            }
    )
    print("Model loaded successfully. Ready for Mid-Tune phase !")
    
    model.backbone.trainable = True
    unfreeze_checkpoint = False

    for layer in model.backbone.layers:
        if layer.name == START_UNFREEZE_LAYER_NAME:
            unfreeze_checkpoint = True
        if unfreeze_checkpoint:
            layer.trainable = True
        else:
            layer.trainable = False

        if isinstance(layer, tf.keras.layers.BatchNormalization):
            layer.trainable = False

    for layer in model.layers: # Iterate through all layers of the detector model
    # Note: We re-check for BN to catch those in the Neck and Head
        if isinstance(layer, tf.keras.layers.BatchNormalization):
            layer.trainable = False
    
    num_phase2_epochs = INTERMEDIATE_EPOCH - WARMUP_EPOCH
    decay_steps = int(steps_per_epoch * num_phase2_epochs)
    learning_rate = tf.keras.optimizers.schedules.CosineDecay(
        initial_learning_rate=FINE_TUNE_BB_LR,
        decay_steps=decay_steps,
        alpha=0.1 # End LR will be 10% of initial LR (5e-6)
    )

    optimizer = tf.keras.optimizers.AdamW(
        learning_rate = learning_rate,
        weight_decay = 1e-4,
        beta_1 = 0.9,
        beta_2 = 0.999,
        global_clipnorm = GLOBAL_CLIPNORM
    )

    classification_loss = keras_cv.losses.FocalLoss()
    
    model.compile(
        optimizer = optimizer,
        classification_loss = classification_loss,
        box_loss = 'ciou',
        steps_per_execution= 32 if isinstance(strategy, tf.distribute.TPUStrategy) else 1
    )
    print("\n--- Model configured for Phase 2: Mid-Tune ---")

Loading model from warmup phase...
Model loaded successfully. Ready for Mid-Tune phase !

--- Model configured for Phase 2: Mid-Tune ---


In [30]:
phase2_saved_path = "/kaggle/working/midtune_best_model.keras"
coco_cb = EvaluateCOCOMetricsCallback(val_dataset, 
                                       phase2_saved_path)
early_stopping_cb = EarlyStopping(
    monitor= 'MaP',
    patience= 5,
    restore_best_weights= True,
    mode= 'max'
)

tb_cb = TensorBoard(
    log_dir= '/kaggle/working/logs',
    histogram_freq= 1
)

callbacks = [
    coco_cb,
    early_stopping_cb,
    tb_cb
]

In [31]:
print("--- Starting Phase 2: Mid-Tune Training ---")
final_history = model.fit(
    train_light_dataset.repeat(),
    epochs= INTERMEDIATE_EPOCH,
    initial_epoch= WARMUP_EPOCH,
    validation_data= val_dataset.repeat(),
    steps_per_epoch= steps_per_epoch,
    validation_steps= validation_steps,
    callbacks= callbacks
)

--- Starting Phase 2: Mid-Tune Training ---
Epoch 11/30


I0000 00:00:1761062280.765429      97 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m687/687[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 530ms/step - box_loss: 1.1718 - class_loss: 3.0198e-06 - loss: 1.1718
Epoch 11: Validation Metrics
  MaP: 0.4798
  MaP@[IoU=50]: 0.8474
  MaP@[IoU=75]: 0.4832
  MaP@[area=small]: 0.0848
  MaP@[area=medium]: 0.4800
  MaP@[area=large]: 0.4954
  Recall@[max_detections=1]: 0.0159
  Recall@[max_detections=10]: 0.1481
  Recall@[max_detections=100]: 0.5838
  Recall@[area=small]: 0.2017
  Recall@[area=medium]: 0.5836
  Recall@[area=large]: 0.6012
✅ Validation MaP improved to 0.4798. Model saved to /kaggle/working/midtune_best_model.keras
[1m687/687[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m634s[0m 880ms/step - box_loss: 1.1718 - class_loss: 3.0198e-06 - loss: 1.1718 - val_box_loss: 1.1342 - val_class_loss: 3.0032e-06 - val_loss: 1.1342 - MaP: 0.4798 - MaP@[IoU=50]: 0.8474 - MaP@[IoU=75]: 0.4832 - MaP@[area=small]: 0.0848 - MaP@[area=medium]: 0.4800 - MaP@[area=large]: 0.4954 - Recall@[max_detections=1]: 0.0159 - Recall@[m

InvalidArgumentError: Graph execution error:

Detected at node yolov8_label_encoder_1/cond/mul_5 defined at (most recent call last):
  File "/usr/lib/python3.11/threading.py", line 1002, in _bootstrap

  File "/usr/lib/python3.11/threading.py", line 1045, in _bootstrap_inner

  File "/usr/local/lib/python3.11/dist-packages/keras_cv/src/models/object_detection/yolo_v8/yolo_v8_detector.py", line 532, in test_step

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 92, in test_step

  File "/usr/local/lib/python3.11/dist-packages/keras/src/trainers/trainer.py", line 387, in _compute_loss

  File "/usr/local/lib/python3.11/dist-packages/keras_cv/src/models/object_detection/yolo_v8/yolo_v8_detector.py", line 555, in compute_loss

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/layers/layer.py", line 908, in __call__

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/ops/operation.py", line 46, in __call__

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras_cv/src/models/object_detection/yolo_v8/yolo_v8_label_encoder.py", line 248, in call

  File "/usr/local/lib/python3.11/dist-packages/keras/src/ops/core.py", line 1033, in cond

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/ops/core.py", line 987, in __call__

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/ops/core.py", line 979, in call_fn

  File "/usr/local/lib/python3.11/dist-packages/keras/src/ops/core.py", line 993, in call

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/core.py", line 240, in cond

  File "/usr/local/lib/python3.11/dist-packages/keras_cv/src/models/object_detection/yolo_v8/yolo_v8_label_encoder.py", line 250, in <lambda>

  File "/usr/local/lib/python3.11/dist-packages/keras_cv/src/models/object_detection/yolo_v8/yolo_v8_label_encoder.py", line 111, in assign

required broadcastable shapes
	 [[{{node yolov8_label_encoder_1/cond/mul_5}}]] [Op:__inference_multi_step_on_iterator_75803]

In [38]:
with strategy.scope():
    print("Loading model from mid-tune phase...")
    model = tf.keras.models.load_model(
        '/kaggle/input/wheat-detection/keras/default/2/midtune_best_model.keras',
            custom_objects = {
                'YOLOV8Detector': keras_cv.models.YOLOV8Detector,
                'YOLOV8Backbone': keras_cv.models.YOLOV8Backbone
            }
    )
    print("Model loaded successfully. Ready for Fine-Tune phase !")
    
    model.backbone.trainable = True

    for layer in model.backbone.layers:
        if isinstance(layer, tf.keras.layers.BatchNormalization):
            layer.trainable = False

    for layer in model.layers: # Iterate through all layers of the detector model
    # Note: We re-check for BN to catch those in the Neck and Head
        if isinstance(layer, tf.keras.layers.BatchNormalization):
            layer.trainable = False
    
    num_phase2_epochs = FINAL_EPOCH - INTERMEDIATE_EPOCH
    decay_steps = int(steps_per_epoch * num_phase2_epochs)
    learning_rate = tf.keras.optimizers.schedules.CosineDecay(
        initial_learning_rate=FINE_TUNE_MODEL_LR,
        decay_steps=decay_steps,
        alpha=0.1 # End LR will be 10% of initial LR (5e-6)
    )

    optimizer = tf.keras.optimizers.AdamW(
        learning_rate = learning_rate,
        weight_decay = 1e-4,
        beta_1 = 0.9,
        beta_2 = 0.999,
        global_clipnorm = GLOBAL_CLIPNORM
    )

    classification_loss = keras_cv.losses.FocalLoss()
    
    model.compile(
        optimizer = optimizer,
        classification_loss = classification_loss,
        box_loss = 'ciou',
        steps_per_execution= 32 if isinstance(strategy, tf.distribute.TPUStrategy) else 1
    )
    print("\n--- Model configured for Phase 3: Fine-Tune ---")

Loading model from mid-tune phase...
Model loaded successfully. Ready for Fine-Tune phase !

--- Model configured for Phase 3: Fine-Tune ---


In [39]:
phase3_saved_path = "/kaggle/working/best_model.keras"
coco_cb = EvaluateCOCOMetricsCallback(val_dataset, 
                                       phase3_saved_path)
early_stopping_cb = EarlyStopping(
    monitor= 'MaP',
    patience= 8,
    restore_best_weights= True,
    mode= 'max'
)

tb_cb = TensorBoard(
    log_dir= '/kaggle/working/logs',
    histogram_freq= 1
)

callbacks = [
    coco_cb,
    early_stopping_cb,
    tb_cb
]

In [41]:
print("--- Starting Phase 3: Fine-Tune Training ---")
final_history = model.fit(
    train_light_dataset.repeat(),
    epochs= FINAL_EPOCH,
    initial_epoch= INTERMEDIATE_EPOCH,
    validation_data= val_dataset.repeat(),
    steps_per_epoch= steps_per_epoch,
    validation_steps= validation_steps,
    callbacks= callbacks
)

--- Starting Phase 3: Fine-Tune Training ---
Epoch 31/80


I0000 00:00:1761066018.639254      95 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m687/687[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 583ms/step - box_loss: 1.0839 - class_loss: 2.9265e-06 - loss: 1.0839
Epoch 31: Validation Metrics
  MaP: 0.4957
  MaP@[IoU=50]: 0.8621
  MaP@[IoU=75]: 0.5055
  MaP@[area=small]: 0.0821
  MaP@[area=medium]: 0.4943
  MaP@[area=large]: 0.5206
  Recall@[max_detections=1]: 0.0163
  Recall@[max_detections=10]: 0.1511
  Recall@[max_detections=100]: 0.5951
  Recall@[area=small]: 0.2198
  Recall@[area=medium]: 0.5923
  Recall@[area=large]: 0.6224
✅ Validation MaP improved to 0.4957. Model saved to /kaggle/working/best_model.keras
[1m687/687[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m675s[0m 933ms/step - box_loss: 1.0838 - class_loss: 2.9265e-06 - loss: 1.0838 - val_box_loss: 1.1213 - val_class_loss: 2.9558e-06 - val_loss: 1.1213 - MaP: 0.4957 - MaP@[IoU=50]: 0.8621 - MaP@[IoU=75]: 0.5055 - MaP@[area=small]: 0.0821 - MaP@[area=medium]: 0.4943 - MaP@[area=large]: 0.5206 - Recall@[max_detections=1]: 0.0163 - Recall@[max_detec

InvalidArgumentError: Graph execution error:

Detected at node jittered_resize_1_1/SelectV2_4 defined at (most recent call last):
<stack traces unavailable>
Detected at node jittered_resize_1_1/SelectV2_4 defined at (most recent call last):
<stack traces unavailable>
2 root error(s) found.
  (0) INVALID_ARGUMENT:  Error in user-defined function passed to ParallelMapDatasetV2:14 transformation with iterator: Iterator::Root::Prefetch::ForeverRepeat[0]::Prefetch::ParallelMapV2::MapAndBatch::ParallelMapV2:  condition [1,41], then [1,96], and else [] must be broadcastable
	 [[{{node jittered_resize_1_1/SelectV2_4}}]]
	 [[MultiDeviceIteratorGetNextFromShard]]
	 [[RemoteCall]]
	 [[IteratorGetNext]]
	 [[StatefulPartitionedCall/yolov8_label_encoder_1/cond/pivot_t/_436/_33]]
  (1) INVALID_ARGUMENT:  Error in user-defined function passed to ParallelMapDatasetV2:14 transformation with iterator: Iterator::Root::Prefetch::ForeverRepeat[0]::Prefetch::ParallelMapV2::MapAndBatch::ParallelMapV2:  condition [1,41], then [1,96], and else [] must be broadcastable
	 [[{{node jittered_resize_1_1/SelectV2_4}}]]
	 [[MultiDeviceIteratorGetNextFromShard]]
	 [[RemoteCall]]
	 [[IteratorGetNext]]
0 successful operations.
0 derived errors ignored. [Op:__inference_multi_step_on_iterator_82178]

In [None]:
def visualize_detections(model, dataset, bounding_box_format):
    images, y_true = next(iter(dataset.take(1)))
    y_pred = model.predict(images)

    # y_pred is already in dict format (boxes, classes, confidence)
    keras_cv.visualization.plot_bounding_box_gallery(
        images,
        value_range=(0, 255),
        bounding_box_format=bounding_box_format,
        y_true=y_true,
        y_pred=y_pred,   # no need for to_ragged
        scale=4,
        rows=2,
        cols=2,
        show=True,
        font_scale=0.7,
    )
visualize_detections(yolo_model, val_dataset, bounding_box_format= 'xyxy')

In [None]:
def load_and_preprocess(img_path):
    img = tf.io.read_file(img_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, IMG_SIZE)
    img = tf.cast(img, tf.float32) / 255.0
    return img

In [None]:
example_batch = next(iter(train_dataset.take(1)))
img, bb = example_batch
# Run with model.predict(), not just model()
preds = yolo_model.predict(img)

print(preds)

In [None]:
def preprocess_for_inference(image_path):
    """Loads and resizes a single image for model prediction."""
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, IMG_SIZE)
    return image

In [None]:
test_image_paths = [os.path.join(TEST_DIR, fname) for fname in os.listdir(TEST_DIR)]

# Create a dataset from the file paths
test_ds = tf.data.Dataset.from_tensor_slices(test_image_paths)

# Map the preprocessing function
test_ds = test_ds.map(preprocess_for_inference, num_parallel_calls=tf.data.AUTOTUNE)

# Batch the dataset
BATCH_SIZE = 4 # You can adjust this based on your RAM
test_ds = test_ds.batch(BATCH_SIZE)

# Run prediction on the entire test set
y_preds = yolo_model.predict(test_ds)

In [None]:
import matplotlib.pyplot as plt

def visualize_predictions(image_paths, predictions, count=4, confidence_threshold=0.5):
    """Visualizes model predictions on a set of images."""
    num_images_to_show = min(count, len(image_paths))
    
    # Load the original images for display
    images_to_plot = [np.array(Image.open(p)) for p in image_paths[:num_images_to_show]]
    
    # Extract predictions for the images we'll show
    boxes = predictions['boxes'][:num_images_to_show]
    confidences = predictions['confidence'][:num_images_to_show]
    num_detections = predictions['num_detections'][:num_images_to_show]
    
    # Create a bounding box dictionary suitable for KerasCV's plot function
    y_pred_for_plot = {
        'boxes': [],
        'classes': [],
        'confidence': []
    }

    for i in range(num_images_to_show):
        num_valid = num_detections[i]
        
        # Filter out padded boxes and low-confidence boxes
        valid_indices = confidences[i, :num_valid] >= confidence_threshold
        
        y_pred_for_plot['boxes'].append(boxes[i, :num_valid][valid_indices])
        y_pred_for_plot['classes'].append(np.zeros(np.sum(valid_indices), dtype=int)) # All class 0
        y_pred_for_plot['confidence'].append(confidences[i, :num_valid][valid_indices])

    # Convert lists to ragged tensors for plotting
    y_pred_for_plot['boxes'] = tf.ragged.constant(y_pred_for_plot['boxes'])
    y_pred_for_plot['classes'] = tf.ragged.constant(y_pred_for_plot['classes'])
    y_pred_for_plot['confidence'] = tf.ragged.constant(y_pred_for_plot['confidence'])
    
    # Create preprocessed images for correct box scaling
    preprocessed_images = [preprocess_for_inference(p) for p in image_paths[:num_images_to_show]]
    preprocessed_images = tf.stack(preprocessed_images)

    keras_cv.visualization.plot_bounding_box_gallery(
        preprocessed_images,
        value_range=(0, 255),
        bounding_box_format="xyxy",
        y_pred=y_pred_for_plot,
        scale=4,
        rows=2,
        cols=2,
        font_scale=0.7
    )
    plt.show()

# Visualize predictions on the first few test images
visualize_predictions(test_image_paths, y_preds, count=4, confidence_threshold=0.4)

In [None]:
count = 0
count += sum(1 for layer in model.backbone.layers)
print(count)