In [None]:
!pip install --quiet keras-cv tensorflow
!pip install --quiet keras pillow pandas matplotlib opencv-python

In [1]:
from datasets import *
from debugging import *
from analytics import *
from augmentations import *
from models import *
import keras_cv
import keras
import tensorflow as tf

# One-time notebook setup; `init` is provided by one of the star imports above.
# NOTE(review): the captured output below reports "Memory growth enabled for
# GPU(s)", so this presumably configures TensorFlow GPU memory growth —
# confirm in the helper module that defines it.
init()

2025-08-06 01:10:05.098712: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-06 01:10:05.139180: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754439005.165860  397305 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754439005.175057  397305 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1754439005.198980  397305 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

Memory growth enabled for GPU(s)


# Datasets

In [4]:
# Loader settings shared by every split: all images are resized to 1024x1024.
# NOTE(review): `d_img` / `d_bb` look like debug-display switches — confirm in
# the `datasets` helper module.
_common_loader_args = dict(d_img=False, d_bb=False, tsz=(1024, 1024))


def _synthetic_coins():
    """Build a fresh grayscale loader over the synthetic annotation set."""
    return multi_coin_ds(
        [('./datasets/synthetic/annotations.xml', None)],
        togray=True,
        **_common_loader_args,
    )


# Training data: the synthetic set three times over — untouched, with a
# directional blur, and with a Gaussian blur — merged by `composite_ds`.
# NOTE(review): the training loaders pass `togray=True` and no `format`,
# while validation passes `format='xywh'` and no `togray`; verify that both
# splits yield images/boxes in the layout the detectors below expect.
train_ds = (
    composite_ds(
        _synthetic_coins(),
        _synthetic_coins().map(apply_directional_blur(45, 11)),
        _synthetic_coins().map(apply_gaussian_blur(kernel_size=15)),
    )
    .batch(4)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation data: real captures, restricted to annotations tagged 'clear'.
_val_sources = [
    ('./datasets/90/annotations.xml', 'PNG'),
    ('./datasets/60/annotations.xml', 'PNG'),
    ('./datasets/30/annotations.xml', 'PNG'),
    ('./datasets/090/annotations.xml', 'PNG'),
    ('./datasets/190/annotations.xml', 'PNG'),
    ('./datasets/clear1/annotations.xml', None),
    ('./datasets/clear2/annotations.xml', None),
]
val_ds = (
    multi_coin_ds(_val_sources, visibility=['clear'], format='xywh', **_common_loader_args)
    .batch(2)
    .prefetch(tf.data.AUTOTUNE)
)

# Factor applied to every anchor aspect ratio in the RetinaNet cell.
s_ar = scale_ar_factor()

# RetinaNet + EfficientNet Backbone

In [None]:
# ImageNet-pretrained EfficientNetV2-B0 feature extractor, frozen so that only
# the RetinaNet heads are trained in this cell.
# NOTE(review): `include_rescaling=True` makes the backbone rescale its input;
# later cells multiply `img` by 255 for display, which suggests the pipeline
# yields [0, 1] images — confirm the expected input range.
backbone = keras_cv.models.EfficientNetV2Backbone.from_preset(
    "efficientnetv2_b0_imagenet",
    include_rescaling=True,
)
backbone.trainable = False

# Anchor layout: five sizes paired with five strides (8, 16, 32, 64, 128),
# and 3 aspect ratios x 3 scales at each location.
anchor_sizes = [4.0, 8.0, 10.0, 16.0, 32.0]
anchor_strides = [2 ** level for level in range(3, 8)]
coin_aspect_ratios = [  # coin aspect ratios (w/h), scaled by `s_ar`
    0.75 * s_ar,  # low distance
    1.25 * s_ar,  # medium distance
    1.80 * s_ar,  # distant
]
anchor_scales = [1, 2 ** (1 / 3), 2 ** (2 / 3)]

coin_anchors = keras_cv.layers.AnchorGenerator(
    bounding_box_format="xywh",
    sizes=anchor_sizes,
    strides=anchor_strides,
    aspect_ratios=coin_aspect_ratios,
    scales=anchor_scales,
    clip_boxes=True,
)

# Single-class (coin) detector working in xywh boxes.
model = keras_cv.models.RetinaNet(
    num_classes=1,
    backbone=backbone,
    anchor_generator=coin_anchors,
    bounding_box_format="xywh",
)

head_optimizer = tf.keras.optimizers.AdamW(
    learning_rate=0.0001,
    weight_decay=0.0001,
    amsgrad=False,
)
focal_loss = keras_cv.losses.FocalLoss(from_logits=True, alpha=0.25, gamma=2.0)
smooth_l1_loss = keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0)

model.compile(
    optimizer=head_optimizer,
    classification_loss=focal_loss,
    box_loss=smooth_l1_loss,
)

# Keep the History object: the next cell plots it.
history_head = model.fit(train_ds, epochs=5, validation_data=val_ds)

In [None]:
# Plot the History returned by the RetinaNet head-training `fit` call above
# (`plot_history` comes from the project's star-imported helpers).
plot_history(history_head)

# YOLOv8

In [None]:
# COCO-pretrained YOLOv8-S backbone, frozen so that only the FPN and the
# detection head are trained in this first stage.
backbone = keras_cv.models.YOLOV8Backbone.from_preset(
    "yolo_v8_s_backbone_coco",  # other sizes: "yolo_v8_m_backbone_coco", "yolo_v8_l_backbone_coco", ...
    include_rescaling=True
)
backbone.trainable = False  # Freeze backbone for fine-tuning

# Single-class (coin) detector working in xywh boxes.
model = keras_cv.models.YOLOV8Detector(
    num_classes=1,
    bounding_box_format="xywh",
    backbone=backbone,
    fpn_depth=2  # You can adjust this based on your needs
)

model.compile(
    optimizer=tf.keras.optimizers.AdamW(
        learning_rate=0.0001,
        weight_decay=0.0001,
        amsgrad=False
    ),
    # The YOLOv8 head already ends in a sigmoid, so the class scores reaching
    # the loss are probabilities; `from_logits=True` would apply the sigmoid a
    # second time and distort the focal loss.
    classification_loss=keras_cv.losses.FocalLoss(
        from_logits=False,
        alpha=0.25,
        gamma=2.0,
    ),
    # NOTE(review): the KerasCV examples compile YOLOv8 with box_loss="ciou";
    # SmoothL1 is kept here to preserve the existing experiment setup.
    box_loss=keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0)
)

model.fit(train_ds, validation_data=val_ds, epochs=5)

In [None]:
# Stage 2: unfreeze only the last 10 backbone layers and continue training.
backbone.trainable = True
for layer in backbone.layers[:-10]:  # Keep first layers frozen
    layer.trainable = False

# Re-compile so the changed `trainable` flags take effect for training.
model.compile(
    optimizer=tf.keras.optimizers.AdamW(
        learning_rate=0.0001,
        weight_decay=0.0001,
        amsgrad=False
    ),
    # The YOLOv8 head already ends in a sigmoid, so the loss receives
    # probabilities rather than logits.
    classification_loss=keras_cv.losses.FocalLoss(
        from_logits=False,
        alpha=0.25,
        gamma=2.0,
    ),
    box_loss=keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0)
)

# Use the datasets built in the "Datasets" section; the previous `ds` / `val`
# names are not defined anywhere in this notebook and fail on a fresh kernel.
model.fit(train_ds, validation_data=val_ds, epochs=5)

In [None]:
# Iterator over samples from the real "190" capture set; the next cell pulls
# one (img, boxes, classes) triple per execution.
real_190_ds = multi_coin_ds(
    [('./datasets/190/annotations.xml', 'PNG')],
    d_img=False,
    d_bb=False,
    tsz=(1024, 1024),
)
it = iter(get_real_samples(real_190_ds))

In [None]:
import numpy as np

# Pull the next real sample and run the detector on it as a batch of one.
img, boxes, classes = next(it)
batch = np.expand_dims(img, axis=0)
y = model.predict(batch)

# Scale once for display; `.astype` returns a new array on every call, so each
# visualisation below still receives its own uint8 copy.
scaled = img.numpy() * 255

# Prediction: only the first returned detection of the single image.
first_box = y['boxes'][0][0]
first_class = y['classes'][0][0]
visualise_bundle(scaled.astype(np.uint8), [first_box], [first_class])
print("confidence of ", np.max(y['confidence']))

# Ground truth for the same image, for side-by-side comparison.
visualise_bundle(scaled.astype(np.uint8), boxes, classes)

In [None]:
# Training callbacks, all keyed on validation loss.
# NOTE: no trailing comma after the closing bracket — with one, `callbacks`
# becomes a 1-tuple wrapping the list and `fit(callbacks=callbacks)` fails.
# NOTE(review): nothing in this notebook passes `callbacks` to `fit` yet.
callbacks = [
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1),
    keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.75, verbose=1),
    keras.callbacks.ModelCheckpoint(
        'coin_best_3.h5',
        monitor='val_loss',
        save_best_only=True,
        mode='min',  # 'min' for val_loss, 'max' for val_accuracy
        verbose=1
    )
]

backbone.trainable = True  # Unfreeze for adaptation

# Re-compile with a lower learning rate for full-backbone adaptation.
# `YOLOV8Detector.compile` requires both losses explicitly; passing only the
# optimizer raises a TypeError. The head already ends in a sigmoid, hence
# `from_logits=False`.
model.compile(
    optimizer=tf.keras.optimizers.AdamW(learning_rate=1e-5),
    classification_loss=keras_cv.losses.FocalLoss(
        from_logits=False,
        alpha=0.25,
        gamma=2.0,
    ),
    box_loss=keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0)
)


In [None]:
# Restart the sample stream over the real "190" capture set so the inspection
# cell below begins from the first sample again.
inspection_ds = multi_coin_ds(
    [('./datasets/190/annotations.xml', 'PNG')],
    d_img=False,
    d_bb=False,
    tsz=(1024, 1024),
)
it = iter(get_real_samples(inspection_ds))

In [None]:

# Pull the next real sample and run the detector on it as a batch of one.
img, boxes, classes = next(it)
y = model.predict(np.expand_dims(img, axis=0))

# Position of the highest confidence value in the returned confidences.
idx = np.argmax(y['confidence'])

# Scale once for display; `.astype` returns a new array on every call, so each
# visualisation below still receives its own uint8 copy.
scaled = img.numpy() * 255

# Prediction: every box/class returned for the single image.
predicted_boxes = y['boxes'][0]
predicted_classes = y['classes'][0]
visualise_bundle(scaled.astype(np.uint8), predicted_boxes, predicted_classes)

print(f"confidence of {np.max(y['confidence'])} (i={idx})")

# Ground truth for the same image, for side-by-side comparison.
visualise_bundle(scaled.astype(np.uint8), boxes, classes)

In [None]:
# Select the single highest-confidence detection for the first (only) image.
# `y['boxes']` and `y['confidence']` carry a leading batch axis, so index the
# image first and slice the detection axis; slicing the batch axis with a
# detection index returns the whole image's detections (idx == 0) or nothing.
idx = int(np.argmax(y['confidence'][0]))

best_box = y['boxes'][0][idx:idx + 1]  # shape: (1, 4)
best_score = y['confidence'][0][idx:idx + 1]  # shape: (1,)
best_class = np.zeros_like(best_score, dtype=np.int32)  # single-class model: class id 0

# img_uint8 = (img.numpy() * 255).astype(np.uint8)
#
# keras_cv.visualization.plot_bounding_box_gallery(
#     np.expand_dims(img, axis=0),
#     value_range=(0, 255),
#     rows=1,
#     cols=1,
#     y_true={
#         "boxes": np.expand_dims(best_box, axis=0),
#         "classes": np.expand_dims([0], axis=0)
#     },
#     scale=5,
#     bounding_box_format="xywh",
#     class_mapping={int(k): f"cls_{k}" for k in best_class},
# )
#
# plt.show()


In [None]:
# Show the box at position `idx` for the first (only) image in the batch; the
# one-element slice keeps a leading axis instead of returning a bare row.
first_image_boxes = y['boxes'][0]
first_image_boxes[idx:idx + 1]