## 1) Mount Drive

In [None]:
from google.colab import drive
import os

# Attach Google Drive to the Colab VM's filesystem.
drive.mount('/content/drive')

# Switch into the project folder so the relative paths used by later cells
# are resolved against it.
project_dir = '/content/drive/MyDrive/ComparisonDetector'
os.chdir(project_dir)

# Sanity check: show where we are and the first few directory entries.
print('CWD:', os.getcwd())
print('List:', os.listdir('.')[:10])

## 2) Install deps (TF 2.15 + KerasCV)

In [None]:
!pip install -q tensorflow==2.15.0 keras==2.15.0 keras-cv==0.6.4 tensorflow-io-gcs-filesystem==0.31.0
!pip install -q numpy==1.26.4 opencv-python pillow matplotlib

## 3) Imports & Label map

In [None]:
import tensorflow as tf
import keras_cv
import numpy as np
import os
from libs.label_dict import get_label_name_map

# Class lookup table from the project's label dictionary (keyed by class id,
# valued by class name, per the helper's usage in this notebook).
label_map = get_label_name_map()

# The detector is sized to the number of entries in that table.
num_classes = len(label_map)

print('Classes:', label_map)

## 4) TFRecord parser (boxes+labels)

In [None]:
# Location of the training TFRecord file; a relative path, so it is resolved
# against the current working directory.
tfrecord_path = 'tfdata/tct/train.tfrecord'

def parse_example(example_proto):
    """Decode one serialized example into an image and its annotations.

    Args:
        example_proto: scalar string tensor holding a serialized
            tf.train.Example with the features 'img', 'img_height',
            'img_width', 'gtboxes_and_label' and 'img_name'.

    Returns:
        A 3-tuple ``(img, boxes, labels)``:
          * img    - uint8 tensor reshaped to [height, width, 3].
          * boxes  - float32 tensor [N, 4] ordered (y1, x1, y2, x2), each
                     coordinate divided by the image height / width.
          * labels - int32 tensor [N] taken from the fifth annotation column.
    """
    scalar_bytes = tf.io.FixedLenFeature([], tf.string)
    scalar_int = tf.io.FixedLenFeature([], tf.int64)
    feature_spec = {
        'img': scalar_bytes,
        'img_height': scalar_int,
        'img_width': scalar_int,
        'gtboxes_and_label': scalar_bytes,
        'img_name': scalar_bytes,
    }
    record = tf.io.parse_single_example(example_proto, feature_spec)

    height = tf.cast(record['img_height'], tf.int32)
    width = tf.cast(record['img_width'], tf.int32)

    # The image is stored as raw uint8 bytes; restore its H x W x 3 layout.
    pixels = tf.reshape(tf.io.decode_raw(record['img'], tf.uint8), [height, width, 3])

    # Annotations are int32 rows of [x1, y1, x2, y2, label].
    annotations = tf.reshape(
        tf.io.decode_raw(record['gtboxes_and_label'], tf.int32), [-1, 5])
    labels = tf.cast(annotations[:, 4], tf.int32)

    # Reorder the columns to (y1, x1, y2, x2), then divide every coordinate by
    # the matching image dimension to normalize it.
    corners_yxyx = tf.gather(
        tf.cast(annotations[:, :4], tf.float32), [1, 0, 3, 2], axis=1)
    height_f = tf.cast(height, tf.float32)
    width_f = tf.cast(width, tf.float32)
    boxes = corners_yxyx / tf.stack([height_f, width_f, height_f, width_f])

    return pixels, boxes, labels

def preprocess(img, boxes, labels, image_size=(640,640)):
    """Turn a parsed sample into the (image, targets) pair fed to the model.

    Args:
        img: image tensor; converted to float32 with
            ``tf.image.convert_image_dtype``.
        boxes: box tensor, passed through unchanged.
        labels: class-id tensor, cast to float32.
        image_size: target size handed to ``tf.image.resize``.

    Returns:
        ``(image, targets)`` where ``targets`` is a dict with the keys
        'boxes' and 'classes'.
    """
    as_float = tf.image.convert_image_dtype(img, tf.float32)
    resized = tf.image.resize(as_float, image_size)
    targets = {
        'boxes': boxes,
        'classes': tf.cast(labels, tf.float32),
    }
    return resized, targets

def load_dataset(batch_size=2, shuffle=512, image_size=(640,640)):
    """Build the batched input pipeline from the TFRecord at ``tfrecord_path``.

    Args:
        batch_size: number of samples per batch; an incomplete final batch is
            dropped.
        shuffle: shuffle-buffer size. A falsy value (0 / None) skips shuffling.
        image_size: target size every image is resized to.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, targets)`` where ``targets``
        is a dict with 'boxes' and 'classes', both padded to the largest box
        count in the batch.
    """
    ds = tf.data.TFRecordDataset(tfrecord_path, num_parallel_reads=tf.data.AUTOTUNE)
    ds = ds.map(parse_example, num_parallel_calls=tf.data.AUTOTUNE)
    if shuffle:
        ds = ds.shuffle(shuffle)
    ds = ds.map(
        lambda image, boxes, labels: preprocess(image, boxes, labels, image_size),
        num_parallel_calls=tf.data.AUTOTUNE,
    )
    # After `preprocess` each element is (image, {'boxes', 'classes'}), so the
    # padding values must mirror that nesting; the previous flat 3-tuple did
    # not match the element structure. Both target fields are padded with -1,
    # the sentinel KerasCV uses for "no box" in dense bounding-box tensors.
    ds = ds.padded_batch(
        batch_size,
        padding_values=(
            tf.constant(0.0, tf.float32),
            {
                'boxes': tf.constant(-1.0, tf.float32),
                'classes': tf.constant(-1.0, tf.float32),
            },
        ),
        drop_remainder=True,
    )
    ds = ds.prefetch(tf.data.AUTOTUNE)
    return ds

# Instantiate the training pipeline: 640x640 images, two samples per batch.
train_ds = load_dataset(image_size=(640,640), batch_size=2)
print('Dataset ready')

## 5) Build RetinaNet model (KerasCV)

In [None]:
# Build RetinaNet on an ImageNet-pretrained ResNet50 backbone.
# The pinned keras-cv release takes `num_classes` and a backbone model (or a
# preset name via `from_preset`); the older `classes=` / `backbone='resnet50'`
# string form is not accepted by that constructor.
# The input pipeline emits boxes as (y1, x1, y2, x2) divided by the image
# height/width, which is KerasCV's 'rel_yxyx' format; plain 'yxyx' means
# absolute pixel coordinates.
# NOTE(review): the preset backbone presumably applies its own 1/255 rescaling,
# while this notebook already feeds images scaled to [0, 1] - confirm and align
# the two so inputs are not rescaled twice.
model = keras_cv.models.RetinaNet.from_preset(
    'resnet50_imagenet',
    num_classes=num_classes,
    bounding_box_format='rel_yxyx',
)

learning_rate = 1e-4
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9)

# Focal loss for classification and smooth-L1 for box regression.
model.compile(optimizer=optimizer, classification_loss='focal', box_loss='smoothl1')
model.summary()

## 6) Train

In [None]:
epochs = 10
steps_per_epoch = 1000  # adjust as needed

callbacks = [
    # Keep the checkpoint with the lowest training loss seen so far.
    tf.keras.callbacks.ModelCheckpoint('detector_retinanet_best.h5', save_best_only=True, monitor='loss', mode='min'),
    # Halve the learning rate when the training loss stalls for 2 epochs.
    tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=2, min_lr=1e-6, verbose=1),
]

# With an explicit `steps_per_epoch`, fit() needs steps_per_epoch * epochs
# batches in total. The TFRecord pipeline is finite, so repeat it here;
# otherwise training stops early once the records are exhausted.
history = model.fit(
    train_ds.repeat(),
    epochs=epochs,
    steps_per_epoch=steps_per_epoch,
    callbacks=callbacks,
)

model.save('detector_retinanet_final.h5')
print('Saved detector_retinanet_final.h5')

## 7) Inference on one image (draw boxes)

In [None]:
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw  # ImageDraw is needed for the overlay below

# NOTE(review): confirm that reloading the KerasCV RetinaNet from .h5 restores
# the decoded predict() outputs ('boxes', 'confidence', 'classes') used below.
infer_model = tf.keras.models.load_model('detector_retinanet_best.h5', compile=False)

# Take one serialized sample from the TFRecord.
serialized = next(iter(tf.data.TFRecordDataset(tfrecord_path).take(1)))

# parse_example returns the tuple (image, boxes, labels) - not a dict - so
# unpack it and read the image size from the decoded tensor.
img, _, _ = parse_example(serialized)
h = int(img.shape[0])
w = int(img.shape[1])

# The file name is not part of parse_example's output; read it directly from
# the serialized record.
name_feature = tf.io.parse_single_example(
    serialized, {'img_name': tf.io.FixedLenFeature([], tf.string)})
img_name = name_feature['img_name'].numpy().decode('utf-8')

# Same input preparation as the training pipeline: [0, 1] floats at 640x640.
img_resized = tf.image.resize(tf.cast(img, tf.float32)/255.0, (640,640))
pred = infer_model.predict(img_resized[None], verbose=0)
boxes = pred['boxes'][0]   # y1,x1,y2,x2 normalized
scores = pred['confidence'][0]
classes = tf.cast(pred['classes'][0], tf.int32)

# Keep only detections above the score threshold.
mask = scores > 0.3
boxes = tf.boolean_mask(boxes, mask)
scores = tf.boolean_mask(scores, mask)
classes = tf.boolean_mask(classes, mask)

# Draw the surviving detections on the original-resolution image.
img_draw = Image.fromarray(img.numpy())
draw = ImageDraw.Draw(img_draw)
for box, sc, cls in zip(boxes.numpy(), scores.numpy(), classes.numpy()):
    y1, x1, y2, x2 = box
    # Scale normalized coordinates back to pixels of the original image.
    y1 *= h; y2 *= h; x1 *= w; x2 *= w
    draw.rectangle([x1, y1, x2, y2], outline='red', width=2)
    name = label_map.get(int(cls), str(int(cls)))
    # The original call was cut off mid-argument; label each box with the
    # class name and its confidence.
    draw.text((x1+3, y1+3), f'{name} {sc:.2f}', fill='red')

plt.figure(figsize=(10,10))
plt.imshow(img_draw)
plt.axis('off')
plt.show()
print('Image:', img_name, 'detections:', len(boxes))