In [145]:
import argparse
import os
import loguru
import keras_cv
import tensorflow as tf
from tensorflow import keras
from keras_cv import bounding_box
from keras_cv import visualization
import yaml

In [146]:

import os
from tqdm.auto import tqdm
import xml.etree.ElementTree as ET

In [147]:

# Dataset locations (relative paths).
path_images = "../datasets/images/"
path_annot = "../datasets/Annotations/"

# Sorted paths of every ``.xml`` annotation file found in ``path_annot``.
xml_files = sorted(
    os.path.join(path_annot, entry)
    for entry in os.listdir(path_annot)
    if entry.endswith(".xml")
)

# Sorted paths of every ``.jpg`` image file found in ``path_images``.
jpg_files = sorted(
    os.path.join(path_images, entry)
    for entry in os.listdir(path_images)
    if entry.endswith(".jpg")
)

In [148]:
# Gesture label names; a label's position in this list is its integer class id.
class_ids = ["jempol", "five", "three", "v sign"]

# Lookup table from integer class id to label name.
class_mapping = dict(enumerate(class_ids))

In [149]:
def parse_annotation(xml_file):
    """Parse one XML annotation into an image path, boxes and class ids.

    Args:
        xml_file: Path of the XML annotation file (anything ``ET.parse``
            accepts). The document must contain a ``filename`` element and
            may contain any number of ``object`` elements, each with a
            ``name`` and a ``bndbox`` holding ``xmin``/``ymin``/``xmax``/
            ``ymax`` children.

    Returns:
        A tuple ``(image_path, boxes, class_ids)``: ``image_path`` is
        ``path_images`` joined with the annotated file name, ``boxes`` is a
        list of ``[xmin, ymin, xmax, ymax]`` float lists, and ``class_ids``
        is the list of integer keys of ``class_mapping`` matching each
        object's name, in document order.

    Raises:
        ValueError: If an object's name is not one of the values of
            ``class_mapping``, or a coordinate is not a valid float.
    """
    root = ET.parse(xml_file).getroot()
    image_path = os.path.join(path_images, root.find("filename").text)

    boxes = []
    object_names = []
    for obj in root.iter("object"):
        object_names.append(obj.find("name").text)

        bndbox = obj.find("bndbox")
        boxes.append(
            [float(bndbox.find(tag).text) for tag in ("xmin", "ymin", "xmax", "ymax")]
        )

    # Invert id -> name once instead of rebuilding and scanning two lists for
    # every object. ``setdefault`` keeps the first id seen for a repeated
    # name, which is what a linear ``index()`` search would return.
    name_to_id = {}
    for class_id, class_name in class_mapping.items():
        name_to_id.setdefault(class_name, class_id)

    box_class_ids = []
    for object_name in object_names:
        if object_name not in name_to_id:
            raise ValueError(
                f"unknown class name {object_name!r} in annotation {xml_file!r}"
            )
        box_class_ids.append(name_to_id[object_name])

    return image_path, boxes, box_class_ids


# Parse every annotation file into three parallel lists, one entry per image.
image_paths = []
bbox = []
classes = []
for xml_file in tqdm(xml_files):
    # Unpack into ``image_class_ids`` rather than ``class_ids`` so the
    # module-level list of class names is not overwritten with the integer
    # ids of the last parsed image.
    image_path, boxes, image_class_ids = parse_annotation(xml_file)
    image_paths.append(image_path)
    bbox.append(boxes)
    classes.append(image_class_ids)


100%|██████████| 8/8 [00:00<00:00, 5446.26it/s]


In [150]:
# Convert the per-image Python lists with ``tf.ragged.constant``; the number
# of boxes (and therefore of class ids) can differ from image to image.
image_paths = tf.ragged.constant(image_paths)
classes = tf.ragged.constant(classes)
bbox = tf.ragged.constant(bbox)

# Each dataset element is one (image_path, classes, bbox) triple.
data = tf.data.Dataset.from_tensor_slices((image_paths, classes, bbox))



In [151]:
def load_image(image_path):
    """Read the file at ``image_path`` and decode it as a 3-channel JPEG."""
    return tf.image.decode_jpeg(tf.io.read_file(image_path), channels=3)


def load_dataset(image_path, classes, bbox):
    """Build one sample dictionary from an image path and its labels.

    Args:
        image_path: Path of the image, passed to ``load_image``.
        classes: Class ids of the image's boxes; cast to ``float32``.
        bbox: Boxes of the image; passed through unchanged.

    Returns:
        ``{"images": <float32 image>, "bounding_boxes": {"classes": ...,
        "boxes": ...}}``.
    """
    raw_image = load_image(image_path)
    float_classes = tf.cast(classes, dtype=tf.float32)
    return {
        "images": tf.cast(raw_image, tf.float32),
        "bounding_boxes": {"classes": float_classes, "boxes": bbox},
    }

# Preprocessing layer mapped over each batch of the input pipeline: resizes
# to 640x640 with a scale factor taken from the given range, treating the
# boxes as "xyxy".
resizing = keras_cv.layers.JitteredResize(
    bounding_box_format="xyxy",
    target_size=(640, 640),
    scale_factor=(0.75, 1.3),
)

def dict_to_tuple(inputs):
    """Split a sample dictionary into an ``(images, bounding_boxes)`` pair.

    The bounding boxes are passed through ``bounding_box.to_dense`` with
    ``max_boxes=32``.
    """
    images = inputs["images"]
    dense_boxes = bounding_box.to_dense(inputs["bounding_boxes"], max_boxes=32)
    return images, dense_boxes

In [152]:
# Number of samples per batch; passed to ``data.ragged_batch`` when the input
# pipeline is assembled.
BATCH_SIZE = 3

In [153]:
# Input pipeline: load each sample, group samples into ragged batches
# (a trailing partial batch is discarded because of ``drop_remainder=True``),
# resize every batch, then convert the dictionaries to
# ``(images, bounding_boxes)`` tuples.
# NOTE: this rebinds ``data``, so the cell is not safe to run twice without
# rebuilding the dataset first.
data = (
    data.map(load_dataset, num_parallel_calls=tf.data.AUTOTUNE)
    .ragged_batch(BATCH_SIZE, drop_remainder=True)
    .map(resizing, num_parallel_calls=tf.data.AUTOTUNE)
    .map(dict_to_tuple, num_parallel_calls=tf.data.AUTOTUNE)
)






In [180]:
# Sanity check: take a single batch from the pipeline and print the shape of
# its image tensor (``labels`` is unpacked but not used here).
for images, labels in data.take(1):
    print(images.shape)

(3, 640, 640, 3)


AttributeError: 'dict' object has no attribute 'shape'

In [158]:
class YOLOv8Trainer:
    """Builds a KerasCV YOLOv8 detector from a YAML configuration file.

    The configuration is loaded when the object is constructed and exposed as
    ``self.config``. ``configure_model`` reads these entries from it:
    ``model.backbone``, ``model.classes``, ``model.bounding_box_format`` and
    ``model.fpn_depth``.

    The optimizer, callback, trainer and visualisation hooks are placeholders
    that raise ``NotImplementedError``.
    """

    def __init__(self, config_file):
        """Store the path of the YAML file and load it into ``self.config``.

        Args:
            config_file: Path of the YAML configuration file.
        """
        self.config_file = config_file
        self.config = self.arg_parse()

    def arg_parse(self):
        """Load ``self.config_file`` and return it as a ``StaticDotDict``.

        Despite the name, no command-line arguments are parsed here.
        """
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale-specific default text encoding.
        with open(self.config_file, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f)
        return StaticDotDict(config)

    def configure_model(self):
        """Create the backbone and the detector from the configuration.

        Sets ``self.backbone`` (built from the preset named by
        ``config.model.backbone``) and ``self.model`` (a ``YOLOV8Detector``
        with one class per entry of ``config.model.classes``).
        """
        model_config = self.config.model
        self.backbone = keras_cv.models.YOLOV8Backbone.from_preset(
            model_config.backbone
        )
        self.model = keras_cv.models.YOLOV8Detector(
            backbone=self.backbone,
            num_classes=len(model_config.classes),
            bounding_box_format=model_config.bounding_box_format,
            fpn_depth=model_config.fpn_depth,
        )

    def configure_optimizer(self):
        """Placeholder; not implemented yet."""
        raise NotImplementedError

    def configure_callback(self):
        """Placeholder; not implemented yet."""
        raise NotImplementedError

    def configure_trainer(self):
        """Placeholder; not implemented yet."""
        raise NotImplementedError

    def visualize_dataset(self):
        """Placeholder; not implemented yet."""
        raise NotImplementedError

class _DotDictMissingKey(AttributeError, KeyError):
    """Raised for a missing key reached through attribute access.

    It is an ``AttributeError`` so that ``hasattr``, ``getattr`` with a
    default, ``copy`` and ``pickle`` behave normally, and a ``KeyError`` so
    that code catching ``KeyError`` keeps working.
    """


class StaticDotDict(dict):
    """
    A dictionary that supports dot notation as well as item access.

    usage: d = StaticDotDict({'val1': 'first'})
    set attributes: d.val2 = 'second' or d['val2'] = 'second'
    get attributes: d.val2 or d['val2']
    delete attributes: del d.val2 or del d['val2']

    Values that expose a ``keys`` attribute are converted to
    ``StaticDotDict`` recursively when the object is constructed. Values
    stored afterwards, and mappings nested inside lists, are kept as they are.

    Note: despite the name, nothing prevents keys from being added or removed
    after construction.
    """

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails, so dict methods
        # such as ``keys`` are never shadowed by stored items.
        try:
            return self[name]
        except KeyError:
            raise _DotDictMissingKey(name) from None

    __setattr__ = dict.__setitem__

    def __delattr__(self, name):
        try:
            del self[name]
        except KeyError:
            raise _DotDictMissingKey(name) from None

    def __init__(self, dct):
        for key, value in dct.items():
            if hasattr(value, 'keys'):
                value = StaticDotDict(value)
            self[key] = value

# Create the trainer; the YAML file at this relative path is loaded
# immediately by the constructor.
trainer = YOLOv8Trainer('../config.yaml')

In [169]:
# Build the backbone and detector; this sets ``trainer.backbone`` and
# ``trainer.model``.
trainer.configure_model()

In [181]:
# Compile the detector with a default Adam optimizer, CIoU box loss and
# binary cross-entropy classification loss, then train on the pipeline.
trainer.model.compile(
    classification_loss="binary_crossentropy",
    box_loss="ciou",
    optimizer=keras.optimizers.Adam(),
)
trainer.model.fit(data, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7f252822f2e0>

In [174]:
# Pull a single (images, labels) batch from the pipeline for inspection.
images, y_true = next(iter(data.take(1)))

In [179]:
# Display the sampled image batch.
images

<tf.Tensor: shape=(3, 640, 640, 3), dtype=float32, numpy=
array([[[[ 59.323235,  74.31762 ,  64.652824],
         [ 57.9319  ,  74.19014 ,  63.9319  ],
         [ 57.678032,  73.79466 ,  62.984734],
         ...,
         [201.67738 , 220.10696 , 217.91681 ],
         [202.4734  , 219.04521 , 217.47432 ],
         [203.10608 , 215.35953 , 216.31604 ]],

        [[ 61.19662 ,  73.99538 ,  66.8024  ],
         [ 60.73624 ,  74.26442 ,  65.6172  ],
         [ 61.043056,  74.571236,  65.27312 ],
         ...,
         [201.04395 , 219.42569 , 218.6622  ],
         [201.26924 , 219.04144 , 218.82394 ],
         [202.63571 , 215.10535 , 216.62428 ]],

        [[ 63.893253,  73.43256 ,  68.72473 ],
         [ 64.67302 ,  74.89001 ,  67.63882 ],
         [ 64.46439 ,  75.53133 ,  67.36935 ],
         ...,
         [200.7908  , 218.83124 , 220.00801 ],
         [202.28159 , 218.3592  , 220.67606 ],
         [202.92839 , 214.92009 , 218.33926 ]],

        ...,

        [[107.49262 , 109.34626 , 

In [176]:
# Display the labels of the sampled batch.
y_true

{'classes': <tf.Tensor: shape=(3, 32), dtype=float32, numpy=
 array([[-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
         -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
         -1., -1., -1., -1., -1., -1.],
        [ 3., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
         -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
         -1., -1., -1., -1., -1., -1.],
        [-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
         -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
         -1., -1., -1., -1., -1., -1.]], dtype=float32)>,
 'boxes': <tf.Tensor: shape=(3, 32, 4), dtype=float32, numpy=
 array([[[ -1.   ,  -1.   ,  -1.   ,  -1.   ],
         [ -1.   ,  -1.   ,  -1.   ,  -1.   ],
         [ -1.   ,  -1.   ,  -1.   ,  -1.   ],
         [ -1.   ,  -1.   ,  -1.   ,  -1.   ],
         [ -1.   ,  -1.   ,  -1.   ,  -1.   ],
         [ -1.   ,  -1.   ,  -1.   ,  -1.   ],
         [ -1

In [178]:
# Run the trained detector on the sampled image batch.
trainer.model.predict(images)



{'boxes': array([[[-2.79469299e+01,  2.99462891e+01,  4.76677979e+02,
           4.86065674e+02],
         [-1.24302856e+02,  6.43034973e+01,  4.77792725e+02,
           4.82339447e+02],
         [ 3.51041870e+01,  6.27543793e+01,  4.79013367e+02,
           4.84588135e+02],
         ...,
         [ 1.19938034e+02,  5.68054016e+02,  1.19931931e+02,
           1.19717712e+02],
         [-4.81815033e+01,  1.11783005e+02,  2.39909088e+02,
           2.40358841e+02],
         [ 1.36240387e+02,  1.92228241e+02,  1.19372177e+02,
           1.19735535e+02]],
 
        [[ 1.31821304e+02,  9.56275330e+01,  4.76476562e+02,
           4.84434784e+02],
         [ 1.94170853e+02,  1.60662155e+02,  4.79326050e+02,
           4.82406189e+02],
         [ 3.22817017e+02,  1.29080200e+00,  4.77708008e+02,
           4.82777649e+02],
         ...,
         [ 2.87891602e+02,  1.91776611e+02,  2.39892151e+02,
           2.40411285e+02],
         [ 3.83819519e+02,  3.51624390e+02,  2.40048645e+02,
         