In [20]:
%pip install --upgrade git+https://github.com/keras-team/keras-cv -q --break-system-packages

Note: you may need to restart the kernel to use updated packages.


In [21]:
import os
from tqdm.auto import tqdm
import xml.etree.ElementTree as ET

import tensorflow as tf
from tensorflow import keras

import keras_cv
from keras_cv import bounding_box
from keras_cv import visualization
import cv2
import random

import numpy as np

In [22]:
SPLIT_RATIO = 0.2
BATCH_SIZE = 4
LEARNING_RATE = 0.001
EPOCH = 5
GLOBAL_CLIPNORM = 10.0
IMAGE_WIDTH = 3000
IMAGE_HEIGHT = 4000

In [23]:
class_ids = [
    "neutrofilo",
    "linfocito",
    "monocito",
    "bastonete",
    "metamielocito",
    "eosinofilo",
]
class_mapping = dict(zip(range(len(class_ids)), class_ids))

# Path to images and annotations
path_images = "/home/gabriela/projetos/yolov8keras/Malu"
path_annot = "/home/gabriela/projetos/yolov8keras/Malu/annotations"

path_test_img = "/home/gabriela/projetos/yolov8keras/test"
path_test_annot = "/home/gabriela/projetos/yolov8keras/test/annotations"

# Get all XML file paths in path_annot and sort them
xml_files = sorted(
    [
        os.path.join(path_annot, file_name)
        for file_name in os.listdir(path_annot)
        if file_name.endswith(".xml")
    ]
)

# xml_test_files = sorted(
#     [
#         os.path.join(path_test_annot, file_name)
#         for file_name in os.listdir(path_test_annot)
#         if file_name.endswith(".xml")
#     ]
# )

# Get all JPEG image file paths in path_images and sort them
jpg_files = sorted(
    [
        os.path.join(path_images, file_name)
        for file_name in os.listdir(path_images)
        if file_name.endswith(".jpg")
    ]
)

# jpg_test_files = sorted(
#     [
#         os.path.join(path_test_img, file_name)
#         for file_name in os.listdir(path_test_img)
#         if file_name.endswith(".jpg")
#     ]
# )

In [24]:
class_mapping[0]

'neutrofilo'

In [None]:
#read the objects in the annotation xml file
def parse_annotation(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()

    image_name = root.find("filename").text
    image_path = os.path.join(path_images, image_name)

    boxes = []
    classes = []
    for obj in root.find("objects"):
        cls = obj.tag
        print(cls)
        classes.append(cls)

        if(obj.find("bbox") != None):
            bbox = obj.find("bbox")
            min_x, min_y, width, height = normalize_bounding_box(float(bbox.find("x").text), float(bbox.find("y").text), float(bbox.find("width").text), float(bbox.find("height").text))
            boxes.append([min_x, min_y, width, height])
        else:
            xCoords = []
            yCoords = []
            for coord in obj:
                if (coord.tag.find("x") != -1):
                    xCoords.append(float(coord.text))
                elif (coord.tag.find("y") != -1):
                    yCoords.append(float(coord.text))

            xmin, ymin, width, height = calculate_bounding_box_normalized(xCoords, yCoords)
            boxes.append([xmin, ymin, width, height])

    class_ids = [
        list(class_mapping.keys())[list(class_mapping.values()).index(cls)]
        for cls in classes
    ]
    return image_path, boxes, class_ids

#calculate bounding boxes with points provided
def calculate_bounding_box_normalized(xCoords, yCoords):
    points = np.array([xCoords, yCoords])

    min_x = float(np.min(points[0, :]))
    min_x = (min_x/IMAGE_WIDTH)*640
    min_y = float(np.min(points[1, :]))
    # min_y = ((IMAGE_HEIGHT - min_y)/IMAGE_HEIGHT)*640
    min_y = (min_y/IMAGE_HEIGHT)*640

    max_x = float(np.max(points[0, :]))
    max_x = (max_x/IMAGE_WIDTH)*640
    max_y = float(np.max(points[1, :]))
    # max_y = ((IMAGE_HEIGHT - max_y)/IMAGE_HEIGHT)*640
    max_y = (max_y/IMAGE_HEIGHT)*640

    width =  max_x - min_x
    height = max_y - min_y

    return (min_x, min_y, width, height)

def normalize_bounding_box(xmin, ymin, width, height):

    min_x = (xmin/IMAGE_WIDTH)*640
    # min_y = ((IMAGE_HEIGHT - ymin)/IMAGE_HEIGHT)*640
    min_y = (ymin/IMAGE_HEIGHT)*640
    width = (width/IMAGE_WIDTH)*640
    height = (height/IMAGE_HEIGHT)*640

    return (min_x, min_y, width, height)

In [26]:
image_paths = []
bbox = []
classes = []

for xml_file in tqdm(xml_files):
    image_path, boxes, class_ids = parse_annotation(xml_file)
    image_paths.append(image_path)
    bbox.append(boxes)
    classes.append(class_ids)

100%|██████████| 128/128 [00:00<00:00, 5874.95it/s]


In [27]:
def load_image(path):
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    image = cv2.resize(image, (640, 640), interpolation=cv2.INTER_CUBIC)
    return image

def load_all_images():
    images = []
    for path in image_paths:
        image = load_image(path)
        images.append(image)
        # print(image.shape)
    return images


images = load_all_images()

In [28]:
images[2].shape

(640, 640, 3)

In [29]:
bbox[2][0]

[342.23376645794787,
 298.69493955093793,
 29.797644815241267,
 23.602708263216044]

In [30]:
classes[2]

[0]

In [40]:
def drawBoundingBoxes(imageData, imageOutputPath, bboxes, labels, color):
    """Draw bounding boxes on an image.
    imageData: image data in numpy array format
    imageOutputPath: output image file path
    inferenceResults: inference results array off object (l,t,w,h)
    colorMap: Bounding box color candidates, list of RGB tuples.
    """
    for box in bboxes:
        for class_id in labels:
            left = int(box[0])
            bottom = int(box[1])
            right = int(box[0]) + int(box[2])
            top = int(box[1]) + int(box[3])
            label = class_mapping[int(class_id)]
            imgHeight, imgWidth, _ = imageData.shape
            thick = int((imgHeight + imgWidth) // 900)
            print (left, bottom, imgHeight, imgWidth)
            print (label)
            cv2.rectangle(imageData,(left, top), (right, bottom), color, thick)
            cv2.putText(imageData, label, (left, top - 12), 0, 1e-3 * imgHeight, color, thick//3)
    cv2.imwrite(imageOutputPath, imageData)

In [41]:
print(len(images))
index = 100
drawBoundingBoxes(images[index], '/home/gabriela/projetos/yolov8keras/output/example2.jpg', bbox[index], classes[index], (255, 0, 0))

128
506 217 640 640
neutrofilo
506 217 640 640
eosinofilo
506 217 640 640
linfocito
506 217 640 640
neutrofilo
281 275 640 640
neutrofilo
281 275 640 640
eosinofilo
281 275 640 640
linfocito
281 275 640 640
neutrofilo
197 398 640 640
neutrofilo
197 398 640 640
eosinofilo
197 398 640 640
linfocito
197 398 640 640
neutrofilo
207 241 640 640
neutrofilo
207 241 640 640
eosinofilo
207 241 640 640
linfocito
207 241 640 640
neutrofilo


In [14]:
images = tf.convert_to_tensor(images, dtype=tf.float32)
# print(images.shape)

I0000 00:00:1732571901.230574   13491 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1768 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


In [15]:
ragged_bboxes = tf.ragged.constant(bbox, dtype=tf.float32)
ragged_classes = tf.ragged.constant(classes, dtype=tf.int64)

labels = {
    "boxes": ragged_bboxes.to_tensor(),
    "classes": ragged_classes.to_tensor(),
}

# print(labels["boxes"].type())

In [16]:
# model = keras_cv.models.YOLOV8Detector(
#     num_classes=len(class_mapping),
#     bounding_box_format="xywh",
#     backbone=keras_cv.models.YOLOV8Backbone.from_preset(
#         "yolo_v8_s_backbone_coco"
#     ),
#     fpn_depth=2
# )

backbone=keras_cv.models.YOLOV8Backbone.from_preset("yolo_v8_xs_backbone_coco")

model = keras_cv.models.YOLOV8Detector(
    num_classes=len(class_mapping),
    bounding_box_format="xywh",
    backbone=backbone,
    fpn_depth=2,
)

In [17]:
optimizer = tf.keras.optimizers.Adam(
    learning_rate=LEARNING_RATE,
    global_clipnorm=GLOBAL_CLIPNORM,
)

model.compile(
    optimizer=optimizer, classification_loss="binary_crossentropy", box_loss="ciou"
)

# model.compile(
#     classification_loss='binary_crossentropy',
#     box_loss='ciou',
#     optimizer=tf.optimizers.SGD(global_clipnorm=10.0),
#     jit_compile=False,
# )

In [18]:
model.fit(images, labels, batch_size=4, epochs=50)

Epoch 1/50


I0000 00:00:1732571985.631480   13672 service.cc:148] XLA service 0x7f4478017530 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1732571985.631832   13672 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 Laptop GPU, Compute Capability 8.6
2024-11-25 18:59:44.117772: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1732571987.171574   13672 cuda_dnn.cc:529] Loaded cuDNN version 90300
2024-11-25 19:00:12.087275: W external/local_xla/xla/service/gpu/nvptx_compiler.cc:930] The NVIDIA driver's CUDA version is 12.4 which is older than the PTX compiler version 12.5.82. Because the driver is older than the PTX compiler version, XLA is disabling parallel compilation, which may slow down compilation. You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward compatibility packages.
I0000 0

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 73ms/step - box_loss: 6.5561 - class_loss: 8469.9268 - loss: 8476.4834
Epoch 2/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 73ms/step - box_loss: 3.8346 - class_loss: 866.2945 - loss: 870.1291
Epoch 3/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 73ms/step - box_loss: 2.2650 - class_loss: 225.7611 - loss: 228.0261
Epoch 4/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 73ms/step - box_loss: 1.9970 - class_loss: 79.2277 - loss: 81.2247
Epoch 5/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 73ms/step - box_loss: 1.9189 - class_loss: 28.0400 - loss: 29.9589
Epoch 6/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 72ms/step - box_loss: 1.8030 - class_loss: 11.4966 - loss: 13.2996
Epoch 7/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 72ms/step - box_loss: 1.7582 - class_loss: 4.9486 - loss: 6.7068
Epoch 8/50

<keras.src.callbacks.history.History at 0x7f4568d908c0>