In [1]:
INSTALL_PACKAGES = False

# we're going to use tensorflow 2.14.0 and keras 2.14.0 because that's probably what the tutorial used
# https://developer.apple.com/metal/tensorflow-plugin/
# KerasCV installation: https://keras.io/keras_cv/#keras-2-installation
if INSTALL_PACKAGES:
    !pip install tensorflow==2.14.0 tensorflow-metal keras-cv opencv-python pycocotools matplotlib

In [2]:
# setup
import os
from tqdm.auto import tqdm
import xml.etree.ElementTree as ET

import tensorflow as tf
from tensorflow import keras

import keras_cv
from keras_cv import bounding_box
from keras_cv import visualization

Using TensorFlow backend


# Load Data

In [3]:
# hyperparameters
# SCALE_MAX = 1.3
# SCALE_MIN = 0.75
SCALE_MAX = 1.0
SCALE_MIN = 1.0
SPLIT_RATIO = 0.2
BATCH_SIZE = 4
LEARNING_RATE = 0.001
EPOCHS = 50
GLOBAL_CLIPNORM = 10.0
USE_RAGGED_TENSORS = True

In [4]:
# map class names to unique integer identifier
class_ids = [
    "Bud",
    "Stem",
    "Leaf",
    "Leaf-Attachment",
]

class_mapping = dict(zip(range(len(class_ids)), class_ids))
class_mapping = {0: 'Bud', 1: 'Stem'}
class_mapping_values = class_mapping.values()

print(class_mapping)
print(class_mapping_values)

# Path to images and annotations
path_annot = "data/annotations/"
path_images = "data/images/"

# Get all XML file paths in path_annot and sort them
xml_files = sorted(
    [
        os.path.join(path_annot, file_name)
        for file_name in os.listdir(path_annot)
        if file_name.endswith(".xml")
    ]
)

print(xml_files)

# Get all JPEG image file paths in path_images and sort them
jpg_files = sorted(
    [
        os.path.join(path_images, file_name)
        for file_name in os.listdir(path_images)
        if file_name.endswith(".jpeg")
    ]
)

print(jpg_files)

{0: 'Bud', 1: 'Stem'}
dict_values(['Bud', 'Stem'])
['data/annotations/02d71183-IMG_6937.xml', 'data/annotations/081b0c8e-IMG_6939.xml', 'data/annotations/093b9a0e-IMG_6790.xml', 'data/annotations/09997155-IMG_6933.xml', 'data/annotations/12a96634-IMG_6762.xml', 'data/annotations/23be9e87-IMG_6929.xml', 'data/annotations/26598a44-IMG_6921.xml', 'data/annotations/2e492c86-IMG_6440.xml', 'data/annotations/44523aa4-IMG_6931.xml', 'data/annotations/4b03862e-IMG_6923.xml', 'data/annotations/63c48c4f-IMG_6936.xml', 'data/annotations/6cf840cd-IMG_6927.xml', 'data/annotations/775d6964-IMG_6918.xml', 'data/annotations/7c5321ca-IMG_6934.xml', 'data/annotations/88969803-IMG_6920.xml', 'data/annotations/8ae4834c-IMG_6919.xml', 'data/annotations/8c8007be-IMG_6922.xml', 'data/annotations/9758a822-IMG_6924.xml', 'data/annotations/995c3bd7-IMG_6917.xml', 'data/annotations/9979093c-IMG_6935.xml', 'data/annotations/a11d5427-IMG_6930.xml', 'data/annotations/a2409362-IMG_6800.xml', 'data/annotations/b1ca73

In [5]:
# export PascalVOC images + annotations from Label Studio

class_counts = dict.fromkeys(class_ids, 0)
file_counts = dict.fromkeys(class_ids, 0)

def parse_annotation(xml_file):
    local_class_counts = dict.fromkeys(class_ids, 0)
#    print(local_class_counts)
  
    classes_seen = set()
    
    tree = ET.parse(xml_file)
    root = tree.getroot()

    image_name = root.find("filename").text
    image_path = os.path.join(path_images, image_name)

    if not os.path.exists(image_path):
        print(f'image at {image_path} not found')
    
    boxes = []
    classes = []

    has_marked_class_in_file = False
    
    for obj in root.iter("object"):
        cls = obj.find("name").text
        classes_seen.add(cls)

#        print(f'here, cls is "{cls}", lcc is {local_class_counts}')
        
        local_class_counts[cls] += 1
        class_counts[cls] += 1
        
        if not cls in class_mapping_values:
            continue
        
        classes.append(cls)

        bbox = obj.find("bndbox")
        xmin = float(bbox.find("xmin").text)
        ymin = float(bbox.find("ymin").text)
        xmax = float(bbox.find("xmax").text)
        ymax = float(bbox.find("ymax").text)
        boxes.append([xmin, ymin, xmax, ymax])

    local_class_ids = [
        list(class_mapping.keys())[list(class_mapping_values).index(cls)]
        for cls in classes
    ]

    for cls in classes_seen:
        file_counts[cls] += 1        
    
    print(f'{image_name}, {local_class_counts}')
    
    return image_path, boxes, local_class_ids


image_paths = []
bbox = []
classes = []
for xml_file in tqdm(xml_files):
    image_path, boxes, local_class_ids = parse_annotation(xml_file)
    if len(set(local_class_ids)) == len(class_mapping_values):
        image_paths.append(image_path)
        bbox.append(boxes)
        classes.append(class_ids)

print(f'{len(image_paths)} image paths: {image_paths}')
print(f'{len(classes)} classes: {classes}')
print(f'{len(bbox)} bbox: {bbox}')
print(f'class counts: {class_counts}')
print(f'file counts: {file_counts}')


  0%|          | 0/28 [00:00<?, ?it/s]

02d71183-IMG_6937.jpeg, {'Bud': 6, 'Stem': 2, 'Leaf': 0, 'Leaf-Attachment': 0}
081b0c8e-IMG_6939.jpeg, {'Bud': 5, 'Stem': 2, 'Leaf': 0, 'Leaf-Attachment': 0}
093b9a0e-IMG_6790.jpeg, {'Bud': 1, 'Stem': 0, 'Leaf': 0, 'Leaf-Attachment': 0}
09997155-IMG_6933.jpeg, {'Bud': 2, 'Stem': 2, 'Leaf': 0, 'Leaf-Attachment': 0}
12a96634-IMG_6762.jpeg, {'Bud': 4, 'Stem': 8, 'Leaf': 8, 'Leaf-Attachment': 1}
23be9e87-IMG_6929.jpeg, {'Bud': 7, 'Stem': 2, 'Leaf': 5, 'Leaf-Attachment': 0}
26598a44-IMG_6921.jpeg, {'Bud': 8, 'Stem': 15, 'Leaf': 8, 'Leaf-Attachment': 3}
2e492c86-IMG_6440.jpeg, {'Bud': 0, 'Stem': 8, 'Leaf': 0, 'Leaf-Attachment': 0}
44523aa4-IMG_6931.jpeg, {'Bud': 2, 'Stem': 2, 'Leaf': 0, 'Leaf-Attachment': 0}
4b03862e-IMG_6923.jpeg, {'Bud': 3, 'Stem': 6, 'Leaf': 10, 'Leaf-Attachment': 4}
63c48c4f-IMG_6936.jpeg, {'Bud': 3, 'Stem': 1, 'Leaf': 0, 'Leaf-Attachment': 0}
6cf840cd-IMG_6927.jpeg, {'Bud': 2, 'Stem': 2, 'Leaf': 1, 'Leaf-Attachment': 0}
775d6964-IMG_6918.jpeg, {'Bud': 4, 'Stem': 1, 'Lea

In [6]:
if USE_RAGGED_TENSORS:
    bbox = tf.ragged.constant(bbox)
    classes = tf.ragged.constant(classes)
    image_paths = tf.ragged.constant(image_paths)
else:
    bbox = tf.constant(bbox)
    classes = tf.constant(classes)
    image_paths = tf.constant(image_paths)

data = tf.data.Dataset.from_tensor_slices((image_paths, classes, bbox))

print(data)

<_TensorSliceDataset element_spec=(TensorSpec(shape=(), dtype=tf.string, name=None), RaggedTensorSpec(TensorShape([None]), tf.string, 0, tf.int64), RaggedTensorSpec(TensorShape([None, None]), tf.float32, 1, tf.int64))>


2024-05-19 19:41:06.635227: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Max
2024-05-19 19:41:06.635252: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 64.00 GB
2024-05-19 19:41:06.635301: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 24.00 GB
2024-05-19 19:41:06.635366: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-05-19 19:41:06.635390: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [7]:
# Determine the number of validation samples
num_val = int(len(image_paths) * SPLIT_RATIO)

# Split the dataset into train and validation sets
val_data = data.take(num_val)
train_data = data.skip(num_val)

print(f'num_val: {num_val}')
print(f'val_data: {val_data}, cardinality: {val_data.cardinality()}')
print(f'train_data: {train_data}, cardinality: {train_data.cardinality()}')

num_val: 5
val_data: <_TakeDataset element_spec=(TensorSpec(shape=(), dtype=tf.string, name=None), RaggedTensorSpec(TensorShape([None]), tf.string, 0, tf.int64), RaggedTensorSpec(TensorShape([None, None]), tf.float32, 1, tf.int64))>, cardinality: 5
train_data: <_SkipDataset element_spec=(TensorSpec(shape=(), dtype=tf.string, name=None), RaggedTensorSpec(TensorShape([None]), tf.string, 0, tf.int64), RaggedTensorSpec(TensorShape([None, None]), tf.float32, 1, tf.int64))>, cardinality: 20


In [8]:
def load_image(image_path):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    return image

def load_dataset(image_path, classes, bbox):
    print(f'load_dataset({image_path}, {classes}, {bbox})')
    
    # Read Image
    image = load_image(image_path)

    bounding_boxes = {
        "classes": tf.cast(classes, dtype=tf.float32),
        "boxes": bbox,
    }
    return {"images": tf.cast(image, tf.float32), "bounding_boxes": bounding_boxes}

In [9]:
# Resizes images to 640x640 while maintaining aspect ratio.
# The bounding boxes associated with the image are specified in the xyxy format.
# If necessary, the resized image will be padded with zeros to maintain the original aspect ratio.

augmenter = keras.Sequential(
    layers=[
#        keras_cv.layers.RandomFlip(mode="horizontal", bounding_box_format="xyxy"),
#        keras_cv.layers.RandomShear(
#            x_factor=0.2, y_factor=0.2, bounding_box_format="xyxy"
#        ),
        keras_cv.layers.JitteredResize(
            target_size=(640, 640),
            scale_factor=(SCALE_MIN, SCALE_MAX),
            bounding_box_format="xyxy"
        ),
    ]
)

# Creating Training Dataset

In [10]:
# BATCH_SIZE was 4, and we may want to use this for larger datasets, but for now shuffle the full set
# using the dataset's cardinality

train_ds = train_data.map(load_dataset, num_parallel_calls=tf.data.AUTOTUNE)
train_ds = train_ds.shuffle(BATCH_SIZE * 4)
if USE_RAGGED_TENSORS:
    train_ds = train_ds.ragged_batch(BATCH_SIZE, drop_remainder=True)
train_ds = train_ds.map(augmenter, num_parallel_calls=tf.data.AUTOTUNE)

load_dataset(Tensor("args_0:0", shape=(), dtype=string), Tensor("RaggedFromVariant/RaggedTensorFromVariant:0", shape=(None,), dtype=string), tf.RaggedTensor(values=Tensor("RaggedFromVariant_1/RaggedTensorFromVariant:1", shape=(None,), dtype=float32), row_splits=Tensor("RaggedFromVariant_1/RaggedTensorFromVariant:0", shape=(None,), dtype=int64)))


# Creating Validation Dataset

In [11]:
resizing = keras_cv.layers.JitteredResize(
    target_size=(640, 640),
    scale_factor=(SCALE_MIN, SCALE_MAX),
    bounding_box_format="xyxy",
)

val_ds = val_data.map(load_dataset, num_parallel_calls=tf.data.AUTOTUNE)
val_ds = val_ds.shuffle(val_ds.cardinality())
if USE_RAGGED_TENSORS:
 val_ds = val_ds.ragged_batch(BATCH_SIZE, drop_remainder=True)
val_ds = val_ds.map(resizing, num_parallel_calls=tf.data.AUTOTUNE)

load_dataset(Tensor("args_0:0", shape=(), dtype=string), Tensor("RaggedFromVariant/RaggedTensorFromVariant:0", shape=(None,), dtype=string), tf.RaggedTensor(values=Tensor("RaggedFromVariant_1/RaggedTensorFromVariant:1", shape=(None,), dtype=float32), row_splits=Tensor("RaggedFromVariant_1/RaggedTensorFromVariant:0", shape=(None,), dtype=int64)))


# Visualization

In [12]:
print(f'train_ds: {train_ds}')
print(f'val_ds: {val_ds}')

def visualize_dataset(inputs, value_range, rows, cols, bounding_box_format):
    print(inputs)
    inputs = next(iter(inputs.take(1)))
    images, bounding_boxes = inputs["images"], inputs["bounding_boxes"]

    print(images)
    print(bounding_boxes)
    
    visualization.plot_bounding_box_gallery(
        images,
        value_range=value_range,
        rows=rows,
        cols=cols,
        y_true=bounding_boxes,
        scale=5,
        font_scale=0.7,
        bounding_box_format=bounding_box_format,
        class_mapping=class_mapping,
    )

visualize_dataset(
    train_ds, bounding_box_format="xyxy", value_range=(0, 255), rows=2, cols=2
)
visualize_dataset(
    val_ds, bounding_box_format="xyxy", value_range=(0, 255), rows=2, cols=2
)

train_ds: <_ParallelMapDataset element_spec={'images': TensorSpec(shape=(4, 640, 640, 3), dtype=tf.float32, name=None), 'bounding_boxes': {'classes': RaggedTensorSpec(TensorShape([4, None]), tf.float32, 1, tf.int64), 'boxes': RaggedTensorSpec(TensorShape([4, None, None]), tf.float32, 1, tf.int64)}}>
val_ds: <_ParallelMapDataset element_spec={'images': TensorSpec(shape=(4, 640, 640, 3), dtype=tf.float32, name=None), 'bounding_boxes': {'classes': RaggedTensorSpec(TensorShape([4, None]), tf.float32, 1, tf.int64), 'boxes': RaggedTensorSpec(TensorShape([4, None, None]), tf.float32, 1, tf.int64)}}>
<_ParallelMapDataset element_spec={'images': TensorSpec(shape=(4, 640, 640, 3), dtype=tf.float32, name=None), 'bounding_boxes': {'classes': RaggedTensorSpec(TensorShape([4, None]), tf.float32, 1, tf.int64), 'boxes': RaggedTensorSpec(TensorShape([4, None, None]), tf.float32, 1, tf.int64)}}>


2024-05-19 19:41:09.203612: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.
2024-05-19 19:41:09.284228: W tensorflow/core/framework/op_kernel.cc:1816] OP_REQUIRES failed at cast_op.cc:121 : UNIMPLEMENTED: Cast string to float is not supported
2024-05-19 19:41:09.286453: W tensorflow/core/framework/op_kernel.cc:1816] OP_REQUIRES failed at cast_op.cc:121 : UNIMPLEMENTED: Cast string to float is not supported
2024-05-19 19:41:09.288583: W tensorflow/core/framework/op_kernel.cc:1816] OP_REQUIRES failed at cast_op.cc:121 : UNIMPLEMENTED: Cast string to float is not supported
2024-05-19 19:41:09.290735: W tensorflow/core/framework/op_kernel.cc:1816] OP_REQUIRES failed at cast_op.cc:121 : UNIMPLEMENTED: Cast string to float is not supported
2024-05-19 19:41:09.292981: W tensorflow/core/framework/op_kernel.cc:1816] OP_REQUIRES failed at cast_op.cc:121 : UNIMPLEMENTED: Cast string to float is not supported
2024-05-19

UnimplementedError: {{function_node __wrapped__IteratorGetNext_output_types_3_device_/job:localhost/replica:0/task:0/device:CPU:0}} Cast string to float is not supported
	 [[{{node Cast}}]] [Op:IteratorGetNext] name: 

In [None]:
def dict_to_tuple(inputs):
    return inputs["images"], inputs["bounding_boxes"]

train_ds = train_ds.map(dict_to_tuple, num_parallel_calls=tf.data.AUTOTUNE)
train_ds = train_ds.prefetch(tf.data.AUTOTUNE)

val_ds = val_ds.map(dict_to_tuple, num_parallel_calls=tf.data.AUTOTUNE)
val_ds = val_ds.prefetch(tf.data.AUTOTUNE)

# Create The Model

In [None]:
backbone = keras_cv.models.YOLOV8Backbone.from_preset(
    "yolo_v8_s_backbone_coco"  # We will use yolov8 small backbone with coco weights
)

In [None]:
print(f'len(class_mapping) is {len(class_mapping)}')

yolo = keras_cv.models.YOLOV8Detector(
    num_classes=len(class_mapping),
    bounding_box_format="xyxy",
    backbone=backbone,
    fpn_depth=1,
)

# Compile the Model

In [None]:
# using optimizers.legacy.Adam instead of optimizers.Adam because of this message:
#
# WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.Adam` runs slowly on M1/M2 Macs,
# please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.Adam`.

optimizer = tf.keras.optimizers.legacy.Adam(
    learning_rate=LEARNING_RATE,
    global_clipnorm=GLOBAL_CLIPNORM,
)

yolo.compile(
    optimizer=optimizer, classification_loss="binary_crossentropy", box_loss="ciou"
)

In [None]:
class EvaluateCOCOMetricsCallback(keras.callbacks.Callback):
    def __init__(self, data, save_path):
        super().__init__()
        self.data = data
        self.metrics = keras_cv.metrics.BoxCOCOMetrics(
            bounding_box_format="xyxy",
            evaluate_freq=1e9,
        )

        self.save_path = save_path
        self.best_map = -1.0

    def on_epoch_end(self, epoch, logs):
        self.metrics.reset_state()
        for batch in self.data:
            images, y_true = batch[0], batch[1]
            y_pred = self.model.predict(images, verbose=0)
            self.metrics.update_state(y_true, y_pred)

        metrics = self.metrics.result(force=True)
        logs.update(metrics)

        current_map = metrics["MaP"]
        if current_map > self.best_map:
            self.best_map = current_map
            self.model.save(self.save_path, save_format="tf")  # Save the model when mAP improves

        return logs

# Train the Model

In [None]:
yolo.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    callbacks=[EvaluateCOCOMetricsCallback(val_ds, "model.tf")],
)

# Visualize Predictions

In [None]:
def visualize_detections(model, dataset, bounding_box_format):
    images, y_true = next(iter(dataset.take(1)))
    y_pred = model.predict(images)
    y_pred = bounding_box.to_ragged(y_pred)
    visualization.plot_bounding_box_gallery(
        images,
        value_range=(0, 255),
        bounding_box_format=bounding_box_format,
        y_true=y_true,
        y_pred=y_pred,
        scale=4,
        rows=1,
        cols=2,
        show=True,
        font_scale=0.7,
        class_mapping=class_mapping,
    )

visualize_detections(yolo, dataset=val_ds, bounding_box_format="xyxy")