In [1]:
# Imports
import sys
sys.path.append('../')
import tensorflow as tf
from tensorflow import keras
from helper import set_model_config
from helper import plot_loss, plot_confusion_matrix, visualize_object_predictions
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import keras_cv
from keras_cv import visualization
from keras_cv import bounding_box

2023-12-08 18:29:41.265957: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-08 18:29:41.315616: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-08 18:29:41.315670: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-08 18:29:41.315723: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-08 18:29:41.325658: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-08 18:29:41.326659: I tensorflow/core/platform/cpu_feature_guard.cc:182] This Tens

Using TensorFlow backend


In [2]:
# Build the settings dictionary for the 'pascal_yolo' model via the project helper
config = set_model_config(model_name="pascal_yolo")
config  # bare expression so the notebook displays the resolved settings

{'batch_size': 128,
 'learning_rate': 0.001,
 'training_epochs': 20,
 'global_clipnorm': 10,
 'n_classes': None,
 'optimizer': 'adam',
 'val_size': None}

In [3]:
# Load the Pascal-Voc dataset

# Visualize the Keras-CV compatible dataset
def visualize_object_detection_samples(inputs, value_range, rows, cols, bounding_box_format, class_mapping):
    """Plot a gallery of images from the first batch together with their ground-truth boxes.

    Args:
        inputs: Dataset-like object exposing `.take(1)`. Its elements may be either
            dictionaries with "images" and "bounding_boxes" keys, or
            `(images, bounding_boxes)` tuples as produced by `unpack_batch_dicts`.
        value_range: Forwarded to `plot_bounding_box_gallery` as `value_range`.
        rows: Number of gallery rows.
        cols: Number of gallery columns.
        bounding_box_format: Format string of the boxes (e.g. "xywh").
        class_mapping: Mapping from class id to display name.
    """
    batch = next(iter(inputs.take(1)))

    # The training pipeline converts batches to tuples before this function is
    # called, so both layouts have to be supported; string-indexing a tuple
    # would raise TypeError.
    if isinstance(batch, dict):
        images, bounding_boxes = batch["images"], batch["bounding_boxes"]
    else:
        images, bounding_boxes = batch[0], batch[1]

    visualization.plot_bounding_box_gallery(
        images,
        value_range=value_range,
        rows=rows,
        cols=cols,
        y_true=bounding_boxes,
        scale=5,
        font_scale=0.7,
        bounding_box_format=bounding_box_format,
        class_mapping=class_mapping,
    )

# Get the class mapping dictionary
def get_class_mapping(dataset_info):
    """Return a dict mapping each integer class id to its label name.

    Ids are assigned by position in
    `dataset_info.features['objects']['label'].names`, starting at 0.
    """
    label_names = dataset_info.features['objects']['label'].names
    return dict(enumerate(label_names))


# Unpackage the raw TFDS example format into the Keras-CV format
def unpackage_raw_tfds_inputs(inputs, bounding_box_format):
    """Convert one raw TFDS example into the KerasCV dictionary layout.

    Args:
        inputs: Example dict providing "image" and an "objects" dict with
            "bbox" (converted here from the "rel_yxyx" format) and "label".
        bounding_box_format: Target KerasCV bounding-box format for the boxes.

    Returns:
        A dict with "images" (image cast to float32) and "bounding_boxes",
        itself a dict with "classes" (int32) and "boxes" (float32).
    """
    image = inputs["image"]
    boxes = keras_cv.bounding_box.convert_format(
        inputs["objects"]["bbox"],
        images=image,
        source="rel_yxyx",
        target=bounding_box_format,
    )
    bounding_boxes = {
        "classes": tf.cast(inputs["objects"]["label"], dtype=tf.int32),
        # Keep coordinates in floating point: the converted values come from
        # relative (fractional) coordinates, and an int32 cast would truncate
        # every box before augmentation and training.
        "boxes": tf.cast(boxes, dtype=tf.float32),
    }
    return {"images": tf.cast(image, tf.float32), "bounding_boxes": bounding_boxes}

# Unpack a batch dictionary into the (images, bounding_boxes) tuple format
def unpack_batch_dicts(inputs):
    """Split a batch dictionary into an `(images, bounding_boxes)` tuple."""
    images = inputs["images"]
    bounding_boxes = inputs["bounding_boxes"]
    return images, bounding_boxes

# Custom dataloader, compatible with Keras-CV, applies shuffling and batching
def load_pascal_voc(split, dataset, bounding_box_format, batch_size=None, shuffle_buffer=None):
    """Load one TFDS split and return it shuffled and ragged-batched in KerasCV format.

    Args:
        split: Name of the split to load (e.g. "train", "validation", "test").
        dataset: TFDS dataset name passed to `tfds.load`.
        bounding_box_format: Target KerasCV bounding-box format for the boxes.
        batch_size: Optional batch size. When omitted, the "test" split uses 8
            and every other split uses the notebook-level `config['batch_size']`.
        shuffle_buffer: Optional shuffle buffer size (in examples). When
            omitted, four times the batch size is used.

    Returns:
        A `(ds, ds_info)` tuple: the prepared dataset and the info object
        returned by `tfds.load(..., with_info=True)`.
    """
    ds, ds_info = tfds.load(dataset, split=split, with_info=True, shuffle_files=True)

    # Convert the images/bboxes to the Keras-CV API format
    ds = ds.map(
        lambda x: unpackage_raw_tfds_inputs(x, bounding_box_format=bounding_box_format),
        num_parallel_calls=tf.data.AUTOTUNE,
    )

    # Resolve defaults; the global config is only read when no explicit size is given
    if batch_size is None:
        batch_size = 8 if split == 'test' else config['batch_size']
    if shuffle_buffer is None:
        shuffle_buffer = batch_size * 4

    # Shuffle individual examples, then create ragged batches (elements of different sizes).
    # Incomplete final batches are dropped.
    ds = ds.shuffle(shuffle_buffer, reshuffle_each_iteration=True)
    ds = ds.ragged_batch(batch_size, drop_remainder=True)

    return ds, ds_info

# Training-time augmentation pipeline. Every layer is given the "xywh" box
# format so that bounding boxes are handled alongside the images.
augmenter = keras.Sequential(
    layers=[
        # Horizontal flips only
        keras_cv.layers.RandomFlip(bounding_box_format="xywh", mode="horizontal"),
        # Shear along both axes with factor 0.2
        keras_cv.layers.RandomShear(bounding_box_format="xywh", x_factor=0.2, y_factor=0.2),
        # Jittered resize to the 480x480 training resolution
        keras_cv.layers.JitteredResize(
            bounding_box_format="xywh",
            target_size=(480, 480),
            scale_factor=(0.75, 1.3),
        ),
    ]
)

# Inference-time pre-processing for the validation and test sets.
# A plain Resizing layer is used instead of JitteredResize: the jittered variant
# picks a random scale for every batch, which would make validation loss (and
# therefore early stopping) and test results vary from run to run.
# pad_to_aspect_ratio=True pads rather than distorts the image to reach 480x480.
inf_preprocess = keras_cv.layers.Resizing(
    height=480,
    width=480,
    pad_to_aspect_ratio=True,
    bounding_box_format="xywh",
)

# Load the train / validation / test splits of VOC 2007 with "xywh" boxes.
# Only the dataset info returned for the training split is kept.
ds_train, ds_info = load_pascal_voc(split="train", dataset="voc/2007", bounding_box_format="xywh")
ds_val, _ = load_pascal_voc(split="validation", dataset="voc/2007", bounding_box_format="xywh")
ds_test, _ = load_pascal_voc(split="test", dataset="voc/2007", bounding_box_format="xywh")

# Training set: augment, convert batches to (images, bounding_boxes) tuples, prefetch
ds_train = ds_train.map(augmenter, num_parallel_calls=tf.data.AUTOTUNE)
ds_train = ds_train.map(unpack_batch_dicts, num_parallel_calls=tf.data.AUTOTUNE)
ds_train = ds_train.prefetch(tf.data.AUTOTUNE)

# Validation set: inference pre-processing, convert to tuples, prefetch
ds_val = ds_val.map(inf_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
ds_val = ds_val.map(unpack_batch_dicts, num_parallel_calls=tf.data.AUTOTUNE)
ds_val = ds_val.prefetch(tf.data.AUTOTUNE)

# Test set: inference pre-processing and prefetch only.
# No extra shuffle here: load_pascal_voc already shuffles the examples, and a
# shuffle buffer of ds_info.splits['test'].num_examples elements applied at this
# point would hold every pre-processed batch of the split in memory at once.
# NOTE(review): unlike train/val, the test batches stay in dictionary format
# (unpack_batch_dicts is not applied) - confirm this is what downstream code expects.
ds_test = ds_test.map(inf_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
ds_test = ds_test.prefetch(tf.data.AUTOTUNE)

In [4]:
ds_info

tfds.core.DatasetInfo(
    name='voc',
    full_name='voc/2007/4.0.0',
    description="""
    This dataset contains the data from the PASCAL Visual Object Classes Challenge,
    corresponding to the Classification and Detection competitions.
    
    In the Classification competition, the goal is to predict the set of labels
    contained in the image, while in the Detection competition the goal is to
    predict the bounding box and label of each individual object.
    annotations.
    """,
    config_description="""
    This dataset contains the data from the PASCAL Visual Object Classes Challenge
    2007, a.k.a. VOC2007.
    
    A total of 9963 images are included in this dataset, where each image
    contains a set of objects, out of 20 different classes, making a total of
    24640 annotated objects.
    
    """,
    homepage='http://host.robots.ox.ac.uk/pascal/VOC/voc2007/',
    data_dir='/home/dimitris/tensorflow_datasets/voc/2007/4.0.0',
    file_format=tfrecord,
    downlo

In [5]:
# Summarise split sizes and class labels taken from the dataset info object
voc_splits = ds_info.splits
voc_label_names = ds_info.features["objects"]["label"].names

print("----Pascal-Voc dataset information-----:")
print(f"Number of training examples: {voc_splits['train'].num_examples}")
print(f"Number of validation examples: {voc_splits['validation'].num_examples}")
print(f"Number of test examples: {voc_splits['test'].num_examples}")
print(f"Dataset splits available: {list(voc_splits.keys())}")
print("Number of Classes:", len(voc_label_names))
print(f"Class names: {voc_label_names}")

----Pascal-Voc dataset information-----:
Number of training examples: 2501
Number of validation examples: 2510
Number of test examples: 4952
Dataset splits available: ['test', 'train', 'validation']
Number of Classes: 20
Class names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']


In [None]:
# Preview a 2x4 grid of training images with their ground-truth boxes
with plt.style.context('dark_background'):
    visualize_object_detection_samples(
        ds_train,
        value_range=(0, 255),
        rows=2,
        cols=4,
        bounding_box_format="xywh",
        class_mapping=get_class_mapping(ds_info),
    )

In [4]:
# Load the extra-small YOLOV8 backbone with weights pre-trained on the COCO dataset.
# The "_coco" suffix selects the pre-trained preset; the un-suffixed
# "yolo_v8_xs_backbone" preset only builds the architecture without pre-trained weights.
backbone = keras_cv.models.YOLOV8Backbone.from_preset("yolo_v8_xs_backbone_coco")
backbone.summary()

Model: "yolov8_backbone"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, None, None, 3)]      0         []                            
                                                                                                  
 rescaling (Rescaling)       (None, None, None, 3)        0         ['input_1[0][0]']             
                                                                                                  
 stem_1_pad (ZeroPadding2D)  (None, None, None, 3)        0         ['rescaling[0][0]']           
                                                                                                  
 stem_1_conv (Conv2D)        (None, None, None, 16)       432       ['stem_1_pad[0][0]']          
                                                                                    

In [5]:
# Build the YOLOV8 detector on top of the backbone loaded above, with one output
# class per entry of the dataset's class mapping
model = keras_cv.models.YOLOV8Detector(
    backbone=backbone,
    num_classes=len(get_class_mapping(ds_info)),
    bounding_box_format="xywh",
    fpn_depth=1,
)
model.summary()

Model: "yolov8_detector"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, None, None, 3)]      0         []                            
                                                                                                  
 model (Functional)          {'P3': (None, None, None,    1277680   ['input_2[0][0]']             
                             64),                                                                 
                              'P4': (None, None, None,                                            
                             128),                                                                
                              'P5': (None, None, None,                                            
                             256)}                                                  

In [None]:
# Compile and configure the model for training

# NOTE: Feel free to replace this with your own optimizer
if config['optimizer'].lower() == 'adam':
    optimizer = tf.keras.optimizers.Adam(learning_rate=config['learning_rate'],
                                         global_clipnorm=config['global_clipnorm'])
else:
    # Fail early with a clear message instead of a NameError at model.compile()
    raise ValueError(
        f"Unsupported optimizer {config['optimizer']!r}: only 'adam' is handled in this cell"
    )

# Early stopping: halt training after 5 epochs without improvement in validation loss
callback = EarlyStopping(monitor='val_loss', patience=5)

# Compile and train
model.compile(optimizer=optimizer,
              classification_loss='binary_crossentropy',
              box_loss="ciou")
history = model.fit(ds_train, validation_data=ds_val, epochs=config['training_epochs'], callbacks=[callback])

# Plot losses
with plt.style.context('dark_background'):
    plot_loss(history, model_type='object_detection')


In [None]:
# Load a trained model and visualize predictions.
# Uses the `keras` module imported from TensorFlow at the top of the notebook, so
# the model is loaded with the same Keras implementation that built and trained it
# (and no import is needed in the middle of the notebook).
# NOTE(review): the path is resolved relative to the current working directory and
# nothing in this notebook saves a model there - confirm where the model is exported.
trained_model = keras.models.load_model('computer_vision/trained_models/pascal_yolo_model')

with plt.style.context('dark_background'):
    visualize_object_predictions(trained_model, dataset=ds_val, bounding_box_format='xywh', class_mapping=get_class_mapping(ds_info))

In [None]:
# FINAL TODO: Log test_set eval metrics and plot confusion matrix for test set