In [45]:
import tensorflow as tf
import tensorflow_datasets as tfds
import keras_cv
import numpy as np
from tensorflow import keras
from keras.utils import load_img, img_to_array
import xml.etree.ElementTree as elemTree
from luketils import visualization

In [46]:
CHECKPOINT_PATH = 'checkpoint2/'
INFERENCE_CHECKPOINT_PATH = CHECKPOINT_PATH

class_ids = [
    "ok",
    "nok"
]
class_mapping = dict(zip(range(len(class_ids)), class_ids))

In [37]:
def visualize_image(images, boxes):
    visualization.plot_bounding_box_gallery(
        images,
        value_range=(0, 255),
        bounding_box_format='xywh',
        y_true=boxes,
        scale=4,
        rows=8,
        cols=5,
        show=True,
        thickness=4,
        font_scale=1,
        class_mapping=class_mapping,
    )

def img_load(fname):
    img = img_to_array(load_img('images/trains/' + fname + '.jpg'))
    return img

def box_load(fname):
    tree = elemTree.parse('images/trains/' + fname + '.xml')
    object = tree.find('object')
    name = object.find('name').text
    bndbox = object.find('bndbox')
    xmin = bndbox.find('xmin').text
    ymin = bndbox.find('ymin').text
    xmax = bndbox.find('xmax').text
    ymax = bndbox.find('ymax').text
    x = int(xmin)
    y = int(ymin)
    w = int(xmax) - int(xmin)
    h = int(ymax) - int(ymin)
    k = 0
    for key, value in class_mapping.items():
        if value == name:
            k = key
    return np.expand_dims(np.array([x, y, w, h, k]), 0)

In [66]:
images = []
bounding_boxes = []

datasets = []

fnames = ['ok', 'nok']    
for fname in fnames:
    image = img_load(fname)
    box = box_load(fname)
    datasets.append({ 'image': image, 'box': box })
    for i in range(20):
        rand_augment = keras_cv.layers.RandAugment(
            value_range=(0, 255), augmentations_per_image=3, magnitude=0.5
            , geometric=False
        )
        datasets.append({ 'image': rand_augment(image).numpy(), 'box': box })

for data in datasets:
    images.append(data['image'])
    bounding_boxes.append(data['box'])

images = np.array(images)
bounding_boxes = np.array(bounding_boxes)
# images = tf.convert_to_tensor(
#     images, dtype=tf.float32
# )
bounding_boxes = tf.ragged.constant(bounding_boxes, dtype=tf.float32)

# visualize_image(images, boxes)

In [70]:
inputs = {'images': images, 'bounding_boxes': bounding_boxes}
dataset = tf.data.Dataset.from_tensor_slices(inputs)
dataset = dataset.batch(images.shape[0])
example = next(iter(dataset))
images, boxes = example['images'], example['bounding_boxes']
image = images[0]
boxes = boxes[0]


In [69]:
model = keras_cv.models.RetinaNet(
    classes=20,
    bounding_box_format='xywh',
    backbone='resnet50',
    backbone_weights='imagenet',
    include_rescaling=True,
    evaluate_train_time_metrics=False
)

model.backbone.trainable = False
optimizer = tf.optimizers.SGD(global_clipnorm=10.0)
model.compile(
    classification_loss=keras_cv.losses.FocalLoss(from_logits=True, reduction="none"),
    box_loss=keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0, reduction="none"),
    optimizer=optimizer,
    metrics = [
        keras_cv.metrics.COCOMeanAveragePrecision(
        class_ids=range(2),
        bounding_box_format="xywh",
        name="Mean Average Precision",
    ),
    keras_cv.metrics.COCORecall(
        class_ids=range(2),
        bounding_box_format="xywh",
        max_detections=100,
        name="Recall",
    ),
],
)

callbacks = [
    # keras.callbacks.TensorBoard(log_dir="logs"),
    keras.callbacks.ReduceLROnPlateau(patience=5),
    # Uncomment to train your own RetinaNet
    keras.callbacks.ModelCheckpoint(CHECKPOINT_PATH, save_weights_only=True),
]

model.fit(
    dataset,
    validation_data=0.3,
    epochs=10,
    callbacks=callbacks,
)
model.save_weights(CHECKPOINT_PATH)

ValueError: Unbatching a tensor is only supported for rank >= 1