# Object detection model customization

## Setup

To install the libraries for customizing a model, run the following commands:

In [None]:
!python --version
!pip install --upgrade pip
!pip install mediapipe-model-maker

Use the following code to import the required Python classes:

In [None]:
from google.colab import files
import os
import json
from tqdm import tqdm
import tensorflow as tf
assert tf.__version__.startswith('2')

from mediapipe_model_maker import object_detector

from google.colab import drive
import shutil

%matplotlib inline

### Set paths

In [None]:
# Mount Google Drive at /content/drive; every path used below is built
# under /content/drive/MyDrive/.
drive.mount('/content/drive')

In [None]:
# Root of the mounted Google Drive.
base_path = '/content/drive/MyDrive/'
# Folder the original images are copied from.
source_path = f"{base_path}Datasets/revisitop/rparis6k/data/"

# Project folder that holds the split lists and the per-split datasets.
dest_base_path = f"{base_path}MyProject/rparis6k/"

# One dataset folder per split; every path keeps its trailing slash because
# later cells append sub-paths with plain string concatenation.
train_dataset_path, validation_dataset_path, test_dataset_path = (
    f"{dest_base_path}{split_name}/"
    for split_name in ('train', 'validation', 'test')
)

In [None]:
# Create the project folder, then every split folder, then every split's
# 'images' subfolder. Folders that already exist are left untouched.
os.makedirs(dest_base_path, exist_ok=True)

split_paths = (train_dataset_path, validation_dataset_path, test_dataset_path)

for split_path in split_paths:
    os.makedirs(split_path, exist_ok=True)

for split_path in split_paths:
    os.makedirs(os.path.join(split_path, 'images'), exist_ok=True)

## Prepare data

### Copy images

In [None]:
# FIXME: fix check_and_delete_augmented_images function

In [None]:
# Function to search for augmented data
def check_and_delete_augmented_images(folder_path):
    """Look for augmented files in ``folder_path`` and offer to delete them.

    A file counts as augmented when its name contains the substring 'aug'.
    When at least one is found, the user is asked for confirmation on stdin
    and the matching files are removed only if the answer is 'yes'
    (case-insensitive, surrounding whitespace ignored).

    Parameters:
    - folder_path: Directory whose entries are inspected.

    Returns:
    - None. Progress and outcome are reported with print().
    """
    augmented_images = []
    for entry in os.listdir(folder_path):
        if 'aug' in entry:
            augmented_images.append(entry)

    # Nothing to do: report and leave early.
    if not augmented_images:
        print("No augmented images found.")
        return

    print(f"Found {len(augmented_images)} augmented images in {folder_path}.")
    answer = input("Do you want to delete these images? (yes/no): ").strip().lower()

    if answer != 'yes':
        print("Deletion aborted by user.")
        return

    # Only the matching files are removed here.
    # TODO (from the original notes): also delete the remaining elements of
    # the image folder and the labels.json file.
    for name in tqdm(augmented_images, desc="Deleting augmented images"):
        os.remove(os.path.join(folder_path, name))
    print("Augmented images deleted successfully.")

# folder_path = augmented_train_dataset_path

In [None]:
# TODO: mention that the train set contains augmented images, if that is the case
# TODO: use also a boolean value to memorize the aug data deletion

In [None]:
# Function to copy images
def copy_images(file_list, dest_folder, source_folder=None):
    """Copy every image named in ``file_list`` into ``dest_folder``.

    Parameters:
    - file_list: Path to a text file with one image path per line, relative
      to the source folder. Blank lines are ignored.
    - dest_folder: Directory the images are copied into. The relative path
      of each image is preserved and missing subfolders are created.
    - source_folder: Directory the images are read from. Defaults to the
      notebook-level ``source_path``.

    Raises:
    - OSError (e.g. FileNotFoundError) if the list file or a listed image
      cannot be read.
    """
    if source_folder is None:
        source_folder = source_path

    # Read the whole list first so the file is closed before the (long) copy.
    with open(file_list, 'r') as f:
        img_names = [line.strip() for line in f]

    # An empty name would resolve to the source folder itself and make
    # shutil.copy2 fail, so blank lines are dropped.
    img_names = [img_name for img_name in img_names if img_name]

    for img_name in tqdm(img_names, desc=f"Copying images to {dest_folder}"):
        src = os.path.join(source_folder, img_name)
        dst = os.path.join(dest_folder, img_name)
        os.makedirs(os.path.dirname(dst), exist_ok=True)
        shutil.copy2(src, dst)

# Copy images for each set, but only when all three image folders are still
# empty, so an existing split is never mixed with a new copy.
split_image_dirs = {
    'train': train_dataset_path + 'images/',
    'validation': validation_dataset_path + 'images/',
    'test': test_dataset_path + 'images/',
}

all_image_dirs_empty = all(
    len(os.listdir(image_dir)) == 0 for image_dir in split_image_dirs.values()
)

if not all_image_dirs_empty:
    print("One or more directories are not empty. Copy operation aborted.\n")
else:
    copy_images(dest_base_path + 'train.txt', split_image_dirs['train'])
    copy_images(dest_base_path + 'val.txt', split_image_dirs['validation'])
    copy_images(dest_base_path + 'test.txt', split_image_dirs['test'])
    print("Dataset division completed!\n")

# Report the size of every split, whether or not a copy just happened.
for split_name, image_dir in split_image_dirs.items():
    print(f"Number of images in {split_name} set: {len(os.listdir(image_dir))}")

### Review data

Verify the dataset content by printing the categories from the `labels.json` file. There should be 13 total categories. Index 0 is always set to be the `background` class which may be unused in the dataset.

In [None]:
# Load the training annotations and print each category as "<id>: <name>".
with open(os.path.join(train_dataset_path, "labels.json"), "r") as f:
  labels_json = json.load(f)
for category_item in labels_json["categories"]:
  print(f"{category_item['id']}: {category_item['name']}")

In [None]:
#@title Visualize the training data
import matplotlib.pyplot as plt
from matplotlib import patches, text, patheffects
from collections import defaultdict
import math

# TODO: it may be interesting if it shows two random images per category

def draw_outline(obj):
    """Give ``obj`` a black stroke (linewidth 4) followed by its normal rendering."""
    black_stroke = patheffects.Stroke(linewidth=4,  foreground='black')
    obj.set_path_effects([black_stroke, patheffects.Normal()])

def draw_box(ax, bb):
    """Draw ``bb`` on ``ax`` as an unfilled red rectangle with a black outline.

    ``bb`` is indexed as (x, y, width, height): its first two items are the
    rectangle's anchor point and the next two its width and height.
    """
    anchor = (bb[0], bb[1])
    rectangle = patches.Rectangle(anchor, bb[2], bb[3], fill=False, edgecolor='red', lw=2)
    draw_outline(ax.add_patch(rectangle))

def draw_text(ax, bb, txt, disp):
    """Write ``txt`` in bold white, outlined in black, next to the box ``bb``.

    The text is placed at x = bb[0] and y = bb[1] - disp, anchored by its
    top edge.
    """
    label = ax.text(
        bb[0],
        (bb[1] - disp),
        txt,
        verticalalignment='top',
        color='white',
        fontsize=10,
        weight='bold',
    )
    draw_outline(label)

def draw_bbox(ax, annotations_list, id_to_label, image_shape):
    """Draw the box and category name of every annotation on ``ax``.

    Parameters:
    - ax: Axes to draw on.
    - annotations_list: Annotation dicts with 'category_id' and 'bbox' keys.
    - id_to_label: Mapping from category id to the text shown for it.
    - image_shape: Shape of the displayed image; the label is shifted up by
      5% of its first entry.
    """
    for entry in annotations_list:
        category_id, box = entry["category_id"], entry["bbox"]
        draw_box(ax, box)
        draw_text(ax, box, id_to_label[category_id], image_shape[0] * 0.05)

def visualize(dataset_folder, max_examples=None):
    """Show annotated images of a COCO-style dataset in a 3-column grid.

    Parameters:
    - dataset_folder: Directory containing 'labels.json' and an 'images'
      subfolder.
    - max_examples: Maximum number of annotated images to show. None (or a
      value larger than what is available) shows every annotated image.

    Only images that have at least one annotation are shown, in the order
    their first annotation appears in 'labels.json'.
    """
    with open(os.path.join(dataset_folder, "labels.json"), "r") as f:
        labels_json = json.load(f)

    # Look images up by their COCO id: ids are not guaranteed to match the
    # position of the image in the 'images' list.
    images_by_id = {item["id"]: item for item in labels_json["images"]}
    cat_id_to_label = {item["id"]: item["name"] for item in labels_json["categories"]}

    image_annots = defaultdict(list)
    for annotation_obj in labels_json["annotations"]:
        image_annots[annotation_obj["image_id"]].append(annotation_obj)

    # Never reserve more grid cells than there are annotated images.
    if max_examples is None or max_examples > len(image_annots):
        max_examples = len(image_annots)
    if max_examples <= 0:
        print("No annotated images to display.")
        return

    n_cols = 3
    n_rows = math.ceil(max_examples / n_cols)
    # squeeze=False keeps axs two-dimensional even when there is a single row.
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(24, n_rows*8), squeeze=False)  # 8x8 for each image

    examples = list(image_annots.items())[:max_examples]
    for ind, (image_id, annotations_list) in enumerate(examples):
        ax = axs[ind // n_cols, ind % n_cols]
        file_name = images_by_id[image_id]["file_name"]
        img = plt.imread(os.path.join(dataset_folder, "images", file_name))
        ax.imshow(img)
        draw_bbox(ax, annotations_list, cat_id_to_label, img.shape)

    plt.show()

# Show up to 9 annotated images from the training set.
visualize(train_dataset_path, 9)

### Create dataset

In [None]:
# Remove the training cache folder left by a previous run, if there is one.
# NOTE(review): presumably from_coco_folder reuses files already present in
# its cache_dir, which would make this cleanup necessary after the dataset
# changes — confirm against the Model Maker documentation. Only the
# training cache folder is removed here.
if os.path.exists("/tmp/od_data/train"):
    shutil.rmtree("/tmp/od_data/train") # TODO: do I need this instruction ?

In [None]:
# TODO: is it possible to add a progress bar?

In [None]:
# Build the training and validation datasets from their COCO-style folders,
# each with its own cache_dir under /tmp/od_data, and report their sizes.
train_data = object_detector.Dataset.from_coco_folder(train_dataset_path, cache_dir="/tmp/od_data/train")
validation_data = object_detector.Dataset.from_coco_folder(validation_dataset_path, cache_dir="/tmp/od_data/validation")
print("train_data size: ", train_data.size)
print("validation_data size: ", validation_data.size)

## Augmentation

### Augment data

In [None]:
import albumentations as A
import numpy as np
import cv2

In [None]:
# TODO: check the aug data deletion boolean value
# if true -> ask to execute augmentation
# if false -> ask to delete aug data before and then to execute augmentation (update train_data_path)

In [None]:
# Augmentation pipeline
# Bounding boxes are declared in 'coco' format and their class labels travel
# in the 'category_ids' field passed alongside the boxes.
bbox_params=A.BboxParams(format='coco', label_fields=['category_ids'])

# The crop always runs (p=1.0) and outputs 384x384 images; every other
# transform is applied with the probability given by its p. The trailing
# comments record alternative parameter values noted by the author.
transform = A.Compose([
    A.RandomResizedCrop(height=384, width=384, scale=(0.5, 1.0), ratio=(0.8, 1.2), p=1.0),  # scale=(0.8, 1.0), ratio=(0.9, 1.1)
    A.HorizontalFlip(p=0.5),                                                                #
    A.RandomBrightnessContrast(p=0.3),                                                      # p=0.2
    A.RGBShift(r_shift_limit=30, g_shift_limit=30, b_shift_limit=20, p=0.3),                # r_shift_limit=20, g_shift_limit=20
    A.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=30, p=0.5),           # shift_limit=0.1, scale_limit=0.1, rotate_limit=15
    A.RandomShadow(p=0.2),                                                                  #
    A.CLAHE(p=0.3),                                                                         #
], bbox_params=bbox_params)                                                                 # min_area=1024 min_area=256, min_visibility=0.1

In [None]:
def normalize_bbox(bbox, image_width, image_height):
    """
    Turn a pixel box [x_min, y_min, width, height] into corner coordinates
    [x_min, y_min, x_max, y_max] expressed as fractions of the image size.

    Every coordinate is clamped to the [0, 1] range, so boxes that extend
    past the image border are cut at the border.
    """
    left, top, box_width, box_height = bbox

    corners = (
        left / image_width,
        top / image_height,
        (left + box_width) / image_width,
        (top + box_height) / image_height,
    )

    return [max(0, min(1, coordinate)) for coordinate in corners]

In [None]:
def denormalize_bbox(bbox_norm, image_width, image_height):
    """
    Turn normalized corners [x_min, y_min, x_max, y_max] back into a pixel
    box [x_min, y_min, width, height].

    The corners are scaled by the image size and rounded to one decimal
    first; width and height are then derived from the rounded corners and
    rounded to one decimal as well.
    """
    left_norm, top_norm, right_norm, bottom_norm = bbox_norm

    scaled_corners = (
        (left_norm, image_width),
        (top_norm, image_height),
        (right_norm, image_width),
        (bottom_norm, image_height),
    )
    left, top, right, bottom = (round(value * size, 1) for value, size in scaled_corners)

    return [left, top, round(right - left, 1), round(bottom - top, 1)]

In [None]:
def apply_augmentation(image_path, bboxes, category_ids, output_path, output_filename, transform):
    """
    Apply augmentation to an image and return the transformed annotations.

    Parameters:
    - image_path: Path to the original image.
    - bboxes: Bounding boxes in COCO pixel format
      [[x_min, y_min, width, height], ...], as stored in 'labels.json'.
    - category_ids: List of category IDs corresponding to each bounding box.
    - output_path: Directory where the augmented image will be saved.
    - output_filename: Filename for the augmented image.
    - transform: Albumentations transform to be applied. It must be composed
      with BboxParams(format='coco', label_fields=['category_ids']), like
      the pipeline defined in this notebook.

    Returns:
    - A tuple (bboxes, category_ids). The boxes are normalized corners
      [x_min, y_min, x_max, y_max] in the [0, 1] range, relative to the
      AUGMENTED image. ([], []) is returned when the image cannot be read or
      the transform raises.
    """
    # Read the image; cv2.imread returns None instead of raising on failure.
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error during transformation: could not read image {image_path}")
        return [], []
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_height, image_width = image.shape[:2]

    # The transform expects COCO pixel boxes, so the boxes are passed in that
    # format (normalized values would be read as sub-pixel boxes and get
    # dropped). They are clipped to the image first, and boxes left without
    # any area are discarded together with their label.
    clipped_bboxes = []
    clipped_category_ids = []
    for bbox, category_id in zip(bboxes, category_ids):
        x_min, y_min, w, h = bbox
        x_max = min(image_width, x_min + w)
        y_max = min(image_height, y_min + h)
        x_min = max(0, x_min)
        y_min = max(0, y_min)
        if x_max <= x_min or y_max <= y_min:
            continue
        clipped_bboxes.append([x_min, y_min, x_max - x_min, y_max - y_min])
        clipped_category_ids.append(category_id)

    # Apply the augmentation
    try:
        transformed = transform(image=image, bboxes=clipped_bboxes, category_ids=clipped_category_ids)
    except Exception as e:
        # Best effort: report the failure and let the caller carry on.
        print(f"Error during transformation: {e}")
        return [], []

    # Save the augmented image
    augmented_image_path = os.path.join(output_path, output_filename)
    #cv2.imwrite(augmented_image_path, cv2.cvtColor(transformed['image'], cv2.COLOR_RGB2BGR)) # TODO: uncomment

    # The transform may change the image size, so the resulting pixel boxes
    # are normalized against the augmented image, not the original one.
    augmented_height, augmented_width = transformed['image'].shape[:2]
    normalized_bboxes = [
        normalize_bbox(bbox[:4], augmented_width, augmented_height)
        for bbox in transformed['bboxes']
    ]

    return normalized_bboxes, transformed['category_ids']

In [None]:
# Load the annotations of the three splits.
with open(os.path.join(train_dataset_path, 'labels.json'), 'r') as f:
    train_json = json.load(f)

with open(os.path.join(validation_dataset_path, 'labels.json'), 'r') as f:
    val_json = json.load(f)

with open(os.path.join(test_dataset_path, 'labels.json'), 'r') as f:
    test_json = json.load(f)

# Highest image id and annotation id used by any split. New ids for the
# augmented data are numbered upwards from these values, so the true maximum
# is taken rather than the id of the last entry (the lists are not
# guaranteed to be sorted by id). With no entries at all the counters are 0.
split_jsons = (train_json, val_json, test_json)
n_images = max(
    (image['id'] for split_json in split_jsons for image in split_json['images']),
    default=0,
)
n_annotations = max(
    (annotation['id'] for split_json in split_jsons for annotation in split_json['annotations']),
    default=0,
)

print(f"Number of images: {n_images}")
print(f"Number of annotations: {n_annotations}")

In [None]:
def augment_dataset(input_path, output_path, transform, n_images, n_annotations, num_augmentations=5):
    """
    Augment a dataset based on COCO format and build the augmented annotations.

    Parameters:
    - input_path: Path to the directory containing the original dataset and 'labels.json'.
    - output_path: Path to the directory where the augmented dataset will be saved.
    - transform: Albumentations transform forwarded to apply_augmentation.
    - n_images: Highest image id already in use; new images are numbered from n_images + 1.
    - n_annotations: Highest annotation id already in use; new annotations are numbered
      from n_annotations + 1.
    - num_augmentations: Number of augmentations to apply per image.

    Returns:
    - The new COCO dictionary ('categories', 'images', 'annotations') holding the original
      entries followed by the augmented ones.

    The function reads the original COCO JSON file and applies the augmentations to each
    image. Copying the original images and writing the new 'labels.json' are currently
    disabled (see the TODO markers), so nothing is written by this function itself.
    """
    # Load the original COCO JSON file
    with open(os.path.join(input_path, 'labels.json'), 'r') as f:
        coco_data = json.load(f)

    # Group the annotations by image once instead of scanning the whole
    # annotation list for every image.
    annotations_by_image = {}
    for ann in coco_data['annotations']:
        annotations_by_image.setdefault(ann['image_id'], []).append(ann)

    new_images = []
    new_annotations = []

    # Copy original images and annotations
    for img in tqdm(coco_data['images'], desc="Copying original images"):

        # Copy original image
        src_path = os.path.join(input_path, 'images', img['file_name'])
        dst_path = os.path.join(output_path, 'images', img['file_name'])
        #shutil.copy2(src_path, dst_path) # TODO: uncomment

        new_images.append(img)
        new_annotations.extend(annotations_by_image.get(img['id'], []))

    # Apply augmentations
    for img in tqdm(coco_data['images'], desc="Augmenting images"):
        image_path = os.path.join(input_path, 'images', img['file_name'])

        # Find annotations for this image
        annotations = annotations_by_image.get(img['id'], [])

        # Get original image dimensions; cv2.imread returns None when the
        # file is missing or unreadable.
        image = cv2.imread(image_path)
        if image is None:
            print(f"Skipping {image_path}: image could not be read.")
            continue
        image_height, image_width = image.shape[:2]

        # Prepare data for augmentation (identical for every augmentation of
        # this image).
        bboxes = [ann['bbox'] for ann in annotations]
        category_ids = [ann['category_id'] for ann in annotations]
        base_name = os.path.splitext(img['file_name'])[0]

        for i in range(num_augmentations):
            # Generate a new filename
            new_filename = f"{base_name}_aug_{i}.jpg"

            # Apply augmentation
            new_bboxes, new_category_ids = apply_augmentation(
                image_path, bboxes, category_ids,
                os.path.join(output_path, 'images'), new_filename, transform
            )

            # NOTE(review): the boxes are scaled with the ORIGINAL image size.
            # The pipeline in this notebook crops to 384x384, so the augmented
            # image size should presumably be used here instead — confirm.
            new_bboxes = [denormalize_bbox(bbox, image_width, image_height) for bbox in new_bboxes]

            # Create a new image entry
            n_images += 1
            new_img_id = n_images
            new_images.append({
                'id': new_img_id,
                'file_name': new_filename
            })

            # Create new annotations
            for bbox, cat_id in zip(new_bboxes, new_category_ids):
                n_annotations += 1
                new_annotations.append({
                    'id': n_annotations,
                    'image_id': new_img_id,
                    'category_id': cat_id,
                    'bbox': bbox
                })

    # Create the new COCO JSON file
    new_coco_data = {
        'categories': coco_data['categories'],
        'images': new_images,
        'annotations': new_annotations
    }

    # TODO: uncomment
    # # Save the new COCO JSON file
    # with open(os.path.join(output_path, 'labels.json'), 'w') as f:
    #     json.dump(new_coco_data, f, indent=4)

    return new_coco_data


# Folder that receives the augmented training set.
augmented_train_dataset_path = dest_base_path + 'train_augmented/'

os.makedirs(os.path.join(augmented_train_dataset_path, 'images'), exist_ok=True)

# Augment only when the target image folder is still empty.
if os.listdir(augmented_train_dataset_path + 'images/'):
    print("Augmentation has already been made.")
else:
    augment_dataset(
        train_dataset_path,
        augmented_train_dataset_path,
        transform,
        n_images,
        n_annotations,
        num_augmentations=5,
    )

In [None]:
# Count the JPEG files (matched case-insensitively by extension) in the
# augmented image folder.
extensions=['.jpg', '.jpeg']
count = sum(
    filename.lower().endswith(tuple(extensions))
    for filename in os.listdir(os.path.join(augmented_train_dataset_path, 'images'))
)

print(f"Number of images in the train_augmented folder: {count}")

In [None]:
#@title Visualize the augmented training data

# Show up to 12 annotated images from the augmented training set.
visualize(augmented_train_dataset_path, 12) # FIXME

### Rewrite train dataset

In [None]:
# TODO: add if condition (if augmentation has been executed)

In [None]:
# Remove the training cache folder before the dataset is rebuilt. The folder
# is checked first so the cell does not fail with FileNotFoundError when no
# cache exists (same guard as the cleanup in the "Create dataset" section).
if os.path.exists("/tmp/od_data/train"):
    shutil.rmtree("/tmp/od_data/train") # TODO: do I need this instruction ?

In [None]:
# Replace train_data with the dataset built from the augmented folder,
# using the same cache_dir as the original training data.
train_data = object_detector.Dataset.from_coco_folder(augmented_train_dataset_path, cache_dir="/tmp/od_data/train")
print("Updated train_data size: ", train_data.size)

## Retrain model

### Set retraining options

In [None]:
# Model architecture to retrain.
spec = object_detector.SupportedModels.MOBILENET_MULTI_AVG_I384

# Training hyperparameters. export_dir is the folder the later export cells
# list and download from.
hparams = object_detector.HParams(
    learning_rate=0.015,
    batch_size=64,
    epochs=100,
    cosine_decay_epochs=100,
    cosine_decay_alpha=0.1,
    export_dir='exported_model'
)

# Model-level options (L2 weight decay).
model_options = object_detector.ModelOptions(
    l2_weight_decay=1e-4
)

# Bundle the model choice, hyperparameters and model options for create().
options = object_detector.ObjectDetectorOptions(
    supported_model=spec,
    hparams=hparams,
    model_options=model_options
)

### Run retraining

In [None]:
# Train the object detector on train_data, with validation_data as the
# validation set and the options defined above.
model = object_detector.ObjectDetector.create(
    train_data=train_data,
    validation_data=validation_data,
    options=options
)

### Evaluate the model performance

After training the model, evaluate it on the validation dataset and print the loss and coco_metrics. The most important metric for evaluating the model performance is typically the "AP" coco metric for Average Precision.

In [None]:
# TODO: is it possible to add a progress bar?

In [None]:
# Evaluate on the validation set in batches of 4; evaluate() yields the loss
# and the COCO metrics.
loss, coco_metrics = model.evaluate(validation_data, batch_size=4)
print(f"Validation loss: {loss}")
print(f"Validation coco metrics: {coco_metrics}")

## Export model

After creating the model, convert and export it to a Tensorflow Lite model format for later use on an on-device application. The export also includes model metadata, which includes the label map.

In [None]:
# TODO: do I need to remove the existing model first?

In [None]:
# Export the trained model, list the export folder and download the result.
# NOTE(review): the download assumes the default export name is
# 'model.tflite' inside export_dir ('exported_model') — confirm.
model.export_model()
!ls exported_model
files.download('exported_model/model.tflite')

## Model quantization

Model quantization is a model modification technique that can reduce the model size and improve the speed of predictions with only a relatively minor decrease in accuracy.

This section of the guide explains how to apply quantization to your model. Model Maker supports two forms of quantization for object detector:
1. Quantization Aware Training: 8 bit integer precision for CPU usage
2. Post-Training Quantization: 16 bit floating point precision for GPU usage

### Quantization aware training (int8 quantization)
Quantization aware training (QAT) is a fine-tuning step which happens after fully training your model. This technique further tunes a model which emulates inference time quantization in order to account for the lower precision of 8 bit integer quantization. For on-device applications with a standard CPU, use Int8 precision. For more information, see the [TensorFlow Lite](https://www.tensorflow.org/model_optimization/guide/quantization/training) documentation.

To apply quantization aware training and export to an int8 model, create a `QATHParams` configuration and run the `quantization_aware_training` method. See the **Hyperparameters** section below on detailed usage of `QATHParams`.

In [None]:
# Run quantization aware training with its own hyperparameters, then
# evaluate the resulting model on the validation set.
qat_hparams = object_detector.QATHParams(learning_rate=0.3, batch_size=4, epochs=10, decay_steps=6, decay_rate=0.96)
model.quantization_aware_training(train_data, validation_data, qat_hparams=qat_hparams)
qat_loss, qat_coco_metrics = model.evaluate(validation_data)
print(f"QAT validation loss: {qat_loss}")
print(f"QAT validation coco metrics: {qat_coco_metrics}")

The QAT step often requires multiple runs to tune the training parameters. To avoid having to rerun model training using the `create` method, use the `restore_float_ckpt` method to restore the model state back to the fully trained float model (the state after running the `create` method) in order to run QAT again.

In [None]:
# Second QAT attempt with different hyperparameters: restore the float
# checkpoint first so training starts again from the fully trained float
# model, then retrain and re-evaluate.
new_qat_hparams = object_detector.QATHParams(learning_rate=0.9, batch_size=4, epochs=15, decay_steps=5, decay_rate=0.96)
model.restore_float_ckpt()
model.quantization_aware_training(train_data, validation_data, qat_hparams=new_qat_hparams)
qat_loss, qat_coco_metrics = model.evaluate(validation_data)
print(f"QAT validation loss: {qat_loss}")
print(f"QAT validation coco metrics: {qat_coco_metrics}")

Finally, use the `export_model` function to export to an int8 quantized model. The `export_model` function will automatically export to either a float32 or an int8 model depending on whether `quantization_aware_training` was run.

In [None]:
# Export the QAT model under its own file name, list the export folder with
# file sizes and download the int8 model.
model.export_model('model_int8_qat.tflite')
!ls -lh exported_model
files.download('exported_model/model_int8_qat.tflite')

### Post-training quantization (fp16 quantization)

Post-training model quantization is a model modification technique that can reduce the model size and improve the speed of predictions with only a relatively minor decrease in accuracy. This approach reduces the size of the data processed by the model, for example by transforming 32-bit floating point numbers to 16-bit floats. Float16 quantization is recommended for GPU usage. For more information, see the [TensorFlow Lite](https://www.tensorflow.org/model_optimization/guide/quantization/post_training) documentation.

First, import the MediaPipe Model Maker quantization module:

In [None]:
from mediapipe_model_maker import quantization

Define a QuantizationConfig object using the `for_float16()` class method. This configuration modifies a trained model to use 16-bit floating point numbers instead of 32-bit floating point numbers. You can further customize the quantization process by setting additional parameters for the QuantizationConfig class.

In [None]:
# Post-training quantization settings for 16-bit floats; passed to
# export_model in the export cell.
quantization_config = quantization.QuantizationConfig.for_float16()

Export the model using the additional quantization_config object to apply post-training quantization. Note that if you previously ran `quantization_aware_training`, you must first convert the model back to a float model by using `restore_float_ckpt`.

In [None]:
# Go back to the float model (QAT may have been run above), export it with
# the float16 quantization config, list the export folder and download the
# fp16 model.
model.restore_float_ckpt()
model.export_model(model_name="model_fp16.tflite", quantization_config=quantization_config)
!ls -lh exported_model
files.download('exported_model/model_fp16.tflite')