## Setup

In [1]:
!python --version
!pip install --upgrade pip
!pip install mediapipe-model-maker

Python 3.10.12
Collecting pip
  Using cached pip-24.2-py3-none-any.whl.metadata (3.6 kB)
Using cached pip-24.2-py3-none-any.whl (1.8 MB)
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-24.2
Collecting mediapipe-model-maker
  Downloading mediapipe_model_maker-0.2.1.4-py3-none-any.whl.metadata (1.7 kB)
Collecting mediapipe>=0.10.0 (from mediapipe-model-maker)
  Downloading mediapipe-0.10.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting tensorflow<2.16,>=2.10 (from mediapipe-model-maker)
  Downloading tensorflow-2.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Collecting tensorflow-addons (from mediapipe-model-maker)
  Downloading tensorflow_addons-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)
Collecting tensorflow-model-

In [2]:
from google.colab import files
import os
import json
from tqdm import tqdm
import tensorflow as tf
assert tf.__version__.startswith('2')

from mediapipe_model_maker import object_detector

from google.colab import drive
import shutil

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



### Colab Pro

In [3]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Tue Aug 27 07:07:33 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off | 00000000:00:04.0 Off |                    0 |
| N/A   31C    P0              45W / 400W |      2MiB / 40960MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [4]:
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('Not using a high-RAM runtime')
else:
  print('You are using a high-RAM runtime!')

Your runtime has 89.6 gigabytes of available RAM

You are using a high-RAM runtime!


### Set paths

In [5]:
# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
base_path = '/content/drive/MyDrive/'
source_path = base_path + 'Datasets/florence1k/'

dest_base_path = base_path + 'MyProject/florence1k/'

train_dataset_path = dest_base_path + 'train/'
validation_dataset_path = dest_base_path + 'validation/'
test_dataset_path = dest_base_path + 'test/'

In [7]:
#@title Create directories

os.makedirs(dest_base_path, exist_ok=True)

os.makedirs(train_dataset_path, exist_ok=True)
os.makedirs(validation_dataset_path, exist_ok=True)
os.makedirs(test_dataset_path, exist_ok=True)

os.makedirs(os.path.join(train_dataset_path, 'images'), exist_ok=True)
os.makedirs(os.path.join(validation_dataset_path, 'images'), exist_ok=True)
os.makedirs(os.path.join(test_dataset_path, 'images'), exist_ok=True)

## Prepare Data

### Copy images

In [8]:
# Function to copy images
def copy_images(file_list, dest_folder):
    with open(file_list, 'r') as f:
        lines = f.readlines()
        for line in tqdm(lines, desc=f"Copying images to {dest_folder}"):
            img_name = line.strip()
            src = os.path.join(source_path, img_name)
            dst = os.path.join(dest_folder, img_name)
            os.makedirs(os.path.dirname(dst), exist_ok=True)
            shutil.copy2(src, dst)

# Copy images for each set
if os.listdir(train_dataset_path + 'images/') == [] and \
   os.listdir(validation_dataset_path + 'images/') == [] and \
   os.listdir(test_dataset_path + 'images/') == []:
    copy_images(dest_base_path + 'train.txt', train_dataset_path + 'images/')
    copy_images(dest_base_path + 'val.txt', validation_dataset_path + 'images/')
    copy_images(dest_base_path + 'test.txt', test_dataset_path + 'images/')
    print("Dataset division completed!\n")
else:
    print("One or more directories are not empty. Copy operation aborted.\n")

print(f"Number of images in train set: {len(os.listdir(train_dataset_path + 'images/'))}")
print(f"Number of images in validation set: {len(os.listdir(validation_dataset_path + 'images/'))}")
print(f"Number of images in test set: {len(os.listdir(test_dataset_path + 'images/'))}")

One or more directories are not empty. Copy operation aborted.

Number of images in train set: 720
Number of images in validation set: 360
Number of images in test set: 120


### Review Data

In [9]:
with open(os.path.join(train_dataset_path, "labels.json"), "r") as f:
  labels_json = json.load(f)
for category_item in labels_json["categories"]:
  print(f"{category_item['id']}: {category_item['name']}")

0: background
1: santamariadelfiore
2: battisterosangiovanni
3: campanilegiotto
4: galleriauffizi
5: loggialanzi
6: palazzovecchio
7: pontevecchio
8: basilicasantacroce
9: palazzopitti
10: piazzalemichelangelo
11: basilicasantamarianovella
12: basilicasanminiato


### Create Dataset

In [None]:
# TODO: do I need this instruction ?

cache_dirs = ["/tmp/od_data/train", "/tmp/od_data/validation"]

for cache_dir in cache_dirs:
    if os.path.exists(cache_dir):
        shutil.rmtree(cache_dir)

In [10]:
train_data = object_detector.Dataset.from_coco_folder(train_dataset_path, cache_dir="/tmp/od_data/train")
validation_data = object_detector.Dataset.from_coco_folder(validation_dataset_path, cache_dir="/tmp/od_data/validation")

print(f"{'Training Dataset Size:':<25} {train_data.size:>4}")
print(f"{'Validation Dataset Size:':<25} {validation_data.size:>4}")

Training Dataset Size:     720
Validation Dataset Size:   360


## Augmentation

### Augment Data

In [11]:
import albumentations as A
import numpy as np
import cv2

In [12]:
def get_transform(set='train'):
    bboxes_params = A.BboxParams(format='coco', min_visibility=0.3, label_fields=['class_labels']) # TODO: check min_visibility

    if set == 'train':
        transform = A.Compose([
            A.RandomResizedCrop(height=640, width=640, scale=(0.8, 1.0), ratio=(0.9, 1.1), p=0.25), # TODO: check h,w
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
            A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
            A.GaussNoise(var_limit=(10.0, 50.0), p=0.5),
            A.RandomShadow(num_shadows_lower=1, num_shadows_upper=3, shadow_dimension=5, shadow_roi=(0, 0.5, 1, 1), p=0.25),
            A.CLAHE(clip_limit=4.0, tile_grid_size=(8, 8), p=0.5),
            A.OneOf([
                A.MotionBlur(blur_limit=7, p=0.5),
                A.MedianBlur(blur_limit=7, p=0.5),
                A.GaussianBlur(blur_limit=7, p=0.5),
            ], p=0.3),
            A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=15, border_mode=0, p=0.3)
        ], bbox_params=bboxes_params)

    elif set == 'validation':
        transform = A.Compose([
            A.HorizontalFlip(p=0.5),
            A.CLAHE(clip_limit=4.0, tile_grid_size=(8, 8), p=0.5),
        ], bbox_params=bboxes_params)

    return transform

In [13]:
with open(os.path.join(train_dataset_path, 'labels.json'), 'r') as f:
    train_json = json.load(f)

with open(os.path.join(validation_dataset_path, 'labels.json'), 'r') as f:
    val_json = json.load(f)

with open(os.path.join(test_dataset_path, 'labels.json'), 'r') as f:
    test_json = json.load(f)

n_images = max(train_json['images'][-1]['id'], val_json['images'][-1]['id'], test_json['images'][-1]['id'])
n_annotations = max(train_json['annotations'][-1]['id'], val_json['annotations'][-1]['id'], test_json['annotations'][-1]['id'])

In [14]:
def clip_bbox(bbox, image_width, image_height):
    x_min, y_min, width, height = bbox

    x_min = max(0, min(x_min, image_width - 1)) # TODO: check -1
    y_min = max(0, min(y_min, image_height - 1)) # TODO: check -1
    width = min(width, image_width - x_min)
    height = min(height, image_height - y_min)

    return [x_min, y_min, width, height]

In [15]:
def validate_bbox(bbox, image_width, image_height):
    x, y, w, h = bbox

    return 0 <= x < image_width and 0 <= y < image_height and x + w <= image_width and y + h <= image_height

In [16]:
def apply_augmentation(image_path, bboxes, class_labels, output_path, output_filename, transform):
    # Read the image
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_height, image_width = image.shape[:2]

    # Apply the augmentation
    try:
        transformed = transform(image=image, bboxes=bboxes, class_labels=class_labels)
    except Exception as e:
        print(f"Error during transformation: {e}")
        return [], []

    # Save the augmented image
    augmented_image_path = os.path.join(output_path, output_filename)
    cv2.imwrite(augmented_image_path, cv2.cvtColor(transformed['image'], cv2.COLOR_RGB2BGR))

    return transformed['bboxes'], transformed['class_labels']

In [17]:
def augment_dataset(input_path, output_path, transform, n_images, n_annotations, num_augmentations=5):
    # Load the original COCO JSON file
    with open(os.path.join(input_path, 'labels.json'), 'r') as f:
        coco_data = json.load(f)

    new_images = []
    new_annotations = []

    # Copy original images and annotations
    for img in tqdm(coco_data['images'], desc="Copying original images"):

        src_path = os.path.join(input_path, 'images', img['file_name'])
        dst_path = os.path.join(output_path, 'images', img['file_name'])
        shutil.copy2(src_path, dst_path)

        new_images.append(img)
        img_anns = [ann for ann in coco_data['annotations'] if ann['image_id'] == img['id']]
        new_annotations.extend(img_anns)

    '''debug'''
    print("Before augmentation:")
    print(f"Number of images: {len(new_images)}")
    print(f"Number of annotations: {len(new_annotations)}")

    # Apply augmentations
    for img in tqdm(coco_data['images'], desc="Augmenting images"):
        image_path = os.path.join(input_path, 'images', img['file_name'])

        annotations = [ann for ann in coco_data['annotations'] if ann['image_id'] == img['id']]

        image = cv2.imread(image_path)
        image_height, image_width = image.shape[:2]

        for i in range(num_augmentations):
            bboxes = [ann['bbox'] for ann in annotations]
            class_labels = [ann['category_id'] for ann in annotations]

            # TODO: should I call the function clip_bbox() regardless of the function validate_bbox()?
            for bbox in bboxes:
                if not validate_bbox(bbox, image_width, image_height):
                    bboxes = [clip_bbox(bbox, image_width, image_height) for bbox in bboxes]

            new_filename = f"{os.path.splitext(img['file_name'])[0]}_aug_{i}.jpg"

            new_bboxes, new_class_labels = apply_augmentation(
                image_path, bboxes, class_labels,
                os.path.join(output_path, 'images'), new_filename, transform
            )

            new_img_id = n_images + 1
            new_images.append({
                'id': new_img_id,
                'file_name': new_filename
            })

            n_images = n_images + 1

            for bbox, cat_id in zip(new_bboxes, new_class_labels):
                new_annotations.append({
                    'id': n_annotations + 1,
                    'image_id': new_img_id,
                    'category_id': cat_id,
                    'bbox': bbox
                })

                n_annotations = n_annotations + 1

    '''debug'''
    print("After augmentation:")
    print(f"Number of images: {len(new_images)}")
    print(f"Number of annotations: {len(new_annotations)}")

    # Create the new COCO JSON file
    new_coco_data = {
        'categories': coco_data['categories'],
        'images': new_images,
        'annotations': new_annotations
    }

    # Save the new COCO JSON file
    with open(os.path.join(output_path, 'labels.json'), 'w') as f:
        json.dump(new_coco_data, f, indent=4)

    return n_images, n_annotations

In [18]:
augmented_train_dataset_path = dest_base_path + 'train_augmented/'
#augmented_validation_dataset_path = dest_base_path + 'validation_augmented/'

os.makedirs(os.path.join(augmented_train_dataset_path, 'images'), exist_ok=True)
#os.makedirs(os.path.join(augmented_validation_dataset_path, 'images'), exist_ok=True)

if os.listdir(augmented_train_dataset_path + 'images/') == []:
    n_images, n_annotations = augment_dataset(train_dataset_path, augmented_train_dataset_path, get_transform('train'), n_images, n_annotations, num_augmentations=4)
else:
    print("Augmentation on the training set has already been made.")

Augmentation on the training set has already been made.


In [19]:
count1 = sum(1 for filename in os.listdir(os.path.join(augmented_train_dataset_path, 'images')) if any(filename.lower().endswith(ext) for ext in ['.jpg', '.jpeg']))
count2 = sum(1 for filename in os.listdir(os.path.join(validation_dataset_path, 'images')) if any(filename.lower().endswith(ext) for ext in ['.jpg', '.jpeg']))

print(f"Number of images in the train_augmented folder: {count1}") # TODO: make prettier
print(f"Number of images in the validation folder: {count2}") # TODO: make prettier

Number of images in the train_augmented folder: 3600
Number of images in the validation folder: 360


### Rewrite Datasets

In [None]:
# TODO: add if condition (if augmentation has been executed)

In [None]:
shutil.rmtree("/tmp/od_data/augmented_train") # TODO: do I need this instruction ?
shutil.rmtree("/tmp/od_data/augmented_validation") # TODO: do I need this instruction ?

FileNotFoundError: [Errno 2] No such file or directory: '/tmp/od_data/augmented_train'

In [20]:
train_data = object_detector.Dataset.from_coco_folder(augmented_train_dataset_path, cache_dir="/tmp/od_data/augmented_train")

print(f"{'New Training Dataset Size:':<25} {train_data.size:>6} elements")
print(f"{'New Validation Dataset Size:':<25} {validation_data.size:>4} images")

New Training Dataset Size:   3599 elements
New Validation Dataset Size:  360 images


## Retrain model

### Set retraining options

In [21]:
spec = object_detector.SupportedModels.MOBILENET_MULTI_AVG_I384

hparams = object_detector.HParams(
    learning_rate=0.01, # 0.015 (is it possible to implement a scheduler?)
    batch_size=128, # try 64, 256
    epochs=120,
    cosine_decay_epochs=120,
    cosine_decay_alpha=0.1,
    shuffle=True, # TODO: check
    export_dir='exported_model'
)

model_options = object_detector.ModelOptions(
    l2_weight_decay=1e-4  # or experiment with values like 5e-5, 1e-5, 1e-4
)

options = object_detector.ObjectDetectorOptions(
    supported_model=spec,
    hparams=hparams,
    model_options=model_options
)

### Run retraining

In [22]:
model = object_detector.ObjectDetector.create(
    train_data=train_data,
    validation_data=validation_data,
    options=options
)

  inputs = self._flatten_to_reference_inputs(inputs)


Downloading https://storage.googleapis.com/tf_model_garden/vision/qat/mobilenetv2_ssd_coco/mobilenetv3.5_ssd_i384_ckpt.tar.gz to /tmp/model_maker/object_detector/mobilenetmultiavg_i384
Model: "retina_net_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 mobile_net (MobileNet)      {'2': (None, 96, 96, 32   3704416   
                             ),                                  
                              '3': (None, 48, 48, 64             
                             ),                                  
                              '4': (None, 24, 24, 16             
                             0),                                 
                              '5': (None, 12, 12, 19             
                             2),                                 
                              '6': (None, 1, 1, 1280             
                             )}                                



Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78/120
Epoch 7

### Evaluate the model performance

In [23]:
import pickle

In [24]:
loss, coco_metrics = model.evaluate(validation_data, batch_size=32) # TODO: check batch_size
print(f"Validation loss: {loss}")
print(f"Validation coco metrics: {coco_metrics}")

with open(os.path.join(dest_base_path, 'model_evaluation_batch_size_32.pkl'), 'wb') as f:
    pickle.dump({'loss': loss, 'coco_metrics': coco_metrics}, f)

creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=1.48s).
Accumulating evaluation results...
DONE (t=0.46s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.573
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.869
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.637
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.205
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.576
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.626
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.659
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.659
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 

In [None]:
#@title Visualize metrics

# Assuming coco_metrics is a dictionary as shown in the output above

# 1. Graph of the main COCO metrics
plt.figure(figsize=(10, 6))
metrics = ['AP', 'AP50', 'AP75', 'APl', 'ARmax1', 'ARmax10', 'ARmax100']
values = [coco_metrics[m] for m in metrics]
plt.bar(metrics, values)
plt.title('COCO Metrics')
plt.ylabel('Score')
plt.ylim(0, 1)
for i, v in enumerate(values):
    plt.text(i, v, f'{v:.3f}', ha='center', va='bottom')
plt.show()

# 2. Loss distribution
plt.figure(figsize=(10, 6))
loss_types = ['total_loss', 'cls_loss', 'box_loss', 'model_loss']
plt.bar(loss_types, loss)
plt.title('Loss Distribution')
plt.ylabel('Loss Value')
for i, v in enumerate(loss):
    plt.text(i, v, f'{v:.4f}', ha='center', va='bottom')
plt.show()

# 3. AP comparison by object size
plt.figure(figsize=(10, 6))
ap_sizes = ['APl', 'APm', 'APs']
ap_values = [coco_metrics[size] for size in ap_sizes]
plt.bar(ap_sizes, ap_values)
plt.title('AP by Object Size')
plt.ylabel('Average Precision')
plt.ylim(-1, 1)
for i, v in enumerate(ap_values):
    plt.text(i, v, f'{v:.3f}', ha='center', va='bottom')
plt.show()

In [25]:
loss, coco_metrics = model.evaluate(validation_data, batch_size=64) # TODO: check batch_size
print(f"Validation loss: {loss}")
print(f"Validation coco metrics: {coco_metrics}")

with open(os.path.join(dest_base_path, 'model_evaluation_batch_size_64.pkl'), 'wb') as f:
    pickle.dump({'loss': loss, 'coco_metrics': coco_metrics}, f)

creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=1.48s).
Accumulating evaluation results...
DONE (t=0.45s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.573
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.869
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.637
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.205
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.577
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.626
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.659
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.659
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 

In [None]:
#@title Visualize metrics

# Assuming coco_metrics is a dictionary as shown in the output above

# 1. Graph of the main COCO metrics
plt.figure(figsize=(10, 6))
metrics = ['AP', 'AP50', 'AP75', 'APl', 'ARmax1', 'ARmax10', 'ARmax100']
values = [coco_metrics[m] for m in metrics]
plt.bar(metrics, values)
plt.title('COCO Metrics')
plt.ylabel('Score')
plt.ylim(0, 1)
for i, v in enumerate(values):
    plt.text(i, v, f'{v:.3f}', ha='center', va='bottom')
plt.show()

# 2. Loss distribution
plt.figure(figsize=(10, 6))
loss_types = ['total_loss', 'cls_loss', 'box_loss', 'model_loss']
plt.bar(loss_types, loss)
plt.title('Loss Distribution')
plt.ylabel('Loss Value')
for i, v in enumerate(loss):
    plt.text(i, v, f'{v:.4f}', ha='center', va='bottom')
plt.show()

# 3. AP comparison by object size
plt.figure(figsize=(10, 6))
ap_sizes = ['APl', 'APm', 'APs']
ap_values = [coco_metrics[size] for size in ap_sizes]
plt.bar(ap_sizes, ap_values)
plt.title('AP by Object Size')
plt.ylabel('Average Precision')
plt.ylim(-1, 1)
for i, v in enumerate(ap_values):
    plt.text(i, v, f'{v:.3f}', ha='center', va='bottom')
plt.show()

In [26]:
loss, coco_metrics = model.evaluate(validation_data, batch_size=128)
print(f"Validation loss: {loss}")
print(f"Validation coco metrics: {coco_metrics}")

with open(os.path.join(dest_base_path, 'model_evaluation_batch_size_128.pkl'), 'wb') as f:
    pickle.dump({'loss': loss, 'coco_metrics': coco_metrics}, f)

creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=1.48s).
Accumulating evaluation results...
DONE (t=0.45s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.573
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.869
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.637
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.205
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.577
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.626
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.659
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.659
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 

In [None]:
#@title Visualize metrics

# Assuming coco_metrics is a dictionary as shown in the output above

# 1. Graph of the main COCO metrics
plt.figure(figsize=(10, 6))
metrics = ['AP', 'AP50', 'AP75', 'APl', 'ARmax1', 'ARmax10', 'ARmax100']
values = [coco_metrics[m] for m in metrics]
plt.bar(metrics, values)
plt.title('COCO Metrics')
plt.ylabel('Score')
plt.ylim(0, 1)
for i, v in enumerate(values):
    plt.text(i, v, f'{v:.3f}', ha='center', va='bottom')
plt.show()

# 2. Loss distribution
plt.figure(figsize=(10, 6))
loss_types = ['total_loss', 'cls_loss', 'box_loss', 'model_loss']
plt.bar(loss_types, loss)
plt.title('Loss Distribution')
plt.ylabel('Loss Value')
for i, v in enumerate(loss):
    plt.text(i, v, f'{v:.4f}', ha='center', va='bottom')
plt.show()

# 3. AP comparison by object size
plt.figure(figsize=(10, 6))
ap_sizes = ['APl', 'APm', 'APs']
ap_values = [coco_metrics[size] for size in ap_sizes]
plt.bar(ap_sizes, ap_values)
plt.title('AP by Object Size')
plt.ylabel('Average Precision')
plt.ylim(-1, 1)
for i, v in enumerate(ap_values):
    plt.text(i, v, f'{v:.3f}', ha='center', va='bottom')
plt.show()

## Export model

In [None]:
# TODO: do I need to remove the existing model first?

In [27]:
model.export_model()
!ls exported_model
files.download('exported_model/model.tflite')

Exporting a floating point model


  inputs = self._flatten_to_reference_inputs(inputs)


float_ckpt.data-00000-of-00001	float_ckpt.index  metadata.json  model.tflite


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Model quantization

### Quantization aware training (int8 quantization)

### Post-training quantization (fp16 quantization)

In [28]:
from mediapipe_model_maker import quantization

In [29]:
quantization_config = quantization.QuantizationConfig.for_float16()

In [30]:
model.restore_float_ckpt()
model.export_model(model_name="model_fp16.tflite", quantization_config=quantization_config)
!ls -lh exported_model
files.download('exported_model/model_fp16.tflite')

  inputs = self._flatten_to_reference_inputs(inputs)


Using existing files at /tmp/model_maker/object_detector/mobilenetmultiavg_i384
Model: "retina_net_model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 mobile_net_1 (MobileNet)    {'2': (None, 96, 96, 32   3704416   
                             ),                                  
                              '3': (None, 48, 48, 64             
                             ),                                  
                              '4': (None, 24, 24, 16             
                             0),                                 
                              '5': (None, 12, 12, 19             
                             2),                                 
                              '6': (None, 1, 1, 1280             
                             )}                                  
                                                                 
 fpn_1 (FPN)                 {'5':

  inputs = self._flatten_to_reference_inputs(inputs)


total 40M
-rw-r--r-- 1 root root  16M Aug 27 12:01 float_ckpt.data-00000-of-00001
-rw-r--r-- 1 root root  32K Aug 27 12:01 float_ckpt.index
-rw-r--r-- 1 root root 5.4M Aug 27 12:22 metadata.json
-rw-r--r-- 1 root root 6.7M Aug 27 12:22 model_fp16.tflite
-rw-r--r-- 1 root root  13M Aug 27 12:11 model.tflite


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>