Docs: https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/running_on_mobile_tf2.md

Load Depenencies

In [49]:
import tensorflow as tf
import pathlib
import numpy as np
from object_detection.utils import label_map_util
from tflite_support.metadata_writers import object_detector
from tflite_support.metadata_writers import writer_utils
from tflite_support import metadata
import cv2
from tflite_support import metadata as _metadata
from tflite_support import metadata_schema_py_generated as _metadata_fb
from tensorflow_lite_support.metadata.python.metadata_writers import metadata_info
import json

#Define Locations
_PATH_TO_SAVED_MODEL_DIR = '/home/developer/Documents/git/Tensorflow/workspace/exported_model/tf2/BA/MED3_ssd_mobilenet_v2_fpn_640x640_postprocess/saved_model'
_tflite_models_dir = pathlib.Path(_PATH_TO_SAVED_MODEL_DIR)

_PATH_TO_DATASET = '/home/developer/Documents/git/Tensorflow/workspace/datasets/'
_dataset_dir = pathlib.Path(_PATH_TO_DATASET)

_ODT_LABEL_MAP_PATH = _dataset_dir/'dataset/MED3-REV1.00/MED3-REV1.00-detect_label_map.pptxt'
_ODT_RECORD_FILE = _dataset_dir/'train.record'
_TFLITE_LABEL_PATH = _tflite_models_dir/'tflite_label_map.txt'
_TFLITE_MODEL_WITH_METADATA_PATH = _tflite_models_dir/'model_quant_metadata.tflite'
_TFLITE_MODEL_PATH =  _tflite_models_dir/'model_quant.tflite'

## Define representative_dataset for weight and activation calibration

Doc at: https://www.tensorflow.org/lite/performance/post_training_quantization

The representative_dataset is a generator, that is used by TFLiteConverter.

It uses the encoded images from a tfrecord file and provides a preprocessed input image for the calibration process

In [2]:
train_dataset = tf.data.TFRecordDataset(_ODT_RECORD_FILE)

def decode_img(encoded_img):
    image_np = tf.image.decode_jpeg(encoded_img, channels=3).numpy()
    return image_np

def preprocess_image(encoded_image ):
    image = decode_img(encoded_image)
    image = cv2.resize(image, (640,640))
    image = (image.astype(np.float32) -127.5)/127.5
    image = np.expand_dims(image, axis=0)
    return image

def representative_dataset():
    """
    yield/return a input tensor for the model, we use the images from tfrecord
    """
    for data in train_dataset.shuffle(1000):
        example = tf.train.Example()
        example.ParseFromString(data.numpy())
        encoded_image = example.features.feature['image/encoded'].bytes_list.value[0]
        image_np = preprocess_image(encoded_image)
        yield [image_np]


2023-12-21 10:36:11.783083: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/developer/Documents/git/Tensorflow/python-env/lib/python3.9/site-packages/cv2/../../lib64:
2023-12-21 10:36:11.783168: W tensorflow/stream_executor/cuda/cuda_driver.cc:263] failed call to cuInit: UNKNOWN ERROR (303)
2023-12-21 10:36:11.783246: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (fs-development-machine): /proc/driver/nvidia/version does not exist
2023-12-21 10:36:11.785076: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compi

## Quantization
Here are examples for different Post-Train-Quantization options.
### Dynamic range quantization
Dynamic range quantization is a recommended starting point because it provides reduced memory usage and faster computation without you having to provide a representative dataset for calibration. This type of quantization, statically quantizes only the weights from floating point to integer at conversion time, which provides 8-bits of precision:

In [3]:
converter = tf.lite.TFLiteConverter.from_saved_model(_PATH_TO_SAVED_MODEL_DIR)
#converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model = converter.convert()

2023-12-21 10:36:32.979271: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2023-12-21 10:36:32.979340: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2023-12-21 10:36:32.980842: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /home/developer/Documents/git/Tensorflow/workspace/exported_model/tf2/BA/MED3_ssd_resnet_v1_fpn_640x640_3k0/tflite/saved_model
2023-12-21 10:36:33.097934: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2023-12-21 10:36:33.098007: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /home/developer/Documents/git/Tensorflow/workspace/exported_model/tf2/BA/MED3_ssd_resnet_v1_fpn_640x640_3k0/tflite/saved_model
2023-12-21 10:36:33.367387: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
2023-12-21 10:36:33.427423: I t

Estimated count of arithmetic ops: 164.927 G  ops, equivalently 82.464 G  MACs


To further reduce latency during inference, "dynamic-range" operators dynamically quantize activations based on their range to 8-bits and perform computations with 8-bit weights and activations. This optimization provides latencies close to fully fixed-point inferences. However, the outputs are still stored using floating point so the increased speed of dynamic-range ops is less than a full fixed-point computation.

### Full integer quantization

You can get further latency improvements, reductions in peak memory usage, and compatibility with integer only hardware devices or accelerators by making sure all model math is integer quantized.

For full integer quantization, you need to calibrate or estimate the range, i.e, (min, max) of all floating-point tensors in the model. Unlike constant tensors such as weights and biases, variable tensors such as model input, activations (outputs of intermediate layers) and model output cannot be calibrated unless we run a few inference cycles. As a result, the converter requires a representative dataset to calibrate them. This dataset can be a small subset (around ~100-500 samples) of the training or validation data. Refer to the representative_dataset() function above.

#### Integer with float fallback (using default float input/output)
In order to fully integer quantize a model, but use float operators when they don't have an integer implementation (to ensure conversion occurs smoothly), use the following steps:

In [None]:
converter = tf.lite.TFLiteConverter.from_saved_model(_PATH_TO_SAVED_MODEL_DIR)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
tflite_quant_model = converter.convert()

Note: This tflite_quant_model won't be compatible with integer only devices (such as 8-bit microcontrollers) and accelerators (such as the Coral Edge TPU) because the input and output still remain float in order to have the same interface as the original float only model.

#### Integer only

Creating integer only models is a common use case for TensorFlow Lite for Microcontrollers and Coral Edge TPUs.
Additionally, to ensure compatibility with integer only devices (such as 8-bit microcontrollers) and accelerators (such as the Coral Edge TPU), you can enforce full integer quantization for all ops including the input and output, by using the following steps:


In [None]:
converter = tf.lite.TFLiteConverter.from_saved_model(_PATH_TO_SAVED_MODEL_DIR)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
#converter.inference_input_type = tf.uint8  # or tf.int8
#converter.inference_output_type = tf.uint8  # or tf.int8
tflite_quant_model = converter.convert()

#### Integer only: 16-bit activations with 8-bit weights (experimental)

This is an experimental quantization scheme. It is similar to the "integer only" scheme, but activations are quantized based on their range to 16-bits, weights are quantized in 8-bit integer and bias is quantized into 64-bit integer. This is referred to as 16x8 quantization further.

The main advantage of this quantization is that it can improve accuracy significantly, but only slightly increase model size.

In [None]:
converter = tf.lite.TFLiteConverter.from_saved_model(_PATH_TO_SAVED_MODEL_DIR)
converter.representative_dataset = representative_dataset
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8,
                                       tf.lite.OpsSet.TFLITE_BUILTINS]

tflite_quant_model = converter.convert()

The disadvantage of this quantization is:

    Currently inference is noticeably slower than 8-bit full integer due to the lack of optimized kernel implementation.
    Currently it is incompatible with the existing hardware accelerated TFLite delegates, but it seems to work with ethos-u


##### Show Input and Output types of Model

In [4]:
interpreter = tf.lite.Interpreter(model_content=tflite_quant_model)
input_type = interpreter.get_input_details()[0]['dtype']
print('input 0 =', input_type)
for i in range (0,4):
    output_type = interpreter.get_output_details()[i]['dtype']
    print('output',i , '=', output_type)

input 0 = <class 'numpy.float32'>
output 0 = <class 'numpy.float32'>
output 1 = <class 'numpy.float32'>
output 2 = <class 'numpy.float32'>
output 3 = <class 'numpy.float32'>


##### Save Model

In [5]:
tflite_model_quant_file = _TFLITE_MODEL_PATH
tflite_model_quant_file.write_bytes(tflite_quant_model)

204400632

##### Include META_DATA into tflite model

In [7]:
#Create Labe Map
category_index = label_map_util.create_category_index_from_labelmap(_ODT_LABEL_MAP_PATH)
f = open(_TFLITE_LABEL_PATH, 'w')

#Range are all  label ids in label map
for class_id in range(1, 32+1):
  if class_id not in category_index:
    f.write('???\n')
    continue
  
  name = category_index[class_id]['name']
  f.write(name+'\n')
  
f.close()

In [15]:
_MODEL_NAME = "ObjectDetector"
_MODEL_DESCRIPTION = (
    "Identify which of a known set of objects might be present and provide "
    "information about their positions within the given image or a video "
    "stream.")
_INPUT_NAME = "image"
_INPUT_DESCRIPTION = "Input image to be detected."

_OUTPUT_CATRGORY_NAME = "category"
_OUTPUT_CATEGORY_DESCRIPTION = "The categories of the detected boxes."
_OUTPUT_SCORE_NAME = "score"
_OUTPUT_SCORE_DESCRIPTION = "The scores of the detected boxes."

ResNet V1

In [36]:
#create general info
general_md = metadata_info.GeneralMd(
    name="ResNet-V1 " + _MODEL_NAME,
    version="V1",
    description=_MODEL_DESCRIPTION,
    author="F&S Elektronik Systeme GmbH"
)

# Creates input info
input_md = metadata_info.InputImageTensorMd(
    name=_INPUT_NAME,
    description="VGG",
    norm_mean=(127.5,),
    norm_std=(127.5,),
    color_space_type=_metadata_fb.ColorSpaceType.RGB,
    tensor_type=writer_utils.get_input_tensor_types(writer_utils.load_file(_TFLITE_MODEL_PATH))[0])


MobileNet V2

In [35]:
#create general info
general_md = metadata_info.GeneralMd(
    name="MobileNet-V1 "+ _MODEL_NAME,
    version="V1",
    description=_MODEL_DESCRIPTION,
    author="F&S Elektronik Systeme GmbH"
)

# Creates input info
input_md = metadata_info.InputImageTensorMd(
    name=_INPUT_NAME,
    description="normalized",
    norm_mean=(0,),
    norm_std=(1,),
    color_space_type=_metadata_fb.ColorSpaceType.RGB ,
    tensor_type=writer_utils.get_input_tensor_types(writer_utils.load_file(_TFLITE_MODEL_PATH))[0])

input_md.color_space_type = 1

In [23]:
# Creates output info.
output_category_md = metadata_info.CategoryTensorMd(
    name=_OUTPUT_CATRGORY_NAME,
    description=_OUTPUT_CATEGORY_DESCRIPTION,
    label_files=[
        metadata_info.LabelFileMd(file_path=file_path)
        for file_path in [_TFLITE_LABEL_PATH]
    ])

output_score_md = metadata_info.ClassificationTensorMd(
    name=_OUTPUT_SCORE_NAME,
    description=_OUTPUT_SCORE_DESCRIPTION,
    score_calibration_md=None
)

In [37]:
#Include Metadata
writer = object_detector.MetadataWriter.create_from_metadata_info(
    writer_utils.load_file(_TFLITE_MODEL_PATH),
    general_md=general_md, input_md=input_md,
    output_category_md=output_category_md,
    output_score_md=output_score_md
)
writer_utils.save_file(writer.populate(), _TFLITE_MODEL_WITH_METADATA_PATH)


In [50]:
displayer = metadata.MetadataDisplayer.with_model_file(_TFLITE_MODEL_WITH_METADATA_PATH)
print("Metadata populated:")
print(displayer.get_metadata_json())
print("=============================")
print("Associated file(s) populated:")
print(displayer.get_packed_associated_file_list())
data = json.loads(displayer.get_metadata_json())
print(data["subgraph_metadata"][0]["input_tensor_metadata"][0]["description"])


Metadata populated:
{
  "name": "ResNet-V1 ObjectDetector",
  "description": "Identify which of a known set of objects might be present and provide information about their positions within the given image or a video stream.",
  "version": "V1",
  "subgraph_metadata": [
    {
      "input_tensor_metadata": [
        {
          "name": "image",
          "description": "VGG",
          "content": {
            "content_properties_type": "ImageProperties",
            "content_properties": {
              "color_space": "RGB"
            }
          },
          "process_units": [
            {
              "options_type": "NormalizationOptions",
              "options": {
                "mean": [
                  127.5
                ],
                "std": [
                  127.5
                ]
              }
            }
          ],
          "stats": {
            "max": [
              1.0
            ],
            "min": [
              -1.0
            ]
          }

In [None]:
interpreter = tf.lite.Interpreter(str(_TFLITE_MODEL_WITH_METADATA_PATH))
input_type = interpreter.get_input_details()[0]['dtype']
print('input: ', input_type)
output_type = interpreter.get_output_details()[0]['dtype']
print('output: ', output_type)

print('input_details = \n', interpreter.get_input_details())
print('output_details = \n', interpreter.get_output_details())