In [8]:
import tensorflow as tf
import os
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
SAVED_MODEL_DIR = "person_detection_saved_model"

In [9]:
model_fitted = tf.keras.models.load_model(
    'model.h5', custom_objects=None, compile=False,
    options=None
)

In [10]:
def convert_into_flite(input_dir, output_name):
    # Convert the model
    converter = tf.lite.TFLiteConverter.from_saved_model(input_dir) # path to the SavedModel directory
    converter.target_spec.supported_ops = [
        tf.lite.OpsSet.TFLITE_BUILTINS  # enable ONLY TensorFlow Lite ops.
    ]
    tflite_model = converter.convert()

    # Save the model.
    with open(output_name, 'wb') as f:
        f.write(tflite_model)

### Convert into Flite, no quantization

In [11]:
convert_into_flite(SAVED_MODEL_DIR, "person_detection.tflite")

2023-03-18 20:54:24.901099: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2023-03-18 20:54:24.901127: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2023-03-18 20:54:24.901258: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: person_detection_saved_model
2023-03-18 20:54:24.905597: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2023-03-18 20:54:24.905622: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: person_detection_saved_model
2023-03-18 20:54:24.917329: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2023-03-18 20:54:24.984662: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bundle at path: person_detection_saved_model
2023-03-18 20:54:25.003889: I tensorflow/cc/saved_model/loader.cc:305] SavedModel load for tags { serve 

In [12]:
from PIL import Image
import numpy as np

SIZE = (256, 256)
DATA_ROOT_FOLDER = "/media/edge7/TOSHIBA EXT/"  # Modify this as needed, you should have this variable set in get_dataset.ipynb, use the same
DATA_IS_IN = os.path.join(DATA_ROOT_FOLDER, 'data')
PERSON_FILES = os.path.join(DATA_IS_IN, 'person')
NO_PERSON_FILES = os.path.join(DATA_IS_IN, 'notperson')
all_files_train_p = [os.path.join( DATA_IS_IN, 'person', x) for x in os.listdir(PERSON_FILES)]
all_files_train_np = [os.path.join( DATA_IS_IN, 'notperson', x) for x in os.listdir(NO_PERSON_FILES)]
all_files = all_files_train_p + all_files_train_np

def load_grayscale_images(limit=-1):
    counter = 0
    for path in all_files:
        if counter > limit != -1:
            break
        counter +=1
        image = Image.open(path)
        image = np.array(image.resize(SIZE, resample=Image.BILINEAR))
        if image.ndim == 3 and image.shape[2] == 4:
            image = image[..., :3]  # Remove the alpha channel

        if image.ndim == 3:
            image = np.dot(image[...,:3], [0.2989, 0.5870, 0.1140]) # To gray scale
        image = image / 255.0
        image = tf.expand_dims(image, axis=-1)  # Add channel dimension
        image = tf.expand_dims(image, axis=0)
        image = tf.image.convert_image_dtype(image, tf.float32)
        yield [image]

In [13]:

def convert_tf_lite_integer_only(input_dir, output_name):
    # Create TFLiteConverter object and set optimizations
    converter = tf.lite.TFLiteConverter.from_saved_model(input_dir)
    # Set representative dataset
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = load_grayscale_images
    tflite_quant_model = converter.convert()
    # Save the model.
    with open(output_name, 'wb') as f:
        f.write(tflite_quant_model)

In [14]:
convert_tf_lite_integer_only(SAVED_MODEL_DIR, "person_detection_quantized.tflite")

2023-03-18 20:54:25.726814: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2023-03-18 20:54:25.726837: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2023-03-18 20:54:25.726952: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: person_detection_saved_model
2023-03-18 20:54:25.731514: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2023-03-18 20:54:25.731533: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: person_detection_saved_model
2023-03-18 20:54:25.744228: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2023-03-18 20:54:25.805746: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bundle at path: person_detection_saved_model
2023-03-18 20:54:25.824683: I tensorflow/cc/saved_model/loader.cc:305] SavedModel load for tags { serve 

### Show difference, in tensor details, between quantized and not quantized model

In [15]:
interpreter_quant = tf.lite.Interpreter(model_path="person_detection_quantized.tflite")
# Get input and output tensors.
input_details = interpreter_quant.get_input_details()
output_details = interpreter_quant.get_output_details()
print(input_details, output_details)
print("tensor details")
print(interpreter_quant.get_tensor_details())
for op in interpreter_quant.get_tensor_details():
    print(op['name'])

[{'name': 'serving_default_input_2:0', 'index': 0, 'shape': array([  1, 256, 256,   1], dtype=int32), 'shape_signature': array([ -1, 256, 256,   1], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}] [{'name': 'StatefulPartitionedCall:0', 'index': 41, 'shape': array([1, 1], dtype=int32), 'shape_signature': array([-1,  1], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
tensor details
[{'name': 'serving_default_input_2:0', 'index': 0, 'shape': array([  1, 256, 256,   1], dtype=int32), 'shape_signature': array([ -1, 256, 256,   1], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters'

In [16]:
interpreter = tf.lite.Interpreter(model_path="person_detection.tflite")
# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print(input_details, output_details)
print("tensor details")
print(interpreter.get_tensor_details())

[{'name': 'serving_default_input_2:0', 'index': 0, 'shape': array([  1, 256, 256,   1], dtype=int32), 'shape_signature': array([ -1, 256, 256,   1], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}] [{'name': 'StatefulPartitionedCall:0', 'index': 39, 'shape': array([1, 1], dtype=int32), 'shape_signature': array([-1,  1], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
tensor details
[{'name': 'serving_default_input_2:0', 'index': 0, 'shape': array([  1, 256, 256,   1], dtype=int32), 'shape_signature': array([ -1, 256, 256,   1], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters'

### Check inference difference between TFLITE, TFLITE quantized and original TF model

In [17]:
def get_prediction_from_tflite(interpreter, input):
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    interpreter.set_tensor(input_details[0]['index'], input)
    output_details = interpreter.get_output_details()
    interpreter.invoke()
    return interpreter.get_tensor(output_details[0]['index'])[0]


In [18]:
for image in load_grayscale_images(limit=5):
    image = image[0]
    prediction_from_tflite = get_prediction_from_tflite(interpreter, image)
    prediction_from_tflite_quant = get_prediction_from_tflite(interpreter_quant, image)
    prediction_from_tf = model_fitted.predict(image)[0]
    print(prediction_from_tflite, prediction_from_tflite_quant, prediction_from_tf)

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


[0.7237123] [0.7109375] [0.72371197]
[0.9737151] [0.96875] [0.9737151]
[0.9739039] [0.97265625] [0.9739038]
[0.9744072] [0.96875] [0.97440726]
[0.9995046] [0.99609375] [0.9995046]
[0.92357093] [0.890625] [0.92357093]


In [19]:
### Transform it into char array

In [21]:
! xxd -i person_detection_quantized.tflite > esp32_model.cc
