# From Keras to TensorflowLite

There are three types of post-training quantization for TensorFlow Lite models.

1. __Float16 quantization__: reduce model size by up to half (since all weights are now half the original size) with minimal loss in accuracy. Can speed up processing with GPUs.
2. __Weight quantization__: quantizes *only the weights* from floating point to 8-bit integers, reducing the model size by up to 4x and speeding up inference. During inference some operations are executed with integer kernels, others with float kernels (*hybrid operators*).
3. __Integer quantization__: all model values (weights and activations) are quantized to 8-bit integers. This results in a 4x reduction in model size and a 3 to 4x improvement in CPU performance. It needs a representative part of the dataset to quantize the activations. If all the operations are supported it results in a __full integer quantization__, compatible with some hardware accelerators (e.g. Coral). Otherwise the incompatible operations fall back to float32.

In [1]:
import tensorflow as tf
import numpy as np
import os
import pathlib

In [2]:
# Location of the trained Keras model: its file name and the folder that
# contains it.
name_model = 'model.h5'
DIR = './bin'

# Full path of the Keras model file.
model_fp = os.path.join(DIR, name_model)

## Conversion without quantization

### Load the model

In [None]:
# Load the trained Keras model from disk. The loader lives in
# tf.keras.models and needs the path of the saved model file.
model = tf.keras.models.load_model(model_fp)

### Convert the model and save it

In [None]:
# from_keras_model() expects a tf.keras.Model instance rather than a file
# path, so the saved model is loaded from disk before building the converter.
keras_model = tf.keras.models.load_model(model_fp)
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)

# No optimization is configured on the converter for this conversion.
tflite_model = converter.convert()

# Save the converted model in the same folder as the Keras model.
tflite_model_file = os.path.join(DIR,"model.tflite")
pathlib.Path(tflite_model_file).write_bytes(tflite_model)

## Float16 quantization

### Load the model

In [None]:
# Load the trained Keras model from disk. The loader lives in
# tf.keras.models and needs the path of the saved model file.
model = tf.keras.models.load_model(model_fp)

### Create the converter object

In [None]:
# from_keras_model() expects a tf.keras.Model instance rather than a file
# path, so the saved model is loaded from disk before building the converter.
keras_model = tf.keras.models.load_model(model_fp)
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)

# Enable the default optimizations and declare float16 as the supported type
# to request float16 quantization.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]

### Convert the model and save it

In [None]:
# Run the conversion configured on the converter object.
tflite_fp16_model = converter.convert()

# Write the converted model in the same folder as the Keras model.
tflite_model_fp16_file = os.path.join(DIR, "model_fp16.tflite")
fp16_path = pathlib.Path(tflite_model_fp16_file)
fp16_path.write_bytes(tflite_fp16_model)

## Weight quantization

### Load the model

In [None]:
# Load the trained Keras model from disk. The loader lives in
# tf.keras.models and needs the path of the saved model file.
model = tf.keras.models.load_model(model_fp)

### Create the converter object

In [None]:
# from_keras_model() expects a tf.keras.Model instance rather than a file
# path, so the saved model is loaded from disk before building the converter.
keras_model = tf.keras.models.load_model(model_fp)
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)

# Optimize.DEFAULT replaces OPTIMIZE_FOR_SIZE, which is a deprecated alias
# with the same effect. Without a representative dataset only the weights
# are quantized.
converter.optimizations = [tf.lite.Optimize.DEFAULT]

### Convert the model and save it

In [None]:
# Run the conversion configured on the converter object.
tflite_8bit_model = converter.convert()

# Write the converted model in the same folder as the Keras model.
tflite_model_8bit_file = os.path.join(DIR, "model_8bit.tflite")
weights_8bit_path = pathlib.Path(tflite_model_8bit_file)
weights_8bit_path.write_bytes(tflite_8bit_model)

## Integer quantization

In [73]:
# Settings used to build the representative dataset.
DATASET_DIR = './dataset'  # the directory must contain at least n_data images
dataset_fp = pathlib.Path(DATASET_DIR)

n_data = 100             # number of representative inputs
input_size = (150, 150)  # input size of the model
data_range = 255         # range to normalize data

### Import the dataset

In [78]:
# Dataset of the paths of all the files inside the dataset directory.
list_ds = tf.data.Dataset.list_files(str(dataset_fp/"*"))

def representative_dataset_gen():
    """Yield up to `n_data` sample inputs used to calibrate the quantization.

    Every file path drawn from `list_ds` is read, decoded as a 3-channel
    image and resized to `input_size`. Each sample is yielded as a
    one-element list holding the image with a leading axis of size 1 added.
    """
    # Draw the samples in a single pass over the dataset so that each file is
    # used at most once, instead of restarting the dataset for every sample
    # and only ever reading its first element.
    for img_f in list_ds.take(n_data):
        img = tf.io.decode_image(tf.io.read_file(img_f), channels=3, dtype=tf.dtypes.uint8)
        img = tf.image.resize(img, input_size, method=tf.image.ResizeMethod.AREA)
        # NOTE(review): `data_range` is defined with the other settings but is
        # never applied here; confirm whether the model expects inputs scaled
        # by it.
        yield [img[None]]

### Load the model

In [None]:
# Load the trained Keras model from disk. The loader lives in
# tf.keras.models and needs the path of the saved model file.
model = tf.keras.models.load_model(model_fp)

### Create the converter object

In [None]:
# import the converter loading the model
# tf.lite.TFLiteConverter offers no from_keras_model_file() in TF2, so the
# saved model is loaded from disk and passed to from_keras_model(), as done
# for the other conversions in this notebook.
keras_model = tf.keras.models.load_model(os.path.join(DIR, name_model))
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)

# The representative dataset lets the converter calibrate the activations.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

### Convert the model and save it

In [None]:
# Run the conversion configured on the converter object.
tflite_integer_model = converter.convert()

# Write the converted model in the same folder as the Keras model.
tflite_model_integer_file = os.path.join(DIR, "model_integer.tflite")
integer_path = pathlib.Path(tflite_model_integer_file)
integer_path.write_bytes(tflite_integer_model)

## Full integer quantization

In [73]:
# Settings used to build the representative dataset.
DATASET_DIR = './dataset'  # the directory must contain at least n_data images
dataset_fp = pathlib.Path(DATASET_DIR)

n_data = 100             # number of representative inputs
input_size = (150, 150)  # input size of the model
data_range = 255         # range to normalize data

### Import the dataset

In [78]:
# Dataset of the paths of all the files inside the dataset directory.
list_ds = tf.data.Dataset.list_files(str(dataset_fp/"*"))

def representative_dataset_gen():
    """Yield up to `n_data` sample inputs used to calibrate the quantization.

    Every file path drawn from `list_ds` is read, decoded as a 3-channel
    image and resized to `input_size`. Each sample is yielded as a
    one-element list holding the image with a leading axis of size 1 added.
    """
    # Draw the samples in a single pass over the dataset so that each file is
    # used at most once, instead of restarting the dataset for every sample
    # and only ever reading its first element.
    for img_f in list_ds.take(n_data):
        img = tf.io.decode_image(tf.io.read_file(img_f), channels=3, dtype=tf.dtypes.uint8)
        img = tf.image.resize(img, input_size, method=tf.image.ResizeMethod.AREA)
        # NOTE(review): `data_range` is defined with the other settings but is
        # never applied here; confirm whether the model expects inputs scaled
        # by it.
        yield [img[None]]

### Load the model

In [None]:
# Load the trained Keras model from disk. The loader lives in
# tf.keras.models and needs the path of the saved model file.
model = tf.keras.models.load_model(model_fp)

### Create the converter object

In [None]:
# import the converter loading the model
converter = tf.compat.v1.lite.TFLiteConverter.from_keras_model_file(os.path.join(DIR, name_model)) #TF2.0 currently not compatible

converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

### Convert the model and save it

In [None]:
# Run the conversion configured on the converter object.
tflite_integer_model = converter.convert()

# The full integer model gets its own file name: "model_integer.tflite" is
# already written by the integer quantization section and would otherwise be
# overwritten here.
tflite_model_integer_file = os.path.join(DIR,"model_full_integer.tflite")
pathlib.Path(tflite_model_integer_file).write_bytes(tflite_integer_model)