# From Keras to TensorflowLite

There are four types of post-training conversion for TensorflowLite models:

1. __No quantization__: the model is converted with some optimizations applied (e.g. pruning of training-related nodes); weights and activations are stored as float32 numbers.
2. __Float16 quantization__: reduces model size by up to half (since all weights are now half the original size) with minimal loss in accuracy. Model still executes as float32 operations. Can speed up processing with GPUs.
3. __Weight quantization__: quantizes *only the weights* from floating point to 8-bit integers, reducing the model size by up to 4x and speeding up inference. During inference some operations will be executed with integer kernels, others with float kernels (*hybrid operators*).
4. __Integer quantization__: all model values (weights and activations) are quantized to 8-bit integers. This results in a 4x reduction in model size and a 3 to 4x performance improvement on CPU. It needs a representative subset of the dataset to quantize the activations. If all the operations are supported, it results in a __full integer quantization__, compatible with some hardware accelerators (e.g. Coral). Otherwise, the unsupported operations fall back to float32.

In [1]:
import os
import tensorflow as tf
import numpy as np
import pathlib

In [2]:
# Restrict TensorFlow to the first visible GPU and enable memory growth on it.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
    tf.config.experimental.set_memory_growth(gpus[0], True)
    print(gpus[0])
else:
    # The conversion itself does not need a GPU, so continue on CPU instead
    # of failing with an IndexError on gpus[0].
    print("No GPU found, running on CPU.")

PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [3]:
# Folder that contains the saved Keras model (.h5) and the model's base name.
DIR = './models'
model_name = 'model'

# Full path of the .h5 file that the cells below load and convert.
model_fp = os.path.join(DIR, f"{model_name}.h5")

## Conversion without quantization

### Load the model

In [4]:
# Load the trained Keras model from the .h5 file and print its layer summary.
model = tf.keras.models.load_model(model_fp)
model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_19 (Conv2D)           (None, 100, 100, 16)      448       
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 50, 50, 16)        0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 50, 50, 16)        2320      
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 25, 25, 16)        0         
_________________________________________________________________
conv2d_21 (Conv2D)           (None, 25, 25, 8)         1160      
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 12, 12, 8)         0         
_________________________________________________________________
flatten_9 (Flatten)          (None, 1152)            

### Convert the model and save it

In [5]:
# Create a converter from the in-memory Keras model; no optimizations are set,
# so the converted model is not quantized.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model bytes next to the source model as <model_name>.tflite.
tflite_model_file = os.path.join(DIR, f"{model_name}.tflite")
with open(tflite_model_file, "wb") as out_file:
    out_file.write(tflite_model)
print(f"Model {tflite_model_file} saved.")

Model ./models/model.tflite saved.


## Float16 quantization

### Load the model

In [6]:
# Reload the Keras model from model_fp so this section starts from a fresh copy,
# then print its layer summary.
model = tf.keras.models.load_model(model_fp)
model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_19 (Conv2D)           (None, 100, 100, 16)      448       
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 50, 50, 16)        0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 50, 50, 16)        2320      
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 25, 25, 16)        0         
_________________________________________________________________
conv2d_21 (Conv2D)           (None, 25, 25, 8)         1160      
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 12, 12, 8)         0         
_________________________________________________________________
flatten_9 (Flatten)          (None, 1152)            

### Create the converter object

In [7]:
# Create a converter from the loaded Keras model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Enable the default optimizations and declare float16 as the supported type.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]

### Convert the model and save it

In [8]:
# Run the conversion with the float16 settings configured on the converter.
tflite_fp16_model = converter.convert()

# Write the converted model bytes as <model_name>_fp16.tflite in the model folder.
tflite_model_fp16_file = os.path.join(DIR, f"{model_name}_fp16.tflite")
with open(tflite_model_fp16_file, "wb") as out_file:
    out_file.write(tflite_fp16_model)
print(f"Model {tflite_model_fp16_file} saved.")

Model ./models/model_fp16.tflite saved.


## Weight quantization

### Load the model

In [9]:
# Reload the Keras model from model_fp so this section starts from a fresh copy,
# then print its layer summary.
model = tf.keras.models.load_model(model_fp)
model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_19 (Conv2D)           (None, 100, 100, 16)      448       
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 50, 50, 16)        0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 50, 50, 16)        2320      
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 25, 25, 16)        0         
_________________________________________________________________
conv2d_21 (Conv2D)           (None, 25, 25, 8)         1160      
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 12, 12, 8)         0         
_________________________________________________________________
flatten_9 (Flatten)          (None, 1152)            

### Create the converter object

In [10]:
# Create a converter from the loaded Keras model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# With no representative dataset set on the converter, the default optimization
# quantizes only the weights.
# Optimize.OPTIMIZE_FOR_SIZE is deprecated and behaves the same as DEFAULT, so
# DEFAULT is used here, as in the other sections.
converter.optimizations = [tf.lite.Optimize.DEFAULT]

### Convert the model and save it

In [11]:
# Run the conversion with the weight-quantization settings configured on the converter.
tflite_8bit_model = converter.convert()

# Write the converted model bytes as <model_name>_8bit.tflite in the model folder.
tflite_model_8bit_file = os.path.join(DIR, f"{model_name}_8bit.tflite")
with open(tflite_model_8bit_file, "wb") as out_file:
    out_file.write(tflite_8bit_model)
print(f"Model {tflite_model_8bit_file} saved.")

Model ./models/model_8bit.tflite saved.


## Integer quantization

We need a **representative dataset** to quantize the activations.

In [12]:
input_size = (100,100)  # size every image is resized to; should match the model input
data_range = 255  # NOTE(review): meant as the normalization range, but not used by the code shown in this notebook
n_data = 100      # number of representative inputs

DATASET_DIR = './dataset' # this directory must contain at least n_data images

# pathlib version of the dataset directory, used to build the file glob.
dataset_fp = pathlib.Path(DATASET_DIR)

### Import the dataset

In [13]:
# Dataset of the file paths found directly inside the dataset directory.
# Dataset.list_files shuffles the paths by default.
list_ds = tf.data.Dataset.list_files(str(dataset_fp/"*"))

def representative_dataset_gen():
    """Yield up to ``n_data`` calibration samples for the TFLite converter.

    Each sample is a one-element list holding one image that was decoded as
    3-channel uint8, resized to ``input_size`` with area interpolation and
    given a leading batch axis.

    The files are drawn in a single pass over ``list_ds.take(n_data)``, so
    every yielded image comes from a different file. Opening a new
    ``take(1)`` iterator per sample would reshuffle the listing each time
    and could return the same image more than once.

    NOTE(review): pixel values are not rescaled here (``data_range`` is
    unused); confirm this matches the preprocessing used during training.
    """
    for img_f in list_ds.take(n_data):
        img = tf.io.decode_image(tf.io.read_file(img_f), channels=3, dtype=tf.dtypes.uint8)
        img = tf.image.resize(img, input_size, method=tf.image.ResizeMethod.AREA)
        # img[None] adds the batch axis; the converter expects a list of inputs.
        yield [img[None]]

### Load the model

In [14]:
# Reload the Keras model from model_fp so this section starts from a fresh copy,
# then print its layer summary.
model = tf.keras.models.load_model(model_fp)
model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_19 (Conv2D)           (None, 100, 100, 16)      448       
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 50, 50, 16)        0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 50, 50, 16)        2320      
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 25, 25, 16)        0         
_________________________________________________________________
conv2d_21 (Conv2D)           (None, 25, 25, 8)         1160      
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 12, 12, 8)         0         
_________________________________________________________________
flatten_9 (Flatten)          (None, 1152)            

### Create the converter object

In [15]:
# Create a converter from the loaded Keras model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Default optimizations plus a representative dataset, which the converter
# uses to calibrate the activations.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen
# Restrict the converted model to the int8 builtin operator set.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

### Convert the model and save it

In [16]:
# Run the conversion with the integer-quantization settings configured on the converter.
tflite_integer_model = converter.convert()

# Write the converted model bytes as <model_name>_integer.tflite in the model folder.
tflite_model_integer_file = os.path.join(DIR, f"{model_name}_integer.tflite")
with open(tflite_model_integer_file, "wb") as out_file:
    out_file.write(tflite_integer_model)
print(f"Model {tflite_model_integer_file} saved.")

Model ./models/model_integer.tflite saved.


## Full integer quantization

We need a **representative dataset** to quantize the activations. All the operations in the model must be supported by the integer kernels.

In [17]:
input_size = (100,100)  # size every image is resized to; should match the model input
data_range = 255  # NOTE(review): meant as the normalization range, but not used by the code shown in this notebook
n_data = 100      # number of representative inputs

DATASET_DIR = './dataset' # this directory must contain at least n_data images

# pathlib version of the dataset directory, used to build the file glob.
dataset_fp = pathlib.Path(DATASET_DIR)

### Import the dataset

In [18]:
# Dataset of the file paths found directly inside the dataset directory.
# Dataset.list_files shuffles the paths by default.
list_ds = tf.data.Dataset.list_files(str(dataset_fp/"*"))

def representative_dataset_gen():
    """Yield up to ``n_data`` calibration samples for the TFLite converter.

    Each sample is a one-element list holding one image that was decoded as
    3-channel uint8, resized to ``input_size`` with area interpolation and
    given a leading batch axis.

    The files are drawn in a single pass over ``list_ds.take(n_data)``, so
    every yielded image comes from a different file. Opening a new
    ``take(1)`` iterator per sample would reshuffle the listing each time
    and could return the same image more than once.

    NOTE(review): pixel values are not rescaled here (``data_range`` is
    unused); confirm this matches the preprocessing used during training.
    """
    for img_f in list_ds.take(n_data):
        img = tf.io.decode_image(tf.io.read_file(img_f), channels=3, dtype=tf.dtypes.uint8)
        img = tf.image.resize(img, input_size, method=tf.image.ResizeMethod.AREA)
        # img[None] adds the batch axis; the converter expects a list of inputs.
        yield [img[None]]

### Create the converter object

In [19]:
# Create the converter directly from the .h5 file through the TF1 compatibility API.
# NOTE(review): the original author marked the TF 2.0 converter as "currently not
# compatible" with this configuration; re-check against the installed TF version.
converter = tf.compat.v1.lite.TFLiteConverter.from_keras_model_file(model_fp)

# Default optimizations plus a representative dataset, which the converter
# uses to calibrate the activations.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen
# Restrict the model to the int8 builtin operator set and request uint8
# tensors for the model's input and output.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8



### Convert the model and save it

In [20]:
# Run the conversion with the full-integer settings configured on the converter.
tflite_full_integer_model = converter.convert()

# Write the converted model bytes as <model_name>_full_integer.tflite in the model folder.
tflite_model_full_integer_file = os.path.join(DIR, f"{model_name}_full_integer.tflite")
with open(tflite_model_full_integer_file, "wb") as out_file:
    out_file.write(tflite_full_integer_model)
print(f"Model {tflite_model_full_integer_file} saved.")

Model ./models/model_full_integer.tflite saved.
