In [17]:
import logging
logging.getLogger("tensorflow").setLevel(logging.DEBUG)

import tensorflow as tf
import numpy as np
import pathlib
# Require TensorFlow >= 2.3. Compare (major, minor) as integers: slicing the
# version string to three characters turns "2.10.0" into 2.1 and would make
# the check fail on newer releases.
tf_major, tf_minor = (int(part) for part in tf.__version__.split(".")[:2])
assert (tf_major, tf_minor) >= (2, 3), (
    f"TensorFlow >= 2.3 is required, found {tf.__version__}"
)

## Generate a TensorFlow Model

In [2]:
# Handle to the Keras MNIST dataset module; the images and labels are loaded
# from it with mnist.load_data() in the training cell.
mnist = tf.keras.datasets.mnist

In [61]:
# Seed TensorFlow's global random generator so that weight initialisation and
# training are repeatable (as far as TensorFlow allows) on "Restart & Run All".
tf.random.set_seed(42)

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize the input images so that each pixel value is between 0 and 1
train_images = train_images.astype(np.float32) / 255.0
test_images = test_images.astype(np.float32) / 255.0

# Define the model architecture: a single conv + max-pool stage followed by a
# dense layer that emits 10 raw logits (one per digit class).
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(28, 28)),
    tf.keras.layers.Reshape(target_shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10)
])

# Train the digit classification model. The loss uses from_logits=True because
# the final Dense layer has no softmax activation.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Bind the returned History object instead of leaving fit() as the cell's last
# expression, which would only display an object repr with a memory address.
training_history = model.fit(
    train_images,
    train_labels,
    epochs=5,
    validation_data=(test_images, test_labels)
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fa736229c10>

## Convert to a TensorFlow Lite model

In [62]:
# Convert the trained Keras model to TensorFlow Lite with no optimizations set.
# The resulting bytes are the unquantized baseline that is later written to
# mnist_model.tflite.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

INFO:tensorflow:Assets written to: /tmp/tmpbn09k4ew/assets


INFO:tensorflow:Assets written to: /tmp/tmpbn09k4ew/assets
2021-12-09 18:10:05.160276: I tensorflow/core/grappler/devices.cc:78] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2021-12-09 18:10:05.161050: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2021-12-09 18:10:05.163751: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:928] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.006ms.
  function_optimizer: function_optimizer did nothing. time = 0.001ms.

2021-12-09 18:10:05.210384: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:316] Ignored output_format.
2021-12-09 18:10:05.210453: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:319] Ignored drop_control_dependency.


## Quantization

### Convert using dynamic range quantization (weights only)

In [63]:
# With Optimize.DEFAULT and no representative dataset, only the weights are
# quantized. A fresh converter is created so no settings carry over from the
# previous conversion.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model_quant_ff_weights = converter.convert()

INFO:tensorflow:Assets written to: /tmp/tmpvbsqruw3/assets


INFO:tensorflow:Assets written to: /tmp/tmpvbsqruw3/assets
2021-12-09 18:10:57.410095: I tensorflow/core/grappler/devices.cc:78] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2021-12-09 18:10:57.410759: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2021-12-09 18:10:57.413181: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:928] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.004ms.
  function_optimizer: function_optimizer did nothing. time = 0ms.

2021-12-09 18:10:57.454372: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:316] Ignored output_format.
2021-12-09 18:10:57.454437: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:319] Ignored drop_control_dependency.
2021-12-09 18:10:57.474041: I tensorflow/lite/tools/optimize/quantize_weights.cc:222] S

In [6]:
# Print the layer-by-layer output shapes and parameter counts of the Keras model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 26, 26, 12)        120       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 12)        0         
_________________________________________________________________
flatten (Flatten)            (None, 2028)              0         
_________________________________________________________________
dense (Dense)                (None, 10)                20290     
Total params: 20,410
Trainable params: 20,410
Non-trainable params: 0
_________________________________________________________________


### Convert using float fallback quantization

In [64]:
# Float-fallback quantization: weights and variable data are quantized too;
# any op that does not support quantized execution falls back to float.
def representative_data_gen():
    """Yield 100 single-image batches taken from ``train_images``.

    Each item is a one-element list wrapping a batch of size 1; the generator
    is handed to the converter as its representative dataset.
    """
    calibration_batches = (
        tf.data.Dataset.from_tensor_slices(train_images)
        .batch(1)
        .take(100)
    )
    for single_image_batch in calibration_batches:
        yield [single_image_batch]


converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.representative_dataset = representative_data_gen
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model_quant_ff_all = converter.convert()

INFO:tensorflow:Assets written to: /tmp/tmpe7dmo1lu/assets


INFO:tensorflow:Assets written to: /tmp/tmpe7dmo1lu/assets
2021-12-09 18:11:18.109287: I tensorflow/core/grappler/devices.cc:78] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2021-12-09 18:11:18.109698: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2021-12-09 18:11:18.112145: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:928] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.007ms.
  function_optimizer: function_optimizer did nothing. time = 0.001ms.

2021-12-09 18:11:18.153403: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:316] Ignored output_format.
2021-12-09 18:11:18.153471: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:319] Ignored drop_control_dependency.


In [65]:
# Inspect the model produced by the float-fallback conversion directly above
# (tflite_model_quant_ff_all) rather than the earlier weights-only model, to
# confirm that its input and output tensors are still float32 even though
# weights and variable data were quantized.
interpreter = tf.lite.Interpreter(model_content=tflite_model_quant_ff_all)
input_type = interpreter.get_input_details()[0]['dtype']
print(f'input: {input_type}')
output_type = interpreter.get_output_details()[0]['dtype']
print(f'output: {output_type}')

input: <class 'numpy.float32'>
output: <class 'numpy.float32'>


### Convert using integer-only quantization

In [66]:
# NOTE: python-flatbuffers had to be downgraded for this conversion to run
# (check the installed version with `conda list | grep flat`).
#
# Integer-only quantization: weights and variable data are quantized, and the
# model's input/output tensors are uint8 as well.
# The `representative_data_gen` generator defined for the float-fallback
# conversion is reused here instead of being defined a second time.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
# Ensure that if any ops can't be quantized, the converter throws an error
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# Set the input and output tensors to uint8 (APIs added in r2.3)
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

tflite_model_quant_uint8 = converter.convert()

INFO:tensorflow:Assets written to: /tmp/tmpnxm5jkez/assets


INFO:tensorflow:Assets written to: /tmp/tmpnxm5jkez/assets
2021-12-09 18:11:43.352401: I tensorflow/core/grappler/devices.cc:78] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2021-12-09 18:11:43.353033: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2021-12-09 18:11:43.355383: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:928] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.003ms.
  function_optimizer: function_optimizer did nothing. time = 0.001ms.

2021-12-09 18:11:43.398952: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:316] Ignored output_format.
2021-12-09 18:11:43.399021: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:319] Ignored drop_control_dependency.


In [67]:
# Confirm that the integer-only model exposes uint8 input and output tensors.
interpreter = tf.lite.Interpreter(model_content=tflite_model_quant_uint8)
first_input_detail = interpreter.get_input_details()[0]
first_output_detail = interpreter.get_output_details()[0]
input_type = first_input_detail['dtype']
output_type = first_output_detail['dtype']
print('input: ', input_type)
print('output: ', output_type)

input:  <class 'numpy.uint8'>
output:  <class 'numpy.uint8'>


In [68]:
# Directory that collects every exported .tflite artifact (created if missing).
tflite_models_dir = pathlib.Path("/tmp/mnist_tflite_models/")
tflite_models_dir.mkdir(parents=True, exist_ok=True)

# Baseline: the unquantized float model.
tflite_model_file = tflite_models_dir.joinpath("mnist_model.tflite")
tflite_model_file.write_bytes(tflite_model)

# The two float-fallback variants: weights-only quantization first, then the
# variant where weights and variable data are both quantized.
for quant_file_name, quant_model_bytes in (
    ('mnist_model_quant_float_ff_weights_only.tflite', tflite_model_quant_ff_weights),
    ('mnist_model_quant_float_ff_weights_data.tflite', tflite_model_quant_ff_all),
):
    tflite_model_quant_file = tflite_models_dir.joinpath(quant_file_name)
    tflite_model_quant_file.write_bytes(quant_model_bytes)

# Integer-only (uint8 input/output) model. write_bytes returns the number of
# bytes written; as the cell's last expression that count is displayed.
tflite_model_quant_file = tflite_models_dir.joinpath('mnist_model_quant_uint8.tflite')
tflite_model_quant_file.write_bytes(tflite_model_quant_uint8)

24648

In [47]:
# List the export directory (long format, newest first) to compare file sizes.
# NOTE(review): the recorded output shows mnist_model_quant.tflite, a name the
# save cell no longer writes, so this listing is from an earlier run.
!ls -altG /tmp/mnist_tflite_models/

total 516
drwxrwxr-x  4 ubuntu   4096 Dec  9 17:48 .
-rw-rw-r--  1 ubuntu  24672 Dec  9 17:48 mnist_model_quant_float_ff_weights_only.tflite
-rw-rw-r--  1 ubuntu  23888 Dec  9 17:48 mnist_model_quant.tflite
-rw-rw-r--  1 ubuntu  84488 Dec  9 17:48 mnist_model.tflite
drwxrwxrwt 14 root     4096 Dec  9 17:45 ..
-rw-rw-r--  1 ubuntu 271400 Dec  9 17:30 mnist_orig.h5
-rw-rw-r--  1 ubuntu  97901 Dec  9 17:26 saved_model.pb
drwxr-xr-x  2 ubuntu   4096 Dec  9 17:26 assets
drwxr-xr-x  2 ubuntu   4096 Dec  9 17:26 variables


In [41]:
# Does converting a model to TensorFlow Lite already reduce its size, before
# any quantization is applied? Save the original Keras model as HDF5 so its
# size can be compared with the .tflite files.
# https://www.tensorflow.org/lite/performance/model_optimization#:~:text=with%20TensorFlow%20Lite.-,Quantization,model%20size%20and%20faster%20computation.
model.save("/tmp/mnist_tflite_models/mnist_orig.h5")

In [42]:
# List the export directory (long format, newest first) after saving mnist_orig.h5.
!ls -altG  /tmp/mnist_tflite_models/

total 488
-rw-rw-r--  1 ubuntu 271400 Dec  9 17:30 mnist_orig.h5
drwxrwxr-x  4 ubuntu   4096 Dec  9 17:30 .
-rw-rw-r--  1 ubuntu  97901 Dec  9 17:26 saved_model.pb
drwxr-xr-x  2 ubuntu   4096 Dec  9 17:26 assets
drwxr-xr-x  2 ubuntu   4096 Dec  9 17:26 variables
-rw-rw-r--  1 ubuntu  24576 Dec  9 17:19 mnist_model_quant.tflite
-rw-rw-r--  1 ubuntu  84488 Dec  9 17:19 mnist_model.tflite
drwxrwxrwt 14 root     4096 Dec  9 17:15 ..


In [57]:
# List the export directory (long format, newest first) before the cleanup cells below.
!ls -altG /tmp/mnist_tflite_models/

total 564
drwxrwxr-x  4 ubuntu   4096 Dec  9 17:57 .
-rw-rw-r--  1 ubuntu  24672 Dec  9 17:57 mnist_model_quant_float_ff_weights_data.tflite
-rw-rw-r--  1 ubuntu  23888 Dec  9 17:57 mnist_model_quant_float_ff_weights_only.tflite
-rw-rw-r--  1 ubuntu  24576 Dec  9 17:57 mnist_model_quant_uint8.tflite
-rw-rw-r--  1 ubuntu  84488 Dec  9 17:57 mnist_model.tflite
drwxrwxrwt 14 root     4096 Dec  9 17:54 ..
-rw-rw-r--  1 ubuntu  23888 Dec  9 17:48 mnist_model_quant.tflite
-rw-rw-r--  1 ubuntu 271400 Dec  9 17:30 mnist_orig.h5
-rw-rw-r--  1 ubuntu  97901 Dec  9 17:26 saved_model.pb
drwxr-xr-x  2 ubuntu   4096 Dec  9 17:26 assets
drwxr-xr-x  2 ubuntu   4096 Dec  9 17:26 variables


In [58]:
# Remove every previously exported quantized model (mnist_model_quant*).
# NOTE(review): when the notebook is run top to bottom this deletes the
# quantized files the save cell has just written - re-run the save cell
# afterwards, or drop this cleanup step.
!rm /tmp/mnist_tflite_models/mnist_model_quant*

In [59]:
# Remove the previously exported unquantized model.
# NOTE(review): when the notebook is run top to bottom this deletes the
# mnist_model.tflite file the save cell has just written.
!rm /tmp/mnist_tflite_models/mnist_model.tflite

In [69]:
# Final listing of the export directory; the byte sizes shown here are the
# ones copied into the comparison table below.
!ls -altG /tmp/mnist_tflite_models/

total 544
drwxrwxr-x  4 ubuntu   4096 Dec  9 18:12 .
-rw-rw-r--  1 ubuntu  24752 Dec  9 18:12 mnist_model_quant_float_ff_weights_data.tflite
-rw-rw-r--  1 ubuntu  23968 Dec  9 18:12 mnist_model_quant_float_ff_weights_only.tflite
-rw-rw-r--  1 ubuntu  24648 Dec  9 18:12 mnist_model_quant_uint8.tflite
-rw-rw-r--  1 ubuntu  84564 Dec  9 18:12 mnist_model.tflite
drwxrwxrwt 14 root     4096 Dec  9 18:11 ..
-rw-rw-r--  1 ubuntu 271400 Dec  9 17:30 mnist_orig.h5
-rw-rw-r--  1 ubuntu  97901 Dec  9 17:26 saved_model.pb
drwxr-xr-x  2 ubuntu   4096 Dec  9 17:26 assets
drwxr-xr-x  2 ubuntu   4096 Dec  9 17:26 variables


In [71]:
import pandas as pd

In [75]:
# Artifact sizes in bytes, copied by hand from the final `ls` listing of the
# export directory (one column per artifact, a single row of sizes).
size_in_bytes_by_artifact = {
    'model.pb': 97901,
    'model.h5': 271400,
    'weights_f32': 23968,
    'weights_data_f32': 24752,
    'weights_data_uint8': 24648,
}
pd.DataFrame(
    {label: [num_bytes] for label, num_bytes in size_in_bytes_by_artifact.items()}
)

Unnamed: 0,model.pb,model.h5,weights_f32,weights_data_f32,weights_data_uint8
0,97901,271400,23968,24752,24648
