# TFLite Conversion

In [36]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [37]:
import numpy as np
import plotly.express as px
from sklearn import metrics
from tqdm.notebook import tqdm
import tensorflow as tf
from tensorflow import keras

In [38]:
print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.13.0


Mount Drive if on Colab

In [39]:
# from google.colab import drive
# drive.mount('/content/drive')

Unzipping the model

In [40]:
# !unzip -qu '/content/drive/MyDrive/Colab Notebooks/EEAI/Project/ASL Alphabet/asl_mobilenet_tuned.zip'

In [41]:
SAVED_MODEL_PATH = "../models/asl_mobilenet_tuned"
TFLITE_MODELS_PATH = "../models/asl_mobilenet"

In [42]:
TFLITE_MODELS_NAMES = ['_no_quant.tflite', '_dr_quant.tflite', '_full_quant.tflite']

In [43]:
print("Datatypes of the model's layers:")
tfmodel = tf.keras.models.load_model(SAVED_MODEL_PATH)
for layer in tfmodel.layers:
    print(f'- {layer.name}: {layer.dtype}')

Datatypes of the model's layers:
- preprocessing: float32
- mobilenet_0.10_96: float32
- pooling: float32
- classifier: float32


Unzipping the data set

In [44]:
# !unzip -qu '/content/drive/MyDrive/Colab Notebooks/EEAI/Project/ASL Alphabet/ASL.zip' -d 'data'
# !unzip -qu '/content/drive/MyDrive/Colab Notebooks/EEAI/Project/ASL Alphabet/ASL Real.zip' -d 'data'

Cropping the Images

Function to plot the class distribution

In [45]:
def plot_class_distribution(dataset, y_lim):
  labels = np.concatenate([y for x, y in tqdm(dataset)], axis=0)
  labels_str = np.array([class_mapping[i] for i in labels])
  fig = px.histogram(x=labels_str)
  fig.update_layout(
      title_text='Class distribution',
      xaxis_title_text='Class name',
      yaxis_title_text='Count',
      bargap=0.5,
      yaxis_range=[0, y_lim]
  )
  fig.show()

Defining paths and classes mapping

In [46]:
ASL_PATH = '../data/asl_alphabet_train/asl_alphabet_train'
ASL_REAL_PATH = '../data/asl_alphabet_real/asl_alphabet_real'

In [47]:
class_labels = ['A', 'B', 'C', 'D', 'del', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'nothing', 'O', 'P', 'Q', 'R', 'S', 'space', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
class_mapping = {i:label for i, label in enumerate(class_labels)}

Loading the dataset

In [48]:
split_threshold = 0.2
batch_size = 32
validation_ds: tf.data.Dataset = keras.utils.image_dataset_from_directory(ASL_PATH, batch_size=batch_size, validation_split=split_threshold, subset='validation', seed=42, class_names=class_labels)

Found 87000 files belonging to 29 classes.
Using 17400 files for validation.


## Data Preprocessing

In [49]:
crop = lambda x, y: (tf.image.central_crop(x, 0.96), y)
resize_image = lambda x, y: (tf.image.resize(x, (96, 96)), y)

In [50]:
validation_ds = validation_ds.prefetch(buffer_size=tf.data.AUTOTUNE)
validation_ds = validation_ds.map(crop, num_parallel_calls=tf.data.AUTOTUNE)
validation_ds = validation_ds.map(resize_image, num_parallel_calls=tf.data.AUTOTUNE)

In [51]:
plot_class_distribution(validation_ds, 1000)

  0%|          | 0/544 [00:00<?, ?it/s]

# Converting to TFLite and performing PTQ

Here we will generate 3 versions

## Version 1: conversion only (no quantization)

In [52]:
converter = tf.lite.TFLiteConverter.from_saved_model(SAVED_MODEL_PATH)

tflite_model = converter.convert()

with open(TFLITE_MODELS_PATH  + TFLITE_MODELS_NAMES[0], 'wb') as f:
  print(f"Bytes written: {f.write(tflite_model) / 1024} KB")

Bytes written: 174.58203125 KB


## Version 2: dynamic range quantization

In [53]:
converter = tf.lite.TFLiteConverter.from_saved_model(SAVED_MODEL_PATH)

# Post Training Quantization
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

with open(TFLITE_MODELS_PATH + TFLITE_MODELS_NAMES[1], 'wb') as f:
  print(f"Bytes written: {f.write(tflite_model) / 1024} KB")

Bytes written: 84.6171875 KB


## Version 3: full integer quantization

In [54]:
# this returns some representative data in order to perform a more efficient quantization
def representative_data_gen():
  for image, _ in validation_ds.unbatch().batch(1).take(1000):
    yield [image]

In [55]:
converter = tf.lite.TFLiteConverter.from_saved_model(SAVED_MODEL_PATH)

# Post Training Quantization
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

tflite_model = converter.convert()

with open(TFLITE_MODELS_PATH + TFLITE_MODELS_NAMES[2], 'wb') as f:
  print(f"Bytes written: {f.write(tflite_model) / 1024} KB")

Bytes written: 90.1875 KB


fully_quantize: 0, inference_type: 6, input_inference_type: UINT8, output_inference_type: UINT8


Check the datatypes of the TFLite models

In [56]:
for i, MODEL_PATH in enumerate(TFLITE_MODELS_NAMES):
  print("Version ", i + 1)
  # Load the TFLite model
  interpreter = tf.lite.Interpreter(model_path=TFLITE_MODELS_PATH + MODEL_PATH)
  interpreter.allocate_tensors()

  # Get information about the model inputs and outputs
  input_details = interpreter.get_input_details()
  output_details = interpreter.get_output_details()

  # Print the data types of the inputs
  print("Input Data Types:")
  for input_detail in input_details:
      print(input_detail['name'], input_detail['dtype'])

  # Print the data types of the outputs
  print("\nOutput Data Types:")
  for output_detail in output_details:
      print(output_detail['name'], output_detail['dtype'])
  print("\n")

Version  1
Input Data Types:
serving_default_first_layer:0 <class 'numpy.float32'>

Output Data Types:
StatefulPartitionedCall:0 <class 'numpy.float32'>


Version  2
Input Data Types:
serving_default_first_layer:0 <class 'numpy.float32'>

Output Data Types:
StatefulPartitionedCall:0 <class 'numpy.float32'>


Version  3
Input Data Types:
serving_default_first_layer:0 <class 'numpy.uint8'>

Output Data Types:
StatefulPartitionedCall:0 <class 'numpy.uint8'>




# Model evaluation

We compute the size, the accuracy and the latency of the 3 TFLite models

Helper functions

In [57]:
def get_details(interpreter: tf.lite.Interpreter):
  input_details = interpreter.get_input_details()[0]
  output_details = interpreter.get_output_details()[0]
  print(f'Input shape: {input_details["shape"]}')
  print(f'Input datatype: {input_details["dtype"]}')
  print(f'Output shape: {output_details["shape"]}')
  print(f'Output datatype: {output_details["dtype"]}')

Dynamic input shapes in TF models are replaced with a placeholder of 1 in TFLite models.
We need to resize the input shape manually.

In [58]:
def resize_input_shape(interpreter: tf.lite.Interpreter):
  # Resize input shape for dynamic shape model and allocate tensor
  input_details = interpreter.get_input_details()[0]
  interpreter.resize_tensor_input(input_details['index'], [32, 96, 96, 3])
  interpreter.allocate_tensors()
  print(f'Resized input shape: {interpreter.get_input_details()[0]["shape"]}')

Define a custom function to compute the predictions for a batch

In [59]:
def predict_on_batch(interpreter, images_batch):
  input_details = interpreter.get_input_details()[0]
  interpreter.set_tensor(input_details['index'], images_batch)
  interpreter.invoke()
  output_details = interpreter.get_output_details()[0]
  return interpreter.get_tensor(output_details['index'])

## Model 1

In [60]:
interpreter = tf.lite.Interpreter(model_path=TFLITE_MODELS_PATH + TFLITE_MODELS_NAMES[0])
get_details(interpreter)
resize_input_shape(interpreter)

Input shape: [ 1 96 96  3]
Input datatype: <class 'numpy.float32'>
Output shape: [ 1 29]
Output datatype: <class 'numpy.float32'>
Resized input shape: [32 96 96  3]


Compute the accuracy on the validation set

In [61]:
true_labels = np.array([])
pred_labels = np.array([])
for batch in tqdm(validation_ds):
  # Tuple unpacking
  images, t_labels = batch

  if images.shape[0] == batch_size:
    # Compute the predicted labels
    p_labels = predict_on_batch(interpreter, images)
    p_labels = np.argmax(p_labels, axis=1)

    # Concatenate in a single vector
    true_labels = np.concatenate([true_labels, t_labels])
    pred_labels = np.concatenate([pred_labels, p_labels])

  0%|          | 0/544 [00:00<?, ?it/s]

Latency: 119.97 it/s

In [62]:
print(f'Accuracy: {metrics.classification_report(true_labels, pred_labels, output_dict=True)["accuracy"]}')

Accuracy: 0.9662177716390423


## Model 2

In [63]:
interpreter = tf.lite.Interpreter(model_path=TFLITE_MODELS_PATH + TFLITE_MODELS_NAMES[1])
get_details(interpreter)
resize_input_shape(interpreter)

Input shape: [ 1 96 96  3]
Input datatype: <class 'numpy.float32'>
Output shape: [ 1 29]
Output datatype: <class 'numpy.float32'>
Resized input shape: [32 96 96  3]


In [64]:
true_labels = np.array([])
pred_labels = np.array([])
for batch in tqdm(validation_ds):
  # Tuple unpacking
  images, t_labels = batch

  if images.shape[0] == batch_size:
    # Compute the predicted labels
    p_labels = predict_on_batch(interpreter, images)
    p_labels = np.argmax(p_labels, axis=1)

    # Concatenate in a single vector
    true_labels = np.concatenate([true_labels, t_labels])
    pred_labels = np.concatenate([pred_labels, p_labels])

  0%|          | 0/544 [00:00<?, ?it/s]

Latency: 104.47 it/s

In [65]:
print(f'Accuracy: {metrics.classification_report(true_labels, pred_labels, output_dict=True)["accuracy"]}')

Accuracy: 0.963512891344383


## Model 3

In [66]:
interpreter = tf.lite.Interpreter(model_path=TFLITE_MODELS_PATH + TFLITE_MODELS_NAMES[2])
get_details(interpreter)
resize_input_shape(interpreter)

Input shape: [ 1 96 96  3]
Input datatype: <class 'numpy.uint8'>
Output shape: [ 1 29]
Output datatype: <class 'numpy.uint8'>
Resized input shape: [32 96 96  3]


In order to make it work the full-integer quantization, we want in input images that are UINT8, so here we quantize the input images

In [67]:
true_labels = np.array([])
pred_labels = np.array([])
for batch in tqdm(validation_ds):
  # Tuple unpacking
  images, t_labels = batch

  if images.shape[0] == batch_size:
    # Quantize the input
    input_details = interpreter.get_input_details()[0]
    input_scale, input_zero_point = input_details["quantization"]
    images = images / input_scale + input_zero_point
    images = images.numpy().astype("uint8")

    # Compute the predicted labels
    p_labels = predict_on_batch(interpreter, images)
    p_labels = np.argmax(p_labels, axis=1)

    # Concatenate in a single vector
    true_labels = np.concatenate([true_labels, t_labels])
    pred_labels = np.concatenate([pred_labels, p_labels])

  0%|          | 0/544 [00:00<?, ?it/s]

Latency: 85.64 it/s

In [68]:
print(f'Accuracy: {metrics.classification_report(true_labels, pred_labels, output_dict=True)["accuracy"]}')

Accuracy: 0.9178176795580111


# Results

| Model                 | Size   | Latency     | Accuracy |
|-----------------------|--------|-------------|----------|
| No quantization       | 573 KB | 119.97 it/s | 0.992    |
| DR quantization       | 204 KB | 104.47 it/s | 0.990    |
| Full int quantization | 218 KB | 85.64 it/s  | 0.937    |

# Full quantization model

In [69]:
size = 1000
cm = metrics.confusion_matrix(true_labels, pred_labels)
fig = px.imshow(cm, x=class_labels, y=class_labels, text_auto=True, width=size, height=size, color_continuous_scale='blues')
fig.update_layout(
  title_text='Multiclass confusion matrix',
  xaxis_title_text='Actual class',
  yaxis_title_text='Predicted class',
)

Relative error per class

In [70]:
correct_percentage =  cm.diagonal() / cm.sum(axis=1)
relative_errors = 1 - correct_percentage
fig = px.bar(x=class_labels, y=relative_errors, orientation='v')
fig.update_layout(
  title_text='Percentage of miss-classifications per class',
  xaxis_title_text='Class name',
  yaxis_title_text='Count',
  bargap=0.3,
)