## Quantization

In [14]:
# IMPORT LIBRARIES
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pathlib

In [15]:
# LOAD MNIST DATA
# load_data() is unpacked into a training pair and a test pair, each holding
# the images and their labels.
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

In [16]:
# NORMALIZE THE IMAGE DATA
# Scale pixel values by 1/255. The rescale is guarded on an integer dtype so
# that re-running this cell does not divide already-normalized data a second
# time (assumes load_data() hands back integer pixel arrays -- the Keras MNIST
# docs state uint8).
if np.issubdtype(train_images.dtype, np.integer):
    train_images = train_images / 255.0
if np.issubdtype(test_images.dtype, np.integer):
    test_images = test_images / 255.0

In [17]:
# DEFINE THE MODEL ARCHITECTURE
# Small CNN assembled layer by layer; the final Dense layer has no activation,
# so the model outputs one raw score per class (10 classes).
model = keras.Sequential()
model.add(keras.layers.InputLayer(input_shape=(28, 28)))
# Add an explicit channel axis so Conv2D receives (28, 28, 1) inputs.
model.add(keras.layers.Reshape(target_shape=(28, 28, 1)))
model.add(
    keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation=tf.nn.relu)
)
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(10))

In [18]:
# COMPILE AND TRAIN THE DIGIT CLASSIFICATION MODEL
# The loss is configured with from_logits=True, matching a model whose last
# layer applies no activation.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer='adam',
    metrics=['accuracy'],
)
# A single epoch; the test split is passed as the validation data.
model.fit(
    x=train_images,
    y=train_labels,
    validation_data=(test_images, test_labels),
    epochs=1,
)



<keras.src.callbacks.History at 0x7c8d6c506380>

## Dynamic range quantization

In [19]:
# USING THE TENSORFLOW LITE CONVERTER, WE CAN CONVERT THE TRAINED MODEL INTO THE TENSORFLOW LITE FORMAT
# No optimizations are set on the converter at this point, so this conversion
# is the un-quantized baseline.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

In [20]:
# SAVE THE CONVERTED MODEL UNDER A DEDICATED DIRECTORY
# The directory (and any missing parents) is created if needed; an existing
# directory is not an error.
tflite_models_dir = pathlib.Path("mnist_tflite_models/")
tflite_models_dir.mkdir(parents=True, exist_ok=True)

tflite_model_file = tflite_models_dir.joinpath("mnist_model.tflite")
# Last expression of the cell: write_bytes() returns the number of bytes written.
tflite_model_file.write_bytes(tflite_model)

84844

In [21]:
# TO QUANTIZE A MODEL ON EXPORT, SET THE OPTIMIZATIONS FLAG ON THE CONVERTER
# The same converter object is reused; only its `optimizations` list changes
# before converting again.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model = converter.convert()

tflite_model_quant_file = tflite_models_dir.joinpath("mnist_model_quant.tflite")
# Last expression of the cell: shows the byte count of the quantized model.
tflite_model_quant_file.write_bytes(tflite_quant_model)

24088

In [23]:
ls -lh {tflite_models_dir}

total 108K
-rw-r--r-- 1 root root 83K Jan  1 12:28 mnist_model.tflite
-rw-r--r-- 1 root root 24K Jan  1 12:29 mnist_model_quant.tflite


WE HAVE REDUCED THE SIZE OF THE MODEL. WHAT ABOUT ITS PERFORMANCE?

In [24]:
# INTERPRETERS
# One TFLite interpreter per saved file: `interpreter` for the original
# model, `interpreter_quant` for the quantized one.
interpreter, interpreter_quant = (
    tf.lite.Interpreter(model_path=str(model_file))
    for model_file in (tflite_model_file, tflite_model_quant_file)
)

# Allocate tensors on both interpreters: original model first, then quantized.
interpreter.allocate_tensors()
interpreter_quant.allocate_tensors()

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [25]:
# TEST THE ORIGINAL MODEL ON A SINGLE IMAGE
# Tensor indices of the first input and first output of the interpreter.
input_index = interpreter.get_input_details()[0]["index"]
output_index = interpreter.get_output_details()[0]["index"]

# First test image with a leading axis of size 1 added, cast to float32.
test_image = test_images[0][np.newaxis, ...].astype(np.float32)

# Feed the image, run inference, then read the output tensor back.
interpreter.set_tensor(input_index, test_image)
interpreter.invoke()
predictions = interpreter.get_tensor(output_index)

In [26]:
#  A FUNCTION TO EVALUATE THE TF LITE MODEL USING TEST DATA
def evaluate_model(interpreter, images=None, labels=None):
    """Return the top-1 accuracy of a TFLite interpreter on labelled images.

    Each image is given a leading axis of size 1, cast to float32, fed to the
    interpreter's first input, and the arg-max of the first output row is
    taken as the predicted class.

    Args:
        interpreter: Object exposing get_input_details(), get_output_details(),
            set_tensor(), invoke() and tensor(); in this notebook, a
            tf.lite.Interpreter on which allocate_tensors() was already called.
        images: Iterable of single images. Defaults to the notebook-level
            ``test_images`` when omitted.
        labels: Indexable sequence of class labels, aligned by position with
            ``images``. Defaults to the notebook-level ``test_labels``.

    Returns:
        float: Fraction of images whose predicted class equals its label.

    Raises:
        ValueError: If there are no images to evaluate (accuracy is undefined).
    """
    # Fall back to the notebook globals so existing one-argument calls still work.
    if images is None:
        images = test_images
    if labels is None:
        labels = test_labels

    input_index = interpreter.get_input_details()[0]["index"]
    output_index = interpreter.get_output_details()[0]["index"]

    prediction_digits = []
    for image in images:
        batch = np.expand_dims(image, axis=0).astype(np.float32)
        interpreter.set_tensor(input_index, batch)

        interpreter.invoke()

        # interpreter.tensor() returns a callable; calling it yields the output
        # data, whose first row holds the scores for this single image.
        output = interpreter.tensor(output_index)
        prediction_digits.append(int(np.argmax(output()[0])))

    if not prediction_digits:
        raise ValueError("evaluate_model needs at least one image to evaluate")

    accurate_count = sum(
        1
        for index, digit in enumerate(prediction_digits)
        if digit == labels[index]
    )
    # Computed once, after counting (not on every loop iteration).
    return accurate_count / len(prediction_digits)

In [27]:
# ACCURACY OF THE ORIGINAL (NON-QUANTIZED) TFLITE MODEL
print(evaluate_model(interpreter))

0.9671


In [28]:
# ACCURACY OF THE QUANTIZED TFLITE MODEL
print(evaluate_model(interpreter_quant))

0.9666
