In [1]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

In [5]:
!pip install -q tensorflow-model-optimization

In [6]:
#load MNIST dataset
mnist = keras.datasets.mnist
(train_images, train_labels),(test_images, test_labels) = mnist.load_data()

# Normalize the input image
train_images = train_images/255.0
test_images = test_images/255.0

# # Define the model architecture
# model = keras.Sequential([
#     keras.layers.InputLayer(input_shape = (28,28)),
#     keras.layers.Reshape(target_shape = (28,28,1)),
#     keras.layers.Conv2D(filters = 12, kernel_size = (3,3), activation = 'relu'),
#     keras.layers.MaxPooling2D(pool_size = (2,2)),
#     keras.layers.Flatten(),
#     keras.layers.Dense(10)
# ])

# Define the model architecture
model = keras.Sequential([
    keras.layers.InputLayer(input_shape=(28, 28, 1)),  # Include channel dimension directly
    keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Flatten(),
    keras.layers.Dense(10)
])

# **Optimize For Quantization**

In [7]:
import tensorflow_model_optimization as tfmot

*The error you're encountering occurs because the Reshape layer is not supported directly by the quantization-aware training API in TensorFlow. To fix this issue, you need to use quantize_scope to handle custom layers or unsupported layers.*

In [8]:
# Use quantize_scope to handle the Reshape layer
with tfmot.quantization.keras.quantize_scope():
    # model training quantization can be done by quantize model API
    quantize_model = tfmot.quantization.keras.quantize_model

    # Simply pass model & it will return a new model
    q_aware_model = quantize_model(model)

ValueError: `to_quantize` can only either be a keras Sequential or Functional model.

In [None]:
# model training quantization can be done by quantize model API
quantize_model = tfmot.quantization.keras.quantize_model

# simply pass model & it will return new model
q_aware_model = quantize_model(model)

# recompile new model
q_aware_model.compile(optimizer = 'adam', loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits= True), metrics = ['accuracy'])

ValueError: Unable to clone model. This generally happens if you used custom Keras layers or objects in your model. Please specify them via `quantize_scope` for your calls to `quantize_model` and `quantize_apply`. [Layer <tf_keras.src.layers.convolutional.conv2d.Conv2D object at 0x788cc2925f30> supplied to wrapper is not a supported layer type. Please ensure wrapped layer is a valid Keras layer.].

# **Quantization aware Training**

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/242.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.0/242.5 kB[0m [31m964.8 kB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/242.5 kB[0m [31m1.2 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m235.5/242.5 kB[0m [31m2.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.5/242.5 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25h

# **Finetune baseline model to become quantize aware **

In [None]:
import tempfile # to create temperory files and directories
import os # to perform OS related tasks


#load ,nist dataset

mnist = keras.datasets.mnist
(train_images, train_labels),(test_images, test_labels) = mnist.load_data()

#Mormalize
train_images = train_images/255.0
test_images = test_images/255.0

#define Model Architecture
model = keras.Sequential([keras.layers.InputLayer(input_shape = (28,28)),
                         keras.layers.Reshape(target_shape(28,28,1)),
                         keras.layers.Conv2D(filter = 12, kernal_size =(3,3), activation = 'relu'),
                         keras.layers.MaxPooling2D(pool_size = (2,2)),
                         keras.layers.Flatten(),
                         keras.layers.Dense(10)
                         ])
model.compile(optimizer = 'adam', loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True), metrics = ['accuracy'])


NameError: name 'target_shape' is not defined

Train baseline model

In [None]:
#instead of using train data as validation data , use 10% of training data as validation data
model.fit(train_images, train_labels, epochs = 1, validation_split = 0.1)


RuntimeError: You must compile your model before training/testing. Use `model.compile(optimizer, loss)`.

Fine tune baseline model with Quantization aware training

In [None]:
# what quantization aware traininig does is it basically mimic the inference pipeline &
# estimate the possible errors that could arise due to converion to TF Lite model & also post training quantization

In [None]:
import tenserflow_model_optimization as tfmot

#instintiate api
auantize_model = tfmot.quantization.keras.quantize_model

#create quantization awawre model
q_aware_model = quantize_model(model)

# recompile model
q_aware_model.compile(optimizer = 'adam', loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True), metrics = ['accuracy'])

#Traini fine tuned model
train_images_subset = train_images[0:1000] #out of 60000
train_labels_subset = train_labels[0:1000]

#train quantization aware model
q_aware_model.fit(train_images_subset, train_labels_subset , batch_size = 500, epochs = 1, validation_split = 0.1)


ModuleNotFoundError: No module named 'tenserflow_model_optimization'

Evaluate both models on test dataset


In [None]:
_,baseline_model_accuracy = model.evaluate(test_images, test_labels, verbose = 0)
_,quantize_aware_model_accuracy = q_aware_model.evaluate(test_images, test_labels, verbose = 0)

print( " Baseline test accuracy : ", baseline_model_accuracy)
print("Quanti aware  test accuracy : ", q_aware_model_accuracy)


# Now we have Quantized aware tenserflow model, now we gonna convert them into TF Lite model

create Quantized model for TFLite backend

In [None]:
#instentiate converter
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

#convert the model
quantized_QA_tflite_model = converter.convert()

Observe persistence of accuracy from TF to TF Lite

In [None]:
import numpy as np
def evaluate_model(interpreter):
  input_index = interpreter.get_input_details()[0]["index"]
  output_index = interpreter.get_output_details()[0]["index"]

  #run predictions on every image of test dataset
  prediction_digits = []
  for i, test_image in enumerate ( test_images):
    if i%1000 == 0:
      print("Evaluted on {n} results ".format(n=i))
      #preprocessing : add batch dimension & convert to floats to match with model input format
      test_image = np.expand_dims(test_image, axis = 0).astype(np.float32)
      interpreter.set_tensor(input_index, test_image)
      #run inference
      interpreter.invoke()
      #post processing: remove batch dimension & find digit with highest probability
      output = interpreter.tensor(output_index)
      digit = np.argmax(output()[0])
      prediction_digits.append(digit)
  print("\n")

  #compare prediction results with ground truth to calculate accuracy
  prediction_digits = np.array(prediction_digits)
  accuracy = (prediction_digits == test_labels).mean()
  return accuracy



In [None]:
#instintiate interpreter
interpreter = tf.lite.Interpreter(model_content = quantized_QA_tflite_model)
interpreter.allocate_tensors()
TFLite_quant_QA_accuracy = evaluate_model(interpreter)

print(" Quant Tenserflow test accuracy: ", q_aware_model_accuracy)
print("Quant TFLite test accuracy: ", TFLite_quant_QA_accuracy)