In [2]:
try:
    import tensorflow as tf
    print(f"TensorFlow is installed. Version: {tf.__version__}")
except ImportError as e:
    print("TensorFlow is not installed or cannot be imported.")
    print(f"Error: {e}")

TensorFlow is installed. Version: 2.19.0


In [3]:
import tensorflow as tf

# Load the CIFAR-10 dataset from TensorFlow Keras
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Normalize pixel values to be between 0 and 1
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# One-hot encode the labels (convert class numbers to a binary vector)
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

print("Shape of training images:", x_train.shape)
print("Shape of training labels:", y_train.shape)
print("Shape of testing images:", x_test.shape)
print("Shape of testing labels:", y_test.shape)

Shape of training images: (50000, 32, 32, 3)
Shape of training labels: (50000, 10)
Shape of testing images: (10000, 32, 32, 3)
Shape of testing labels: (10000, 10)


In [4]:
from tensorflow.keras import layers, models

def create_cnn_model():
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(100, activation='relu'))
    model.add(layers.Dense(10, activation='softmax'))
    return model

cnn_model = create_cnn_model()
cnn_model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [5]:
cnn_model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

In [6]:
history = cnn_model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test))

Epoch 1/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 16ms/step - accuracy: 0.3820 - loss: 1.6986 - val_accuracy: 0.5809 - val_loss: 1.1938
Epoch 2/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 15ms/step - accuracy: 0.6021 - loss: 1.1274 - val_accuracy: 0.6523 - val_loss: 0.9920
Epoch 3/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 16ms/step - accuracy: 0.6645 - loss: 0.9588 - val_accuracy: 0.6651 - val_loss: 0.9717
Epoch 4/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 16ms/step - accuracy: 0.6979 - loss: 0.8657 - val_accuracy: 0.6798 - val_loss: 0.9353
Epoch 5/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 15ms/step - accuracy: 0.7242 - loss: 0.8011 - val_accuracy: 0.6827 - val_loss: 0.9139
Epoch 6/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 15ms/step - accuracy: 0.7473 - loss: 0.7218 - val_accuracy: 0.6840 - val_loss: 0.9145
Epoc

In [7]:
loss, accuracy = cnn_model.evaluate(x_test, y_test, verbose=0)
print(f"Baseline CNN Model Accuracy: {accuracy:.4f}")

Baseline CNN Model Accuracy: 0.6942


In [13]:
converter = tf.lite.TFLiteConverter.from_keras_model(cnn_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_tflite_model_weights_only = converter.convert()


# Save the unquantized TFLite model (optional, for size comparison later)
with open('cnn_quantized_weights_only.tflite', 'wb') as f:
    f.write(quantized_tflite_model_weights_only)

print("Quantized TFLite model (weights only) created: cnn_quantized_weights_only.tflite")


INFO:tensorflow:Assets written to: /var/folders/bq/lxd231rj5297hw1mjr7s_7t40000gn/T/tmp0c1nkyu2/assets


INFO:tensorflow:Assets written to: /var/folders/bq/lxd231rj5297hw1mjr7s_7t40000gn/T/tmp0c1nkyu2/assets


Saved artifact at '/var/folders/bq/lxd231rj5297hw1mjr7s_7t40000gn/T/tmp0c1nkyu2'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  5002336336: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5002341264: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5002766192: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5002763024: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5002768656: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5002937616: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5002945008: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5002939904: TensorSpec(shape=(), dtype=tf.resource, name=None)
Quantized TFLite model (weights only) created: cnn_quantized_weights_only.tflite


W0000 00:00:1744087947.674651  100240 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1744087947.674664  100240 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-04-08 10:22:27.674804: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/bq/lxd231rj5297hw1mjr7s_7t40000gn/T/tmp0c1nkyu2
2025-04-08 10:22:27.675236: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-04-08 10:22:27.675243: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /var/folders/bq/lxd231rj5297hw1mjr7s_7t40000gn/T/tmp0c1nkyu2
2025-04-08 10:22:27.678774: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-04-08 10:22:27.700476: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /var/folders/bq/lxd231rj5297hw1mjr7s_7t40000gn/T/tmp0c1nkyu2
2025-04-08 10:22:27.707068: I tensorflow/cc/saved_model/loader.cc:

In [14]:
import os

baseline_tflite_size = os.path.getsize('cnn_baseline.tflite') / (1024 * 1024)  # in MB
quantized_weights_only_size = os.path.getsize('cnn_quantized_weights_only.tflite') / (1024 * 1024)  # in MB
quantized_full_int_size = os.path.getsize('cnn_quantized_full_int.tflite') / (1024 * 1024) if os.path.exists('cnn_quantized_full_int.tflite') else "Not Created" # in MB

print(f"Baseline TFLite Model Size: {baseline_tflite_size:.2f} MB")
print(f"Quantized (Weights Only) TFLite Model Size: {quantized_weights_only_size:.2f} MB")
if isinstance(quantized_full_int_size, str):
    print(f"Fully Integer Quantized TFLite Model Size: {quantized_full_int_size}")
else:
    print(f"Fully Integer Quantized TFLite Model Size: {quantized_full_int_size:.2f} MB")

Baseline TFLite Model Size: 0.96 MB
Quantized (Weights Only) TFLite Model Size: 0.25 MB
Fully Integer Quantized TFLite Model Size: Not Created


In [17]:
converter = tf.lite.TFLiteConverter.from_keras_model(cnn_model)
converter.optimizations = ["FLOAT16"]

quantized_tflite_model_float16 = converter.convert()

# Save the float16 quantized TFLite model
with open('cnn_quantized_float16.tflite', 'wb') as f:
    f.write(quantized_tflite_model_float16)

print("Quantized TFLite model (float16 weights) created: cnn_quantized_float16.tflite")

INFO:tensorflow:Assets written to: /var/folders/bq/lxd231rj5297hw1mjr7s_7t40000gn/T/tmpfkz6213p/assets


INFO:tensorflow:Assets written to: /var/folders/bq/lxd231rj5297hw1mjr7s_7t40000gn/T/tmpfkz6213p/assets


Saved artifact at '/var/folders/bq/lxd231rj5297hw1mjr7s_7t40000gn/T/tmpfkz6213p'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  5002336336: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5002341264: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5002766192: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5002763024: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5002768656: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5002937616: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5002945008: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5002939904: TensorSpec(shape=(), dtype=tf.resource, name=None)


W0000 00:00:1744104860.439120  100240 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.


Quantized TFLite model (float16 weights) created: cnn_quantized_float16.tflite


W0000 00:00:1744104860.440631  100240 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-04-08 15:04:20.443910: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/bq/lxd231rj5297hw1mjr7s_7t40000gn/T/tmpfkz6213p
2025-04-08 15:04:20.444473: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-04-08 15:04:20.444480: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /var/folders/bq/lxd231rj5297hw1mjr7s_7t40000gn/T/tmpfkz6213p
2025-04-08 15:04:20.454004: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-04-08 15:04:20.527964: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /var/folders/bq/lxd231rj5297hw1mjr7s_7t40000gn/T/tmpfkz6213p
2025-04-08 15:04:20.534892: I tensorflow/cc/saved_model/loader.cc:471] SavedModel load for tags { serve }; Status: success: OK. Took 90982 microseconds.


In [18]:
# Evaluate the quantized TFLite model (float16 weights)
with open('cnn_quantized_float16.tflite', 'rb') as f:
    quantized_float16_tflite_model = f.read()
quantized_float16_accuracy = evaluate_tflite_model(quantized_float16_tflite_model, x_test, y_test)
print(f"Quantized (Float16 Weights) TFLite Model Accuracy: {quantized_float16_accuracy:.4f}")

    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    


Quantized (Float16 Weights) TFLite Model Accuracy: 0.6942


In [19]:
baseline_tflite_size = os.path.getsize('cnn_baseline.tflite') / (1024 * 1024)
quantized_weights_only_size = os.path.getsize('cnn_quantized_weights_only.tflite') / (1024 * 1024)
quantized_full_int_size = os.path.getsize('cnn_quantized_full_int.tflite') / (1024 * 1024) if os.path.exists('cnn_quantized_full_int.tflite') else "Not Created"
quantized_float16_size = os.path.getsize('cnn_quantized_float16.tflite') / (1024 * 1024)

print(f"Baseline TFLite Model Size: {baseline_tflite_size:.2f} MB")
print(f"Quantized (Weights Only) TFLite Model Size: {quantized_weights_only_size:.2f} MB")
if isinstance(quantized_full_int_size, str):
    print(f"Fully Integer Quantized TFLite Model Size: {quantized_full_int_size}")
else:
    print(f"Fully Integer Quantized TFLite Model Size: {quantized_full_int_size:.2f} MB")
print(f"Quantized (Float16 Weights) TFLite Model Size: {quantized_float16_size:.2f} MB")

Baseline TFLite Model Size: 0.96 MB
Quantized (Weights Only) TFLite Model Size: 0.25 MB
Fully Integer Quantized TFLite Model Size: Not Created
Quantized (Float16 Weights) TFLite Model Size: 0.96 MB


In [20]:
import tensorflow as tf
import numpy as np

# Load the CIFAR-10 dataset
(_, _), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Normalize test images
x_test = x_test.astype('float32') / 255.0

# Choose an image index from the test set
image_index = 0
test_image = x_test[image_index]
true_label = np.argmax(tf.keras.utils.to_categorical(y_test[image_index], 10))

# Add a batch dimension (models typically expect a batch of images)
test_image = np.expand_dims(test_image, axis=0).astype(np.float32)

class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

print(f"True label for the test image: {class_names[true_label]}")
print("Shape of the test image:", test_image.shape)

True label for the test image: cat
Shape of the test image: (1, 32, 32, 3)


In [21]:
interpreter = tf.lite.Interpreter(model_path='cnn_baseline.tflite')
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

interpreter.set_tensor(input_details[0]['index'], test_image)
interpreter.invoke()
predictions = interpreter.get_tensor(output_details[0]['index'])
predicted_class = np.argmax(predictions[0])
predicted_label = class_names[predicted_class]

print(f"Baseline TFLite Model Prediction: {predicted_label}")
print(f"Confidence scores: {predictions[0]}")

Baseline TFLite Model Prediction: cat
Confidence scores: [4.2388205e-05 1.7820208e-05 4.8666570e-04 8.5286611e-01 5.5649900e-05
 1.3791017e-01 5.2321791e-03 1.5701162e-05 3.3733596e-03 7.1313771e-08]


In [22]:
# For Weights Only Quantized Model
interpreter_quant_wo = tf.lite.Interpreter(model_path='cnn_quantized_weights_only.tflite')
interpreter_quant_wo.allocate_tensors()

input_details_quant_wo = interpreter_quant_wo.get_input_details()
output_details_quant_wo = interpreter_quant_wo.get_output_details()

interpreter_quant_wo.set_tensor(input_details_quant_wo[0]['index'], test_image)
interpreter_quant_wo.invoke()
predictions_quant_wo = interpreter_quant_wo.get_tensor(output_details_quant_wo[0]['index'])
predicted_class_quant_wo = np.argmax(predictions_quant_wo[0])
predicted_label_quant_wo = class_names[predicted_class_quant_wo]

print(f"Quantized (Weights Only) TFLite Model Prediction: {predicted_label_quant_wo}")
print(f"Confidence scores: {predictions_quant_wo[0]}")

# For Fully Integer Quantized Model (Adapt the inference part)
if os.path.exists('cnn_quantized_full_int.tflite'):
    interpreter_quant_full_int = tf.lite.Interpreter(model_path='cnn_quantized_full_int.tflite')
    interpreter_quant_full_int.allocate_tensors()

    input_details_quant_full_int = interpreter_quant_full_int.get_input_details()
    output_details_quant_full_int = interpreter_quant_full_int.get_output_details()

    # Quantize input
    input_scale, input_zero_point = input_details_quant_full_int[0]['quantization']
    input_tensor_quantized = (test_image / input_scale + input_zero_point).astype(np.int8)
    interpreter_quant_full_int.set_tensor(input_details_quant_full_int[0]['index'], input_tensor_quantized)

    interpreter_quant_full_int.invoke()

    # Dequantize output
    output_scale, output_zero_point = output_details_quant_full_int[0]['quantization']
    output_tensor_quantized = interpreter_quant_full_int.get_tensor(output_details_quant_full_int[0]['index'])
    output_tensor_dequantized = (output_tensor_quantized - output_zero_point) * output_scale
    predicted_class_quant_full_int = np.argmax(output_tensor_dequantized[0])
    predicted_label_quant_full_int = class_names[predicted_class_quant_full_int]

    print(f"Fully Integer Quantized TFLite Model Prediction: {predicted_label_quant_full_int}")
    print(f"Dequantized confidence scores: {output_tensor_dequantized[0]}")

# For Float16 Weights Quantized Model
interpreter_quant_fp16 = tf.lite.Interpreter(model_path='cnn_quantized_float16.tflite')
interpreter_quant_fp16.allocate_tensors()

input_details_quant_fp16 = interpreter_quant_fp16.get_input_details()
output_details_quant_fp16 = interpreter_quant_fp16.get_output_details()

interpreter_quant_fp16.set_tensor(input_details_quant_fp16[0]['index'], test_image)
interpreter_quant_fp16.invoke()
predictions_quant_fp16 = interpreter_quant_fp16.get_tensor(output_details_quant_fp16[0]['index'])
predicted_class_quant_fp16 = np.argmax(predictions_quant_fp16[0])
predicted_label_quant_fp16 = class_names[predicted_class_quant_fp16]

print(f"Quantized (Float16 Weights) TFLite Model Prediction: {predicted_label_quant_fp16}")
print(f"Confidence scores: {predictions_quant_fp16[0]}")

Quantized (Weights Only) TFLite Model Prediction: cat
Confidence scores: [4.3973836e-05 1.6392552e-05 4.7250974e-04 8.5544968e-01 5.6994795e-05
 1.3543701e-01 5.2743200e-03 1.4503495e-05 3.2345827e-03 6.9102782e-08]
Quantized (Float16 Weights) TFLite Model Prediction: cat
Confidence scores: [4.2388205e-05 1.7820208e-05 4.8666570e-04 8.5286611e-01 5.5649900e-05
 1.3791017e-01 5.2321791e-03 1.5701162e-05 3.3733596e-03 7.1313771e-08]
