<a href="https://colab.research.google.com/github/fateen108/TFLite-Model-Optimization/blob/main/Project_Optimize_a_Neural_Network_for_AI_Hardware_(FPGA_Edge_TPU).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import time

# Instantiate MobileNetV2 with ImageNet weights.
model = tf.keras.applications.MobileNetV2(weights="imagenet")

# Build a TFLite converter from the Keras model and serialize it with no
# optimizations configured.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the serialized model to disk; the benchmarking cell loads it by path.
with open("mobilenet.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)

print("Model converted to TensorFlow Lite format!")


Saved artifact at '/tmp/tmphrvwop4k'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='keras_tensor_312')
Output Type:
  TensorSpec(shape=(None, 1000), dtype=tf.float32, name=None)
Captures:
  135155372943952: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135155372944912: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135155372946064: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135155372945488: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135155372943760: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135155372945296: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135155448447632: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135155448448016: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135155372947024: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135155372944144: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1351554

In [None]:
import numpy as np

# Load the unoptimized TensorFlow Lite model and allocate its tensor buffers.
interpreter = tf.lite.Interpreter(model_path="mobilenet.tflite")
interpreter.allocate_tensors()

# Tensor metadata used to feed the input and locate the output.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Build a random float32 input matching the model's declared input shape.
# A seeded generator keeps the tensor identical across "Restart & Run All".
input_shape = input_details[0]['shape']
rng = np.random.default_rng(0)
input_data = rng.random(size=tuple(int(dim) for dim in input_shape), dtype=np.float32)

# Time one set_tensor + invoke round trip. perf_counter() is a monotonic,
# high-resolution clock; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()
end_time = time.perf_counter()

# Convert seconds to milliseconds.
# NOTE: this is a single run with no warm-up, so treat the figure as a rough
# estimate rather than a stable benchmark.
inference_time = (end_time - start_time) * 1000
print(f"Inference Time: {inference_time:.2f} ms")


Inference Time: 62.39 ms


In [None]:
# Apply post-training quantization.
# NOTE: Optimize.DEFAULT with no representative dataset performs dynamic-range
# quantization: weights are stored as 8-bit integers, while activations and
# the model's inputs/outputs stay float32. This is NOT a full-integer model;
# full int8 quantization additionally needs `representative_dataset` to be
# set on the converter.
#
# A dedicated converter is created here instead of mutating the one used for
# the float model, so re-running cells in any order cannot change what
# `converter` produces.
quant_converter = tf.lite.TFLiteConverter.from_keras_model(model)
quant_converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Convert the model with the quantization settings applied.
tflite_quantized_model = quant_converter.convert()

# Save quantized model
with open("mobilenet_quantized.tflite", "wb") as f:
    f.write(tflite_quantized_model)

print("Quantized model saved!")


Saved artifact at '/tmp/tmp4h5h3xuu'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='keras_tensor_312')
Output Type:
  TensorSpec(shape=(None, 1000), dtype=tf.float32, name=None)
Captures:
  135155372943952: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135155372944912: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135155372946064: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135155372945488: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135155372943760: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135155372945296: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135155448447632: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135155448448016: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135155372947024: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135155372944144: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1351554

In [None]:
import os

# Report the on-disk size of both .tflite files, expressed in MB
# (1 MB = 1024 * 1024 bytes here).
BYTES_PER_MB = 1024 * 1024

original_size = os.path.getsize("mobilenet.tflite") / BYTES_PER_MB
quantized_size = os.path.getsize("mobilenet_quantized.tflite") / BYTES_PER_MB

print(f"Original Model Size: {original_size:.2f} MB")
print(f"Quantized Model Size: {quantized_size:.2f} MB")

Original Model Size: 13.34 MB
Quantized Model Size: 3.62 MB


In [None]:
# Load the quantized model and allocate its tensor buffers.
interpreter_q = tf.lite.Interpreter(model_path="mobilenet_quantized.tflite")
interpreter_q.allocate_tensors()

# Tensor indices are specific to each model, so query this interpreter's own
# input metadata instead of reusing the float interpreter's `input_details`.
input_details_q = interpreter_q.get_input_details()

# Time one set_tensor + invoke round trip, the same regimen used for the
# float model so the two figures stay comparable. perf_counter() is a
# monotonic, high-resolution clock suited to measuring short intervals.
start_time = time.perf_counter()
interpreter_q.set_tensor(input_details_q[0]['index'], input_data)
interpreter_q.invoke()
end_time = time.perf_counter()

# Convert seconds to milliseconds.
# NOTE: this is a single run with no warm-up, so treat the figure as a rough
# estimate rather than a stable benchmark.
inference_time_q = (end_time - start_time) * 1000
print(f"Quantized Inference Time: {inference_time_q:.2f} ms")


Quantized Inference Time: 21.13 ms
