# *IT00CH92 Embedded AI - Spring 2024*



## Change Images to Header File

In [None]:
!python convert_image.py "inputs/0.jpg" "headers/0.h"

In [None]:
!python convert_image.py "inputs/2.jpg" "headers/2.h"

In [None]:
!python convert_image.py "inputs/3.jpg" "headers/3.h"

In [None]:
!python convert_image.py "inputs/5.jpg" "headers/5.h"

In [None]:
!python convert_image.py "inputs/8.jpg" "headers/8.h"

## Declaring Model

In [2]:
import os
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
tf.random.set_seed(
    seed=81
)

In [4]:
# Define the model architecture (LeNet-5).
model = keras.models.Sequential([
  keras.layers.InputLayer(input_shape=(28, 28, 1)),
  keras.layers.Conv2D(filters=6, kernel_size=(5, 5), padding='same', activation='relu'),
  keras.layers.MaxPooling2D(pool_size=(2, 2)),
  keras.layers.Conv2D(filters=16, kernel_size=(5, 5), padding='same', activation='relu'),
  keras.layers.MaxPooling2D(pool_size=(2, 2)),
  keras.layers.Flatten(),
  keras.layers.Dense(120),
  keras.layers.Dense(84),
  keras.layers.Dense(10, activation='softmax')
])

# As truth labels are integer encoded, we use sparse categorical cross-entropy as loss fn
model.compile(optimizer='adam',
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False), # the ouputs are expected as probabilities
              metrics=['accuracy'])

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 28, 28, 6)         156       
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 14, 14, 6)         0         
 g2D)                                                            
                                                                 
 conv2d_3 (Conv2D)           (None, 14, 14, 16)        2416      
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 7, 7, 16)          0         
 g2D)                                                            
                                                                 
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 dense_3 (Dense)             (None, 120)              

In [5]:
(ds_train, ds_val, ds_test), ds_info = tfds.load(
    'mnist',
    split=['train[:90%]', 'train[90%:]', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True,
)

In [6]:
batch_size = 128
n_epochs = 4

In [7]:
def normalize_img(image:tf.uint8, label:tf.int64):
  """Normalizes images: `uint8` -> `float32`."""
  return tf.cast(image, tf.float32) / 255., label

def normalize_splits(ds, split_name: str, batch_size: int):
  """Applies preprocessing to train, val and test sets"""
  ds = ds.map(
    normalize_img, num_parallel_calls=tf.data.AUTOTUNE
  )
  ds = ds.cache() # Caching makes it faster for consecutive runs
  if split_name != 'test':
    # Shuffling is not done for the test set
    ds = ds.shuffle(ds_info.splits[split_name].num_examples)
  ds = ds.batch(batch_size)
  ds = ds.prefetch(tf.data.AUTOTUNE)
  return ds

In [8]:
ds_train = normalize_splits(ds_train, split_name='train[:90%]', batch_size=batch_size)
ds_val = normalize_splits(ds_val, split_name='train[90%:]', batch_size=batch_size)
ds_test = normalize_splits(ds_test, split_name='test', batch_size=batch_size)

In [9]:
def representative_data_gen():
  for image, label in ds_train.unbatch().take(100):
    yield [tf.expand_dims(image, 0)]

## Pruning and Quantization

In [10]:
import tensorflow_model_optimization as tfmot
import tempfile

### Load Existing Model

In [11]:
keras_file = 'models/Full_Precision_MNIST_TF.h5'
# Load weights of the model
model.load_weights(keras_file)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 28, 28, 6)         156       
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 14, 14, 6)         0         
 g2D)                                                            
                                                                 
 conv2d_3 (Conv2D)           (None, 14, 14, 16)        2416      
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 7, 7, 16)          0         
 g2D)                                                            
                                                                 
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 dense_3 (Dense)             (None, 120)              

### Pruning

In [None]:
# pruning_params = {
#     'pruning_schedule': tfmot.sparsity.keras.ConstantSparsity(
#         target_sparsity=0.5,
#         begin_step=0
#     )
# }
# pruning_params = {
#     'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
#         initial_sparsity=0.50,
#         final_sparsity=0.80,
#         begin_step=0,
#         end_step=1000
#     )
# }

In [12]:
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude
model_for_pruning = prune_low_magnitude(model)

# `prune_low_magnitude` requires a recompile.
model_for_pruning.compile(optimizer='adam',
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

print(model_for_pruning.summary())
pruned_model = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 prune_low_magnitude_conv2d  (None, 28, 28, 6)         308       
 _2 (PruneLowMagnitude)                                          
                                                                 
 prune_low_magnitude_max_po  (None, 14, 14, 6)         1         
 oling2d_2 (PruneLowMagnitu                                      
 de)                                                             
                                                                 
 prune_low_magnitude_conv2d  (None, 14, 14, 16)        4818      
 _3 (PruneLowMagnitude)                                          
                                                                 
 prune_low_magnitude_max_po  (None, 7, 7, 16)          1         
 oling2d_3 (PruneLowMagnitu                                      
 de)                                                  

### Quantization

In [13]:
# Dynamic Range Quantization
converter = tf.lite.TFLiteConverter.from_keras_model(pruned_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

In [None]:
# Float16
converter.target_spec.supported_types = [tf.float16]

In [14]:
# 8bit Integers
converter.representative_dataset = representative_data_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8  # or tf.uint8
converter.inference_output_type = tf.int8  # or tf.uint8

In [15]:
# Create the quantized-pruned model
pruned_quant_tflite_model = converter.convert()

# Save model
tflite_model_path = "models/pruned_quant_model.tflite"
with open(tflite_model_path, "wb") as f:
    f.write(pruned_quant_tflite_model)

print('Saved pruned TFLite model to:', tflite_model_path)

INFO:tensorflow:Assets written to: /tmp/tmp4e4tl5_b/assets


INFO:tensorflow:Assets written to: /tmp/tmp4e4tl5_b/assets
2024-05-27 13:41:45.849536: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-05-27 13:41:45.849575: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-05-27 13:41:45.850062: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmp4e4tl5_b
2024-05-27 13:41:45.850849: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-05-27 13:41:45.850866: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /tmp/tmp4e4tl5_b
2024-05-27 13:41:45.852369: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled
2024-05-27 13:41:45.852989: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-05-27 13:41:45.871736: I tensorflow/cc/saved_model/loader.cc:217] Running initializatio

Saved pruned TFLite model to: models/pruned_quant_model.tflite


fully_quantize: 0, inference_type: 6, input_inference_type: INT8, output_inference_type: INT8


In [17]:
import os
import zipfile

def get_gzipped_model_size(file):
  
  _, zipped_file = tempfile.mkstemp('.zip')
  with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
    f.write(file)

  # Returns size of gzipped model, in bytes
  return os.path.getsize(zipped_file)

In [18]:
print("Size of gzipped baseline Keras model: %.2f bytes" % (get_gzipped_model_size(keras_file)))
print("Size of gzipped pruned TFlite model: %.2f bytes" % (get_gzipped_model_size(tflite_model_path)))

Size of gzipped baseline Keras model: 1200911.00 bytes
Size of gzipped pruned TFlite model: 96525.00 bytes


## Change Model to Header file

In [22]:
!echo "const unsigned char model[] = {" > headers/model.h
!cat "models/pruned_quant_model.tflite" | xxd -i >> headers/model.h
!echo "};"                              >> headers/model.h

import os
model_h_size = os.path.getsize("headers/model.h")
print(f"Header file, model.h, is {model_h_size:,} bytes.")
print("\nOpen the side panel (refresh if needed). Double click model.h to download the file.")

Header file, model.h, is 696,374 bytes.

Open the side panel (refresh if needed). Double click model.h to download the file.
