# *IT00CH92 Embedded AI - Spring 2024*



## Change Images to Header File

In [None]:
!python convert_image.py "inputs/0.jpg" "headers/0.h"

In [None]:
!python convert_image.py "inputs/2.jpg" "headers/2.h"

In [None]:
!python convert_image.py "inputs/3.jpg" "headers/3.h"

In [None]:
!python convert_image.py "inputs/5.jpg" "headers/5.h"

In [None]:
!python convert_image.py "inputs/8.jpg" "headers/8.h"

## Declaring Model

In [1]:
import os
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
tf.random.set_seed(
    seed=81
)

2024-05-27 15:13:03.850497: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-27 15:13:03.958216: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-27 15:13:03.958255: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-27 15:13:03.959707: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-05-27 15:13:03.968596: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-27 15:13:03.969959: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [2]:
# Define the model architecture (LeNet-5).
model = keras.models.Sequential([
  keras.layers.InputLayer(input_shape=(28, 28, 1)),
  keras.layers.Conv2D(filters=6, kernel_size=(5, 5), padding='same', activation='relu'),
  keras.layers.MaxPooling2D(pool_size=(2, 2)),
  keras.layers.Conv2D(filters=16, kernel_size=(5, 5), padding='same', activation='relu'),
  keras.layers.MaxPooling2D(pool_size=(2, 2)),
  keras.layers.Flatten(),
  keras.layers.Dense(120),
  keras.layers.Dense(84),
  keras.layers.Dense(10, activation='softmax')
])

# As truth labels are integer encoded, we use sparse categorical cross-entropy as loss fn
model.compile(optimizer='adam',
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False), # the ouputs are expected as probabilities
              metrics=['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 6)         156       
                                                                 
 max_pooling2d (MaxPooling2  (None, 14, 14, 6)         0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 16)        2416      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 7, 7, 16)          0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 120)               9

In [3]:
(ds_train, ds_val, ds_test), ds_info = tfds.load(
    'mnist',
    split=['train[:90%]', 'train[90%:]', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True,
)

In [4]:
batch_size = 128
n_epochs = 4
n_finetune_epochs = 2

In [5]:
def normalize_img(image:tf.uint8, label:tf.int64):
  """Normalizes images: `uint8` -> `float32`."""
  return tf.cast(image, tf.float32) / 255., label

def normalize_splits(ds, split_name: str, batch_size: int):
  """Applies preprocessing to train, val and test sets"""
  ds = ds.map(
    normalize_img, num_parallel_calls=tf.data.AUTOTUNE
  )
  ds = ds.cache() # Caching makes it faster for consecutive runs
  if split_name != 'test':
    # Shuffling is not done for the test set
    ds = ds.shuffle(ds_info.splits[split_name].num_examples)
  ds = ds.batch(batch_size)
  ds = ds.prefetch(tf.data.AUTOTUNE)
  return ds

In [6]:
ds_train = normalize_splits(ds_train, split_name='train[:90%]', batch_size=batch_size)
ds_val = normalize_splits(ds_val, split_name='train[90%:]', batch_size=batch_size)
ds_test = normalize_splits(ds_test, split_name='test', batch_size=batch_size)

In [7]:
def representative_data_gen():
  for image, label in ds_train.unbatch().take(100):
    yield [tf.expand_dims(image, 0)]

## Pruning and Quantization

In [8]:
import tensorflow_model_optimization as tfmot
import tempfile

### Load Existing Model

In [9]:
keras_file = 'models/Full_Precision_MNIST_TF.h5'
# Load weights of the model
model.load_weights(keras_file)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 6)         156       
                                                                 
 max_pooling2d (MaxPooling2  (None, 14, 14, 6)         0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 16)        2416      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 7, 7, 16)          0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 120)               9

### Pruning

In [10]:
# Compute end step to finish pruning after 2 epochs.
num_images = ds_info.splits['train[:90%]'].num_examples
end_step = np.ceil(num_images / batch_size).astype(np.int32) * n_finetune_epochs

In [11]:
# Define model for pruning.
# pruning_params = {
#     'pruning_schedule': tfmot.sparsity.keras.ConstantSparsity(
#         target_sparsity=0.5,
#         begin_step=0
#     )
# }
pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.50,
        final_sparsity=0.80,
        begin_step=0,
        end_step=end_step
    )
}

In [12]:
pruned_model = tfmot.sparsity.keras.prune_low_magnitude(model, **pruning_params)

# `prune_low_magnitude` requires a recompile.
pruned_model.compile(optimizer='adam',
                          loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                          metrics=['accuracy'])
pruned_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 prune_low_magnitude_conv2d  (None, 28, 28, 6)         308       
  (PruneLowMagnitude)                                            
                                                                 
 prune_low_magnitude_max_po  (None, 14, 14, 6)         1         
 oling2d (PruneLowMagnitude                                      
 )                                                               
                                                                 
 prune_low_magnitude_conv2d  (None, 14, 14, 16)        4818      
 _1 (PruneLowMagnitude)                                          
                                                                 
 prune_low_magnitude_max_po  (None, 7, 7, 16)          1         
 oling2d_1 (PruneLowMagnitu                                      
 de)                                                    

In [13]:
# Fine-tuning the model
logdir = "logs"

callbacks = [
    tfmot.sparsity.keras.UpdatePruningStep(),
    tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]

pruned_model.fit(ds_train,
                      validation_data=ds_val,
                      epochs=n_finetune_epochs,
                      callbacks=callbacks)

Epoch 1/2


  output, from_logits = _get_logits(


  3/422 [..............................] - ETA: 21s - loss: 0.0261 - accuracy: 0.9948 

2024-05-27 15:15:22.090472: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 26577600 exceeds 10% of free system memory.
2024-05-27 15:15:22.090635: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 26577600 exceeds 10% of free system memory.
2024-05-27 15:15:22.109162: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 25244800 exceeds 10% of free system memory.
2024-05-27 15:15:22.191594: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 26577600 exceeds 10% of free system memory.
2024-05-27 15:15:22.191655: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 26577600 exceeds 10% of free system memory.


Epoch 2/2


<keras.src.callbacks.History at 0x7fbf8efdd450>

In [14]:
# Calculate baseline accuracy
_, baseline_accuracy = model.evaluate(ds_test, verbose=0)
print('Baseline test accuracy:', baseline_accuracy)

Baseline test accuracy: 0.9848999977111816


In [16]:
# Calculate
_, pruned_model_accuracy = pruned_model.evaluate(ds_test, verbose=0)
print('Pruned test accuracy:', pruned_model_accuracy)

Pruned test accuracy: 0.9848999977111816


In [17]:
# Remove pruning wrappers
pruned_model = tfmot.sparsity.keras.strip_pruning(pruned_model)

In [18]:
# Create TFlite version of pruned model
converter = tf.lite.TFLiteConverter.from_keras_model(pruned_model)
pruned_tflite_model = converter.convert()

INFO:tensorflow:Assets written to: /tmp/tmplbjd9xx7/assets


INFO:tensorflow:Assets written to: /tmp/tmplbjd9xx7/assets
2024-05-27 15:17:28.723486: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-05-27 15:17:28.723516: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-05-27 15:17:28.723843: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmplbjd9xx7
2024-05-27 15:17:28.724529: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-05-27 15:17:28.724544: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /tmp/tmplbjd9xx7
2024-05-27 15:17:28.725707: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled
2024-05-27 15:17:28.726229: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-05-27 15:17:28.743625: I tensorflow/cc/saved_model/loader.cc:217] Running initializatio

In [20]:
# Save model
pruned_tflite_model_path = "models/pruned_model.tflite"
with open(pruned_tflite_model_path, "wb") as f:
    f.write(pruned_tflite_model)

print('Saved pruned TFLite model to:', pruned_tflite_model_path)

Saved pruned TFLite model to: models/pruned_model.tflite


### Quantization

In [21]:
# Dynamic Range Quantization
converter.optimizations = [tf.lite.Optimize.DEFAULT]

In [None]:
# Float16
converter.target_spec.supported_types = [tf.float16]

In [22]:
# 8bit Integers
converter.representative_dataset = representative_data_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8  # or tf.uint8
converter.inference_output_type = tf.int8  # or tf.uint8

In [23]:
# Create the quantized-pruned model
quant_pruned_tflite_model = converter.convert()

INFO:tensorflow:Assets written to: /tmp/tmpyp06t563/assets


INFO:tensorflow:Assets written to: /tmp/tmpyp06t563/assets
2024-05-27 15:18:18.484739: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-05-27 15:18:18.484775: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-05-27 15:18:18.485022: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpyp06t563
2024-05-27 15:18:18.485842: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-05-27 15:18:18.485857: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /tmp/tmpyp06t563
2024-05-27 15:18:18.487524: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-05-27 15:18:18.504968: I tensorflow/cc/saved_model/loader.cc:217] Running initialization op on SavedModel bundle at path: /tmp/tmpyp06t563
2024-05-27 15:18:18.512297: I tensorflow/cc/saved_model/loader.cc:316] SavedModel

In [24]:
# Save model
quant_pruned_tflite_model_path = "models/quant_pruned_model.tflite"
with open(quant_pruned_tflite_model_path, "wb") as f:
    f.write(quant_pruned_tflite_model)

print('Saved quant pruned TFLite model to:', quant_pruned_tflite_model_path)

Saved quant pruned TFLite model to: models/quant_pruned_model.tflite


In [25]:
import os
import zipfile

def get_gzipped_model_size(file):
  
  _, zipped_file = tempfile.mkstemp('.zip')
  with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
    f.write(file)

  # Returns size of gzipped model, in bytes
  return os.path.getsize(zipped_file)

In [26]:
print("Size of gzipped baseline Keras model: %.2f bytes" % (get_gzipped_model_size(keras_file)))
print("Size of gzipped pruned TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_model_path)))
print("Size of gzipped quantized and pruned TFlite model: %.2f bytes" % (get_gzipped_model_size(quant_pruned_tflite_model_path)))

Size of gzipped baseline Keras model: 1200911.00 bytes
Size of gzipped pruned TFlite model: 127958.00 bytes
Size of gzipped quantized and pruned TFlite model: 39015.00 bytes


## Change Model to Header file

In [27]:
!echo "const unsigned char model[] = {" > headers/model.h
!cat "models/quant_pruned_model.tflite" | xxd -i >> headers/model.h
!echo "};"                              >> headers/model.h

import os
model_h_size = os.path.getsize("headers/model.h")
print(f"Header file, model.h, is {model_h_size:,} bytes.")
print("\nOpen the side panel (refresh if needed). Double click model.h to download the file.")

Header file, model.h, is 696,374 bytes.

Open the side panel (refresh if needed). Double click model.h to download the file.
