In [None]:
%tensorflow_version 2.x
import tensorflow as tf
from google.colab import files
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import math
import glob
import os
!apt-get update && apt-get -qq install xxd

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

## Generate a TensorFlow Lite Model

Convert the SavedModel into a TensorFlow Lite model, which is then fully quantized for use with embedded devices. The conversion cell will also print the model size.

In [4]:
# Directory that holds every model artifact produced by this notebook.
MODELS_DIR = 'models'
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race.
os.makedirs(MODELS_DIR, exist_ok=True)

# SavedModel directory (input), float / quantized TFLite files and the C source (outputs).
SAVED_MODEL_FILENAME = os.path.join(MODELS_DIR, "mhr")
FLOAT_TFL_MODEL_FILENAME = os.path.join(MODELS_DIR, "mhr_float.tfl")
QUANTIZED_TFL_MODEL_FILENAME = os.path.join(MODELS_DIR, "mhr.tfl")
TFL_CC_MODEL_FILENAME = os.path.join(MODELS_DIR, "mhr.cc")

In [None]:
!wget https://raw.githubusercontent.com/cargilgar/Smart-Alarm-using-tinyML/main/data/models/mhr_saved_model.pb
!wget https://raw.githubusercontent.com/cargilgar/Smart-Alarm-using-tinyML/main/data/models/keras_metadata.pb

!mkdir ./models/mhr/

!mv mhr_saved_model.pb saved_model.pb

!mv *.pb ./models/mhr/

In [None]:
# Paths (SAVED_MODEL_FILENAME, FLOAT_TFL_MODEL_FILENAME, ...) come from the
# configuration cell near the top of the notebook.

# --- Float (non-quantized) TensorFlow Lite model
converter = tf.lite.TFLiteConverter.from_saved_model(SAVED_MODEL_FILENAME)
model_no_quant_tflite = converter.convert()

# Save the model to disk; write_bytes opens, writes and closes the file in one call,
# so no file handle is left open.
Path(FLOAT_TFL_MODEL_FILENAME).write_bytes(model_no_quant_tflite)

# --- Configure the same converter for full integer quantization
# Set the optimization flag.
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Enforce integer only quantization
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

# Number of rows drawn from the data set to calibrate the quantization ranges.
DATA_SAMPLE_SIZE = 100

def representative_data_gen():
    """Yield calibration inputs for the converter's post-training quantization.

    Draws DATA_SAMPLE_SIZE random rows from the notebook-level ``df``, casts them
    to float32 and yields the feature columns (every column except the last one)
    one row at a time, each wrapped in a list.
    """
    calibration_rows = np.array(df.sample(DATA_SAMPLE_SIZE), dtype=np.float32)

    for sample in calibration_rows:
        features = sample[:-1]  # the last column holds the label, not a model input
        # NOTE(review): list() over the (1, n_features) array produces a list holding
        # a single 1-D array of n_features values - confirm this is the input shape
        # the model expects during calibration.
        yield list(features.reshape(1, features.shape[0]))


# Calibrate the quantization ranges with samples produced by the generator above,
# then run the conversion with the integer-only settings.
converter.representative_dataset = representative_data_gen

model_tflite = converter.convert()

# Save the model to disk. write_bytes closes the file for us and returns the number
# of bytes written; as the last expression of the cell it is displayed as the
# quantized model size.
Path(QUANTIZED_TFL_MODEL_FILENAME).write_bytes(model_tflite)

In [None]:
# command line
# tflite_convert --output_file=model.tflite --saved_model_dir=/tmp/saved_model

Compare the sizes of the TensorFlow, TensorFlow Lite and Quantized TensorFlow Lite models.

In [None]:
def get_dir_size(dir):
    """Return the total size in bytes of all files under ``dir``, recursively.

    Args:
        dir: Path of the directory to measure.

    Returns:
        The sum of ``st_size`` over every file in ``dir`` and in all of its
        sub-directories; 0 for an empty directory.

    Raises:
        OSError: If ``dir`` does not exist or cannot be read.
    """
    size = 0
    # The context manager closes the scandir iterator as soon as the walk is done.
    with os.scandir(dir) as entries:
        for entry in entries:
            if entry.is_file():
                size += entry.stat().st_size
            elif entry.is_dir():
                # Descend into sub-directories so nested files are counted too.
                size += get_dir_size(entry.path)
    return size

In [None]:
# Measure the on-disk footprint of each model variant
size_tf = get_dir_size(SAVED_MODEL_FILENAME)
size_no_quant_tflite, size_tflite = (
    os.path.getsize(model_path)
    for model_path in (FLOAT_TFL_MODEL_FILENAME, QUANTIZED_TFL_MODEL_FILENAME)
)

# Bytes saved at each step: SavedModel -> float TFLite -> quantized TFLite
saved_by_tflite = size_tf - size_no_quant_tflite
saved_by_quantization = size_no_quant_tflite - size_tflite

# One table row per model variant; the frame is the cell's displayed output
size_rows = [
    ["TensorFlow", f"{size_tf} bytes", ""],
    ["TensorFlow Lite", f"{size_no_quant_tflite} bytes ", f"(reduced by {saved_by_tflite} bytes)"],
    ["TensorFlow Lite Quantized", f"{size_tflite} bytes", f"(reduced by {saved_by_quantization} bytes)"],
]
pd.DataFrame.from_records(size_rows, columns=["Model", "Size", ""])


The generated `quantized model` shows roughly a **4x** reduction in size compared to the `float model` (original version).

So far so good. Let's see how much the penalty has been for the accuracy metric.

### Testing the accuracy after Quantization

Verify that the model we've exported is still accurate, using the TF Lite Python API and our test set.

In [None]:
# Helper function to run inference
def run_tflite_inference_testSet(tflite_model_path, model_type="Float", num_samples=10000):
    """Evaluate a TensorFlow Lite model on a random sample of the test set.

    Rows are drawn from the notebook-level ``test_df``; the last column is used
    as the class label and all other columns as the model input. The accuracy
    is printed, nothing is returned.

    Args:
        tflite_model_path: Path of the TensorFlow Lite model file to load.
        model_type: Label used in the printed report. When it equals
            "Quantized" the float inputs are quantized with the scale and
            zero point reported by the interpreter before inference.
        num_samples: Maximum number of test rows to evaluate. Clamped to the
            number of rows available in ``test_df``.
    """
    # --- Load test data
    num_samples = min(num_samples, len(test_df))
    test_data = np.asarray(test_df.sample(num_samples), dtype=np.float32)

    test_labels = test_data[:, -1]
    # Add an axis so that every test_samples[i] carries a leading batch dimension of 1.
    test_samples = np.expand_dims(test_data[:, :-1], axis=1)

    # --- Initialize the interpreter
    interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]

    # --- For quantized models, manually quantize the input data from float to integer
    if model_type == "Quantized":
        input_scale, input_zero_point = input_details["quantization"]
        input_dtype = input_details["dtype"]
        # Round to the nearest quantization step instead of truncating in astype().
        quantized = np.round(test_samples / input_scale) + input_zero_point
        if np.issubdtype(input_dtype, np.integer):
            # Saturate instead of wrapping around for values outside the integer range.
            limits = np.iinfo(input_dtype)
            quantized = np.clip(quantized, limits.min, limits.max)
        test_samples = quantized.astype(input_dtype)

    # --- Evaluate the predictions
    correct_predictions = 0
    for sample, label in zip(test_samples, test_labels):
        interpreter.set_tensor(input_details["index"], sample)
        interpreter.invoke()
        output = interpreter.get_tensor(output_details["index"])[0]
        # int() keeps the counter a plain Python integer rather than a numpy array.
        correct_predictions += int(output.argmax() == label)

    total = len(test_samples)
    accuracy = (correct_predictions * 100) / total if total else float("nan")
    print('%s model accuracy is %f%% (Number of test samples=%d)' % (
        model_type, accuracy, total))

In [None]:
# Report the accuracy of the float model first, then of the quantized one
for model_path, model_label in (
    (FLOAT_TFL_MODEL_FILENAME, "Float"),
    (QUANTIZED_TFL_MODEL_FILENAME, "Quantized"),
):
    run_tflite_inference_testSet(model_path, model_type=model_label)

## Generate TensorFlow Lite for Microcontrollers Model
Use the ```xxd``` tool to convert the ```.tflite``` quantized TensorFlow Lite model into a ```.cc``` C source file that can be loaded by TensorFlow Lite for Microcontrollers on the Arduino.

In [None]:
# Convert to a C source file, i.e, a TensorFlow Lite for Microcontrollers model.
# `xxd -i` dumps the quantized model as a C include-style array definition,
# which is redirected into the .cc file.
!xxd -i {QUANTIZED_TFL_MODEL_FILENAME} > {TFL_CC_MODEL_FILENAME}

# Update variable names.
# REPLACE_TEXT is the model path with '/' and '.' turned into '_', i.e. the
# identifier text that sed looks for in the generated file.
# NOTE(review): the replacement name g_magic_wand_model_data looks inherited from
# another example - confirm it is the symbol the firmware code expects.
REPLACE_TEXT = QUANTIZED_TFL_MODEL_FILENAME.replace('/', '_').replace('.', '_')
!sed -i 's/'{REPLACE_TEXT}'/g_magic_wand_model_data/g' {TFL_CC_MODEL_FILENAME}

That's it! You've successfully converted your TensorFlow Lite model into a TensorFlow Lite for Microcontrollers model! Run the cell below to print out its contents, which we'll need for our next step: deploying the model using the Arduino IDE!

In [None]:
# Print the generated C source file so its contents can be copied out of the notebook
!cat {TFL_CC_MODEL_FILENAME}