## Setup Environment

Import Dependencies

In [None]:
# TensorFlow is an open source machine learning library
import tensorflow as tf
from tensorflow import keras
# Numpy is a math library
import numpy as np
# Pandas is a data manipulation library
import pandas as pd

from matplotlib import pyplot as plt
import seaborn as sns

# Useful function for dealing with data, and classical machine learning
from sklearn import preprocessing
from sklearn.metrics import classification_report, confusion_matrix
# Define paths to model files
import os
import sys

# Make the project's helper modules under ../src importable from this notebook
module_path = os.path.abspath('../src')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

%load_ext autoreload
%autoreload 2

from helpers import (read_data,  # noqa E402
                     create_segments_and_labels,
                     save_converted_model,
                     compare)

# Notebook-wide display defaults
sns.set()  # seaborn defaults first ...
plt.style.use('ggplot')  # ... then the ggplot matplotlib style on top
# Show floats in DataFrames with a single decimal
pd.set_option('display.float_format', '{:.1f}'.format)
print('keras version ', keras.__version__)
%matplotlib inline

# Where the saved Keras model lives and where the converted models are written
MODELS_DIR = '../models/cnn/'
MODEL_TF = f'{MODELS_DIR}saved2'
MODEL_NO_QUANT_TFLITE = f'{MODELS_DIR}model_no_quant.tflite'
MODEL_TFLITE = f'{MODELS_DIR}model.tflite'
MODEL_TFLITE_MICRO = f'{MODELS_DIR}model.cc'
# Seed both NumPy and TensorFlow so repeated runs are reproducible
seed = 1
tf.random.set_seed(seed)
np.random.seed(seed)

## Get test data

In [None]:
# Length of one time segment, in steps
TIME_PERIODS = 80
# Stride between the starts of consecutive segments, in steps.
# A stride equal to TIME_PERIODS means the segments do not overlap.
STEP_DISTANCE = 40

In [None]:
# Two recordings are loaded; 'Jogging' rows are dropped from the second one
df1 = read_data('../data/data_adem.txt')
df2 = read_data('../data/data_mathis.txt').loc[
    lambda recording: recording['activity'] != 'Jogging'
]

# Stack both recordings into the evaluation set
# (to evaluate on the first recording only, use `df = df1` instead)
df = pd.concat([df1, df2])
df

In [None]:
# Get the class names from the data, sorted.
# The names are used as row/column labels of the confusion matrices below.
# sklearn's LabelEncoder assigns integer codes in sorted class order and
# confusion_matrix orders its rows/columns by those codes, so the names must
# be sorted too; `unique()` alone returns them in order of first appearance,
# which would attach the wrong name to a class.
LABELS = np.sort(df["activity"].unique())
print(LABELS)

In [None]:
# Name of the DataFrame column that will hold the integer-encoded activity
LABEL = "ActivityEncoded"
# Learn the string -> integer mapping from the activity names ...
activity_names = df["activity"].values.ravel()
le = preprocessing.LabelEncoder()
le.fit(activity_names)
# ... and store the encoded values as a new column of the DataFrame
df[LABEL] = le.transform(activity_names)

In [None]:
# Round the three axis columns to six decimals, then cut the recording into
# segments of TIME_PERIODS steps, STEP_DISTANCE steps apart
df = df.round(dict.fromkeys(('x-axis', 'y-axis', 'z-axis'), 6))
segments = create_segments_and_labels(df, TIME_PERIODS, STEP_DISTANCE, LABEL)
x_test, y_test = segments

## Generate a TensorFlow Lite Model

### 1. Generate Models with or without Quantization
We now have an acceptably accurate model. We'll use the [TensorFlow Lite Converter](https://www.tensorflow.org/lite/convert) to convert the model into a special, space-efficient format for use on memory-constrained devices.

Since this model is going to be deployed on a microcontroller, we want it to be as tiny as possible! One technique for reducing the size of a model is called [quantization](https://www.tensorflow.org/lite/performance/post_training_quantization). It reduces the precision of the model's weights, and possibly the activations (output of each layer) as well, which saves memory, often without much impact on accuracy. Quantized models also run faster, since the calculations required are simpler.

In the following cell, we'll convert the model twice: once with quantization, once without.

In [None]:
# Convert the model to the TensorFlow Lite format without quantization
converter = tf.lite.TFLiteConverter.from_saved_model(MODEL_TF)
model_no_quant_tflite = converter.convert()

# Save the model to disk; the context manager guarantees the file handle is
# flushed and closed even if the write fails
with open(MODEL_NO_QUANT_TFLITE, "wb") as model_file:
    model_file.write(model_no_quant_tflite)


def representative_dataset():
    """Yield calibration samples for post-training quantization.

    Yields the first 100 entries of ``x_test`` one at a time (batch size 1),
    each cast to float32 and wrapped in a single-element list.
    """
    for x in tf.data.Dataset.from_tensor_slices((x_test)).batch(1).take(100):
        yield [tf.dtypes.cast(x, tf.float32)]


# Convert the model to the TensorFlow Lite format with quantization,
# re-using the converter created above.
# Set the optimization flag.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Use int8 for the model's input and output tensors.
# NOTE(review): `converter.target_spec.supported_ops` is not restricted here,
# so this alone may not force every op to an integer kernel - confirm against
# the converter documentation if strict integer-only execution is required.
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
# Provide a representative dataset to ensure we quantize correctly.
converter.representative_dataset = representative_dataset
model_tflite = converter.convert()

# Save the quantized model to disk
with open(MODEL_TFLITE, "wb") as model_file:
    model_file.write(model_tflite)

### 2. Compare Model Performance

To prove these models are accurate even after conversion and quantization, we'll compare their predictions and loss on our test dataset.

**Helper functions**

We define the `predict_tflite` (for predictions) and `evaluate_tflite` (for loss) functions for TFLite models. *Note: Equivalent `predict` and `evaluate` methods are already built into a TF model, but not into a TFLite model.*

In [None]:
# Read the segment length and sensor count off the test array
# (axis 1 and axis 2 respectively)
test_shape = x_test.shape
num_time_periods = test_shape[1]
num_sensors = test_shape[2]
# Shape of a single sample as fed to the models
input_shape = (TIME_PERIODS, num_sensors)

In [None]:
def predict_tflite(tflite_model, x_test):
    """Run a TFLite model on every sample of ``x_test``.

    Args:
        tflite_model: Serialized TFLite model (the bytes returned by the
            converter).
        x_test: Array-like of samples; the first axis indexes the samples and
            the remaining axes are the per-sample input shape.

    Returns:
        A NumPy array holding one output tensor per sample, stacked along a
        new leading axis. Each sample is run with batch size 1, so any batch
        axis of the model output is kept as-is. Outputs of a quantized model
        are converted back to float32 real values.
    """
    samples = np.asarray(x_test)

    # Initialize the TFLite interpreter for single-sample inference. The
    # per-sample shape comes from the data actually passed in (not from a
    # notebook-level global) and the tensor index from the model itself.
    interpreter = tf.lite.Interpreter(model_content=tflite_model)
    input_index = interpreter.get_input_details()[0]["index"]
    interpreter.resize_tensor_input(input_index, [1, *samples.shape[1:]],
                                    strict=True)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]
    input_dtype = input_details["dtype"]

    # If required, quantize the input layer (from float to integer)
    input_scale, input_zero_point = input_details["quantization"]
    if (input_scale, input_zero_point) != (0.0, 0):
        samples = samples / input_scale + input_zero_point
        if np.issubdtype(input_dtype, np.integer):
            # Round to the nearest step and saturate at the dtype limits; a
            # bare integer cast truncates toward zero and wraps around for
            # values outside the representable range.
            limits = np.iinfo(input_dtype)
            samples = np.clip(np.round(samples), limits.min, limits.max)
    # The interpreter rejects tensors whose dtype differs from the model's
    samples = samples.astype(input_dtype, copy=False)

    # Invoke the interpreter once per sample
    y_pred = []
    for i in range(len(samples)):
        # The slice keeps the leading batch axis of size 1
        interpreter.set_tensor(input_details["index"], samples[i:i + 1])
        interpreter.invoke()
        y_pred.append(interpreter.get_tensor(output_details["index"]))
    y_pred = np.asarray(y_pred, dtype=output_details['dtype'])

    # If required, dequantize the output layer (from integer to float)
    output_scale, output_zero_point = output_details["quantization"]
    if (output_scale, output_zero_point) != (0.0, 0):
        y_pred = y_pred.astype(np.float32)
        y_pred = (y_pred - output_zero_point) * output_scale

    return y_pred


def evaluate_tflite(tflite_model, x_test, y_true):
    """Compute the loss of a TFLite model on ``x_test``.

    The loss function is the one configured on the notebook-level Keras
    ``model`` (``model.loss``), so that model must be loaded before this
    function is called.

    Args:
        tflite_model: Serialized TFLite model.
        x_test: Samples to run through the model.
        y_true: Targets in the format expected by ``model.loss``.

    Returns:
        The loss as returned by the Keras loss function, converted to NumPy.
    """
    y_pred = predict_tflite(tflite_model, x_test)
    # predict_tflite stacks one output tensor per sample; flatten everything
    # after the sample axis so each row is one sample's output vector.
    y_pred = y_pred.reshape(len(y_pred), -1)
    loss_function = tf.keras.losses.get(model.loss)
    # Pass the predicted scores themselves. Taking argmax over axis 1 (the
    # singleton batch axis of each stacked output) yields all zeros and
    # discards the predictions.
    loss = loss_function(y_true, y_pred).numpy()
    return loss

**1. Predictions**

In [None]:
# Load the trained Keras model that the TFLite models were converted from
model = tf.keras.models.load_model(MODEL_TF)
# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that appends a stray "None" to the output)
model.summary()

In [None]:
# Run the same test samples through all three model variants:
# the Keras model, ...
y_test_pred_tf = model.predict(x_test)
# ... the float TFLite model ...
y_test_pred_no_quant_tflite = predict_tflite(
    model_no_quant_tflite, x_test
)
# ... and the quantized TFLite model
y_test_pred_tflite = predict_tflite(
    model_tflite, x_test
)

In [None]:
# Collapse the TFLite predictions to one row per sample. The number of
# classes is read from the Keras predictions rather than hard-coded, so the
# cell keeps working when the set of activities changes.
num_classes = y_test_pred_tf.shape[-1]
y_test_pred_no_quant_tflite = y_test_pred_no_quant_tflite.reshape(
    -1, num_classes)
y_test_pred_tflite = y_test_pred_tflite.reshape(-1, num_classes)

In [None]:
print('========== TensorFlow ========== \n')
# Predicted class = index of the highest score in each row
max_y_pred_tf = np.argmax(y_test_pred_tf, axis=1)
# sklearn convention: rows are the true classes, columns the predicted ones
cf_matrix = pd.DataFrame(confusion_matrix(y_test, max_y_pred_tf),
                         columns=LABELS, index=LABELS)
# Normalise every column by its own total. The axis is spelled out because
# np.sum(DataFrame) relies on the deprecated axis=None reduction, whose
# result (per-column sums vs. one grand total) depends on the pandas version.
cf_fractions = cf_matrix / cf_matrix.sum(axis=0)
ax = sns.heatmap(cf_fractions, annot=True, fmt='.2%', cmap='Greens')
ax.set(title='TensorFlow', xlabel='Predicted activity',
       ylabel='True activity')
print(classification_report(y_test, max_y_pred_tf))

In [None]:
print('======== TensorFlowLite ======== \n')
# Predicted class = index of the highest score in each row
max_y_pred_nq_tflite = np.argmax(y_test_pred_no_quant_tflite, axis=1)
# sklearn convention: rows are the true classes, columns the predicted ones
cf_matrix = pd.DataFrame(confusion_matrix(y_test, max_y_pred_nq_tflite),
                         columns=LABELS, index=LABELS)
# Normalise every column by its own total. The axis is spelled out because
# np.sum(DataFrame) relies on the deprecated axis=None reduction, whose
# result (per-column sums vs. one grand total) depends on the pandas version.
cf_fractions = cf_matrix / cf_matrix.sum(axis=0)
ax = sns.heatmap(cf_fractions, annot=True, fmt='.2%', cmap='Greens')
ax.set(title='TensorFlow Lite', xlabel='Predicted activity',
       ylabel='True activity')
print(classification_report(y_test, max_y_pred_nq_tflite))

In [None]:
print('======= TFLite Quantized ======= \n')
# Predicted class = index of the highest score in each row
max_y_pred_tflite = np.argmax(y_test_pred_tflite, axis=1)
# sklearn convention: rows are the true classes, columns the predicted ones
cf_matrix = pd.DataFrame(confusion_matrix(y_test, max_y_pred_tflite),
                         columns=LABELS, index=LABELS)
# Normalise every column by its own total. The axis is spelled out because
# np.sum(DataFrame) relies on the deprecated axis=None reduction, whose
# result (per-column sums vs. one grand total) depends on the pandas version.
cf_fractions = cf_matrix / cf_matrix.sum(axis=0)
ax = sns.heatmap(cf_fractions, annot=True, fmt='.2%', cmap='Greens')
ax.set(title='TensorFlow Lite Quantized', xlabel='Predicted activity',
       ylabel='True activity')
print(classification_report(y_test, max_y_pred_tflite))

**2. Loss (Categorical Cross-Entropy)**

In [None]:
# One-hot encode the integer test labels so they can be compared with the
# per-class scores produced by the models
y_true = pd.get_dummies(y_test).to_numpy()
loss_function = tf.keras.losses.CategoricalCrossentropy()
# Keras model: evaluate() returns two values here; only the first one
# (the loss) is kept
loss_tf, _ = model.evaluate(x_test, y_true, verbose=0)
# TFLite models: apply the loss to the predictions computed earlier
loss_nq_tflite, loss_tflite = (
    loss_function(y_true, predicted).numpy()
    for predicted in (y_test_pred_no_quant_tflite, y_test_pred_tflite)
)

In [None]:
# Compare loss
# The table gets its own name instead of rebinding `df`, which holds the
# dataset; overwriting it would break any earlier cell that is re-run.
# The column is labelled plainly "Loss": the values computed above are
# cross-entropy losses, not mean squared errors.
loss_comparison = pd.DataFrame.from_records(
    [["TensorFlow", loss_tf],
     ["TensorFlow Lite", loss_nq_tflite],
     ["TensorFlow Lite Quantized", loss_tflite]],
    columns=["Model", "Loss"], index="Model").round(4)
loss_comparison

**3. Size**

In [None]:
def get_total_size(dirpath):
    """Return the total size in bytes of everything stored under ``dirpath``.

    Args:
        dirpath: Path of a directory, which is walked recursively. A path to
            a regular file is accepted as well, so single-file model formats
            can be measured with the same call.

    Returns:
        Sum of the sizes of all regular files found, as an int. Entries that
        are neither files nor directories are ignored. An empty directory
        yields 0.
    """
    if os.path.isfile(dirpath):
        return os.path.getsize(dirpath)

    size = 0
    # scandir yields ready-made paths, avoiding hand-built '/' concatenation
    with os.scandir(dirpath) as entries:
        for entry in entries:
            if entry.is_file():
                size += entry.stat().st_size
            elif entry.is_dir():
                size += get_total_size(entry.path)
    return size

In [None]:
def _percent_smaller(before, after):
    """Return by how many percent `after` is smaller than `before`."""
    return 100*(before - after)/before


# Size on disk of each model variant, in bytes
size_tf = get_total_size(MODEL_TF)
size_nq_tflite, size_tflite = (
    os.path.getsize(model_path)
    for model_path in (MODEL_NO_QUANT_TFLITE, MODEL_TFLITE)
)

# Relative reduction at each conversion step, and overall
tf_to_lite = _percent_smaller(size_tf, size_nq_tflite)
lite_to_quant = _percent_smaller(size_nq_tflite, size_tflite)
tf_to_quant = _percent_smaller(size_tf, size_tflite)

In [None]:
# One row per model variant: name, size on disk, reduction achieved
size_rows = [
    ("TensorFlow",
     f"{size_tf} bytes",
     ""),
    ("TensorFlow Lite",
     f"{size_nq_tflite} bytes ",
     f"(reduced by  {tf_to_lite:.1f}%)"),
    ("TensorFlow Lite Quantized",
     f"{size_tflite} bytes",
     f"(reduced by {lite_to_quant:.1f}%, total: {tf_to_quant:.1f}%)"),
]
pd.DataFrame.from_records(size_rows,
                          columns=["Model", "Size", ""],
                          index="Model")

**Summary**

We can see from the predictions (graph) and loss (table) that the original TF model, the TFLite model, and the quantized TFLite model are all close enough to be indistinguishable - even though they differ in size (table). This implies that the quantized (smallest) model is ready to use!

*Note: The quantized (integer) TFLite model is just 300 bytes smaller than the original (float) TFLite model - a tiny reduction in size! This is because the model is already so small that quantization has little effect. Complex models with more weights can have up to a 4x reduction in size!*

## Generate a TensorFlow Lite for Microcontrollers Model
Convert the TensorFlow Lite quantized model into a C source file that can be loaded by TensorFlow Lite for Microcontrollers.

In [None]:
# Install xxd if it is not available
# !apt-get update && apt-get -qq install xxd
# Convert to a C source file, i.e., a TensorFlow Lite for Microcontrollers model
# !xxd -i {MODEL_TFLITE} > {MODEL_TFLITE_MICRO}
# Update variable names
# REPLACE_TEXT = MODEL_TFLITE.replace('/', '_').replace('.', '_')
# !sed -i 's/'{REPLACE_TEXT}'/g_model/g' {MODEL_TFLITE_MICRO}

## Deploy to a Microcontroller

Follow the instructions in the [hello_world](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/micro/examples/hello_world) README.md for [TensorFlow Lite for MicroControllers](https://www.tensorflow.org/lite/microcontrollers/overview) to deploy this model on a specific microcontroller.

**Reference Model:** If you have not modified this notebook, you can follow the instructions as is, to deploy the model. Refer to the [`hello_world/train/models`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/micro/examples/hello_world/train/models) directory to access the models generated in this notebook.

**New Model:** If you have generated a new model, then update the values assigned to the variables defined in [`hello_world/model.cc`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/micro/examples/hello_world/model.cc) with values displayed after running the following cell.

In [None]:
# Print the C source file
# !cat {MODEL_TFLITE_MICRO}