> Copyright 2022 University of Luxembourg
> 
> Licensed under the Apache License, Version 2.0 (the "License");  
> you may not use this file except in compliance with the License.  
> You may obtain a copy of the License at  
>
>    https://www.apache.org/licenses/LICENSE-2.0
>
> Unless required by applicable law or agreed to in writing, software  
> distributed under the License is distributed on an "AS IS" BASIS,  
> WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
> See the License for the specific language governing permissions and  
> limitations under the License.  
>
***

Author: André Stemper (andre.stemper@uni.lu)

***

# Quantize model to tflite model


To save a model use:
model.save("model.h5")


In [None]:
import tensorflow as tf
import numpy as np
import os

### Settings


In [None]:
# Input: Keras model to load.
tf_model_filename = "atmonsat_model.h5"

# Output: converted TensorFlow Lite model file.
tf_lite_model_filename = "atmonsat_model.tflite"

# Output: generated C++ header and implementation files.
cc_header_filename = "atmonsat_model.h"
cc_source_filename = "atmonsat_model.cc"

# Identifiers used inside the generated C++ code:
# the header guard stem and the variable that points to the model data.
guard_name = "ATMONSAT_MODEL"
variable_name = "atmonsat_model"


In [None]:
# Echo the active settings so the notebook output records what was used.
print(f"Loading keras model: {tf_model_filename}")
print(f"Converting to tflite model: {tf_lite_model_filename}")
print(f"Converting to cc source file: {cc_source_filename}")
print(f"Converting to cc header file: {cc_header_filename}")
print(f"Header guard: __{guard_name}_H__")
print(f"Variable to access the tflite model: {variable_name}")


### Load model


In [None]:
# Fail early with a clear message when the Keras model file is missing.
if not os.path.exists(tf_model_filename):
    # A missing path is a FileNotFoundError; FileExistsError signals the
    # opposite condition (creating something that already exists).
    raise FileNotFoundError(
        "Cannot find TF model file {}. Aborting.".format(tf_model_filename))
model = tf.keras.models.load_model(tf_model_filename)


### View summary


In [None]:
# Show the layer overview, then the shapes of the model's input and output.
model.summary()
for tensor in (model.input, model.output):
    print(tensor.shape)


### Convert to tflite model


In [None]:
# Selects how the TFLite inference cell feeds the interpreter:
#   2 -> inputs are scaled and cast to int8,
#   3 -> inputs are scaled and cast to uint8,
#   any other value -> inputs are passed as float32 without scaling.
quantization_profile = 0

### Load calibration tensor


In [None]:
# Load the calibration samples from disk and print a few sanity checks.
filename = 'calibration_tensor.npy'
calibration_input = np.load(filename)
print(calibration_input.shape)
# Maximum over the first two axes. The value was previously computed and
# discarded; only the last expression of a cell is displayed, so it has to be
# printed explicitly to show up in the output.
print(np.max(np.amax(calibration_input, axis=0), axis=0))
# Spot check of a single row (sample 200, first position along axis 1).
print(calibration_input[200, 0, :])


#### Default conversion (no quantization options set)


In [None]:
# Build a converter from the in-memory Keras model and convert it.
# No optimizations or representative dataset are configured on the converter
# here, so the conversion runs with the converter's default settings.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# The result is what the later cells write to disk in binary mode and embed
# as a byte array in the generated C++ source.
tf_lite_model = converter.convert()


### Analyze model after conversion


In [None]:
# Run the TFLite model analyzer on the converted in-memory model.
tf.lite.experimental.Analyzer.analyze(model_content=tf_lite_model)


### Generate model outputs for calibration data with non-quantized model


In [None]:
import matplotlib.pyplot as plt


In [None]:
# Output columns to draw in the comparison plots.
test_plot_range = range(0, 4)
# Fixed y-axis limits (lower, upper) applied to those plots.
test_scale = (-500, 500)


In [None]:
if 'calibration_input' in locals():
    # Reference run: feed all calibration samples to the original Keras model.
    calibration_output = model(np.array(calibration_input))
    print(calibration_output[0, :])

    # Draw the selected output columns on a fixed y-scale.
    for column in test_plot_range:
        plt.ylim(test_scale)
        plt.plot(calibration_output[:, column])


### Generate model outputs for calibration data with quantized model


In [None]:
if 'calibration_input' in locals():
    # Run every calibration sample through the converted TFLite model and
    # collect the (dequantized) outputs for comparison with the Keras model.
    interpreter = tf.lite.Interpreter(model_content=tf_lite_model)
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    interpreter.allocate_tensors()

    # Quantization parameters are stored as (scale, zero_point).
    input_scale = input_details[0]['quantization'][0]
    input_zero_point = input_details[0]['quantization'][1]
    output_scale = output_details[0]['quantization'][0]
    output_zero_point = output_details[0]['quantization'][1]

    # Use the input shape reported by the interpreter instead of a hard-coded
    # (1, 150, 18), so the cell keeps working when the model shape changes.
    input_shape = tuple(input_details[0]['shape'])

    # Profiles 2 and 3 feed integer tensors; everything else feeds float32.
    if quantization_profile == 2:
        input_dtype = np.int8
    elif quantization_profile == 3:
        input_dtype = np.uint8
    else:
        input_dtype = np.float32
    quantized_io = quantization_profile in (2, 3)

    quantized_model_outputs = []

    for input_unscaled in calibration_input:
        if quantized_io:
            # Round to nearest and saturate to the integer range. A bare cast
            # truncates toward zero and wraps out-of-range values.
            limits = np.iinfo(input_dtype)
            input_scaled = np.clip(
                np.round(input_unscaled / input_scale) + input_zero_point,
                limits.min, limits.max).astype(input_dtype).reshape(input_shape)
        else:
            input_scaled = np.array(
                input_unscaled, dtype=np.float32).reshape(input_shape)

        interpreter.set_tensor(input_details[0]['index'], input_scaled)
        interpreter.invoke()
        output_scaled = interpreter.get_tensor(output_details[0]['index'])
        if quantized_io:
            # Convert to float before removing the zero point; subtracting in
            # the 8-bit integer dtype can overflow and wrap around.
            output_unscaled = (
                output_scaled.astype(np.float32) - output_zero_point) * output_scale
        else:
            output_unscaled = output_scaled
        # Each invocation processes one sample; keep its single output row.
        quantized_model_outputs.append(output_unscaled[0])

    quantized_model_outputs = np.array(quantized_model_outputs)

    print(quantized_model_outputs.shape)
    print(quantized_model_outputs[0, :])

    for i in test_plot_range:
        plt.ylim(test_scale)
        plt.plot(quantized_model_outputs[:, i])


In [None]:
if 'calibration_input' in locals():
    # Absolute difference between the TFLite outputs and the Keras outputs,
    # plotted for the first output column only.
    for column in range(1):
        deviation = quantized_model_outputs[:, column] - calibration_output[:, column]
        plt.plot(np.abs(deviation))


### Save to .tflite file


In [None]:
# Write the converted model to disk unchanged (binary mode).
with open(tf_lite_model_filename, 'wb') as tflite_file:
    tflite_file.write(tf_lite_model)


### Convert to C++ source and header


In [None]:
# License banner placed at the top of both generated files (to_cc and to_h
# split it into lines and prepend it to their output).
# NOTE(review): this name shadows the interactive `copyright` builtin.
copyright="""/*
 Copyright 2022 University of Luxembourg

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

      https://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/

"""

In [None]:
def to_chunks(hex_data, characters_per_line=80, characters_per_value=6):
    """Format a sequence of value strings as indented, comma-separated lines.

    Args:
        hex_data: Sequence of already formatted values (e.g. ``'0x1f'``).
        characters_per_line: Approximate width budget for one output line.
        characters_per_value: Width budget reserved for one value including
            its separator.

    Returns:
        A string in which every line starts with two spaces, values on a line
        are joined with ``', '`` and lines are joined with ``',\\n'``. For
        empty input only the two-space indent is returned.
    """
    # Always emit at least one value per line: a budget smaller than a single
    # value would otherwise produce a zero step and make range() raise.
    values_per_line = max(1, int(characters_per_line // characters_per_value))
    rows = (
        ', '.join(hex_data[start:start + values_per_line])
        for start in range(0, len(hex_data), values_per_line)
    )
    return '  ' + ',\n  '.join(rows)


def to_cc(data, header_filename, guard_name, variable_name):
    """Render the C++ implementation file that embeds the model bytes.

    Args:
        data: Model content as a bytes-like object; iterating it must yield
            integers in the range 0..255.
        header_filename: Name of the generated header to include.
        guard_name: Unused here; kept so the signature stays parallel to the
            header generator.
        variable_name: Name of the emitted byte array; the size constant is
            named ``<variable_name>_size``.

    Returns:
        The complete source text, starting with the module-level copyright
        banner and terminated by a newline.
    """
    # Only read access to the module-level banner is needed, so no `global`.
    lines = copyright.splitlines()
    lines.append("")
    lines.append("#include <cstdint>")
    # Quoted form: the compiler looks next to the including file first and
    # then falls back to the same search path as the angle-bracket form, so
    # the generated header is found without extra include-path settings.
    lines.append("#include \"{}\"".format(header_filename))
    lines.append("")
    lines.append("/**")
    lines.append(" * @brief Quantized tensorflow lite model size in bytes")
    lines.append(" */")
    lines.append("const uint32_t {}_size = {};".format(variable_name, len(data)))
    lines.append("")
    lines.append("/**")
    lines.append(" * @brief Quantized tensorflow lite model")
    lines.append(" */")
    lines.append("alignas(16) const uint8_t {}[] = {{".format(variable_name))
    # Each byte is rendered as a two-digit hex literal such as 0x0a.
    lines.append(to_chunks([format(v, '#04x') for v in data]))
    lines.append("};")
    # End the file with a newline so compilers do not warn about a missing one.
    return '\n'.join(lines) + '\n'


def to_h(data, guard_name, variable_name):
    """Render the header that declares the embedded model and its size.

    Args:
        data: Unused here; kept so the signature stays parallel to the
            source-file generator.
        guard_name: Stem of the include guard; it is upper-cased and emitted
            as ``__<GUARD_NAME>_H__``.
        variable_name: Name of the declared byte array; the size constant is
            named ``<variable_name>_size``.

    Returns:
        The complete header text, starting with the module-level copyright
        banner and terminated by a newline.
    """
    # Only read access to the module-level banner is needed, so no `global`.
    guard = "__{}_H__".format(guard_name.upper())
    lines = copyright.splitlines()
    lines.append("#ifndef {}".format(guard))
    lines.append("#define {}".format(guard))
    lines.append("")
    # The header carries extern "C" guards so it can be included from C as
    # well; <stdint.h> is available in both C and C++, <cstdint> only in C++.
    lines.append("#include <stdint.h>")
    lines.append("")
    lines.append("#ifdef __cplusplus")
    lines.append("extern \"C\"")
    lines.append("{")
    lines.append("#endif")
    lines.append("")
    lines.append("/**")
    lines.append(" * @brief Quantized tensorflow lite model size in bytes")
    lines.append(" */")
    lines.append("extern const uint32_t {}_size;".format(variable_name))
    lines.append("")
    lines.append("/**")
    lines.append(" * @brief Quantized tensorflow lite model")
    lines.append(" */")
    lines.append("extern const uint8_t {}[];".format(variable_name))
    lines.append("")
    lines.append("#ifdef __cplusplus")
    lines.append("}")
    lines.append("#endif")
    lines.append("")
    lines.append("#endif")
    # End the file with a newline so compilers do not warn about a missing one.
    return '\n'.join(lines) + '\n'


# Generate the C++ implementation file containing the model bytes.
with open(cc_source_filename, 'w') as source_file:
    source_text = to_cc(tf_lite_model, cc_header_filename,
                        guard_name, variable_name)
    source_file.write(source_text)

# Generate the matching header with the extern declarations.
with open(cc_header_filename, 'w') as header_file:
    header_text = to_h(tf_lite_model, guard_name, variable_name)
    header_file.write(header_text)
