In [None]:
from nntool.api import NNGraph
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import logging

logging.basicConfig(level = logging.ERROR)
import cv2
import glob
import os

In [None]:
# Configuration
# Side length, in pixels, of the square network input. The value selects which
# .tflite file is loaded, is the size every image is resized to, scales the
# box coordinates when drawing detections, names the generated project
# directory and decides the input memory location used for code generation.
dimension = 128

In [None]:
# Load the TFLite graph that matches the configured input size.
# load_quantization = False is passed, so quantization is done later in this notebook.
model = NNGraph.load_graph(
    f"YOLOv5_HeadCount_{dimension}x{dimension}.tflite",
    load_quantization = False,
)
model.name = "YOLOv5"

# adjust_order() is run first; show() then returns a table of information on the graph
model.adjust_order()
print(model.show())

# Summary figures reported by the graph, printed one per line
print(
    f"Total ops: {model.total_ops*1E-6:.0f} MFLOPs",
    f"Total parameters: {model.total_memory_usage[1]*1E-3:.0f} K Item",
    f"Total memory usage: {model.total_memory_usage[0]*1E-3:.1f} K Item",
    sep = "\n",
)

In [None]:
# Apply the 'scaled_match_group' fusions to the graph.
# This is the equivalent of the fusions --scale8 command. The fusions method can be given a series of fusions to apply
model.fusions('scaled_match_group')

# Disabled experiment, kept for reference: cutting the graph at specific CONCAT / LOGISTIC nodes.
# NOTE(review): the node names below are presumably specific to one exported model — confirm before re-enabling.
# model.remove_nodes(node_from = model["CONCAT_0_253"], node_to = None, up = False, leave = None)
# model.remove_nodes(node_from = model["LOGISTIC_0_243"], node_to = None, up = False, leave = True)

# model.remove_nodes(node_from = model["CONCAT_0_233"], node_to = None, up = False, leave = None)
# model.remove_nodes(node_from = model["LOGISTIC_0_230"], node_to = None, up = False, leave = True)

# Re-run adjust_order() after the fusions and print the updated graph table
model.adjust_order()
print(model.show())

In [None]:
from PIL import Image
import numpy as np


class MyDataLoader():
    """Iterator that yields preprocessed images read from a list of files.

    Each image is opened with PIL, converted to 3-channel RGB, resized to a
    square, scaled to the [0, 1] range as float32 and optionally transposed
    from HWC to CHW.

    Args:
        image_files: Sized, indexable collection of image file paths.
        max_idx: Maximum number of files to iterate over. ``None`` means all
            files. Values larger than the number of files are clamped.
        transpose_to_chw: When True, images are returned as CHW instead of HWC.
        size: Side length of the square output image. ``None`` (the default)
            uses the notebook-level ``dimension`` at the time an image is read.
    """

    def __init__(self, image_files, max_idx = None, transpose_to_chw = True, size = None):
        self._file_list = image_files
        self._idx = 0
        n_files = len(image_files)
        # Clamp so that a too-large max_idx ends the iteration cleanly instead
        # of raising IndexError when the file list is exhausted.
        self._max_idx = n_files if max_idx is None else min(max_idx, n_files)
        self._transpose_to_chw = transpose_to_chw
        self._size = size

    def __iter__(self):
        """Restart the iteration from the first file and return the loader itself."""
        self._idx = 0
        return self

    def __next__(self):
        """Return the next preprocessed image as a float32 numpy array.

        Raises:
            StopIteration: When ``max_idx`` files have been returned.
        """
        if self._idx >= self._max_idx:
            raise StopIteration()
        filename = self._file_list[self._idx]

        # Read the image and make it a numpy array. The context manager closes
        # the file handle; convert("RGB") guarantees an (H, W, 3) array, so
        # grayscale, palette and RGBA files no longer crash or add a channel.
        with Image.open(filename) as image:
            img_array = np.array(image.convert("RGB"))

        # Apply the preprocessing: square resize, then scale to [0, 1] float32
        side = self._size if self._size is not None else dimension
        img_array = cv2.resize(img_array, (side, side))
        img_array = (img_array / 255.0).astype(np.float32)

        # The array is always HWC at this point; move channels first on request
        if self._transpose_to_chw:
            img_array = img_array.transpose(2, 0, 1)

        self._idx += 1
        return img_array

In [None]:
# Note on model.execute (used in the cells further down): the executer returns all the layer outputs.
# Each layer output is an array of the outputs from each output of a layer.
# Generally layers have one output but some (like a split for example) can have multiple outputs.
# Selecting the first output of the last layer gives, in a graph with one output, the graph output.
# glob returns paths in an arbitrary, filesystem-dependent order. Sorting makes
# the preview image and the order of the calibration images reproducible.
image_files = sorted(glob.glob("input_images_HeadCount/*"))
if not image_files:
    # Fail with an explicit message instead of a bare StopIteration from next()
    raise FileNotFoundError("No input images found in 'input_images_HeadCount/'")

data_loader = MyDataLoader(image_files, transpose_to_chw = False)
test_image = next(data_loader)

print(test_image.shape)

# The loader is created with transpose_to_chw = False, so the image is kept in
# HWC layout and can be passed to imshow directly.
plt.imshow(test_image)

Run the following cells if you want to quantize the model.


In [None]:
# Run the images of the data loader through the graph and collect the
# statistics that are handed to model.quantize() in the next cell.
statistics = model.collect_statistics(data_loader)
# Plot the statistics of model[0] and model[-1] (presumably the first and last graph nodes).
# NOTE(review): `fig` is reassigned before fig.show(), so show() is only called
# on the second figure — confirm this is intended.
fig = statistics.plot_node_stats(model[0])
fig = statistics.plot_node_stats(model[-1])
fig.show()

# Disabled: plot the statistics of every node
# fig = statistics.plot_all_stats()
# fig.show()

In [None]:
# Graph-wide quantization options.
# Alternative settings that were tried and are kept here for reference:
#   "scheme": "FLOAT", "float_type": "bfloat16",
#   "force_input_size": 16, "force_output_size": 16, "force_external_size": 16,
#   "weight_bits": 8
quantization_options = {
    "scheme": "SQ8",
    "use_ne16": True,
    "hwc": True,
}

# Quantize with the statistics collected from the data loader, then print the graph table
model.quantize(statistics, graph_options = quantization_options)
print(model.show())

In [None]:
# Same summary figures as after loading, now for the quantized graph
print(
    f"Total ops: {model.total_ops*1E-6:.0f} MFLOPs",
    f"Total parameters: {model.total_memory_usage[1]*1E-3:.0f} KB",
    f"Total memory usage: {model.total_memory_usage[0]*1E-3:.1f} K Item",
    sep = "\n",
)

In [None]:
# Restart the loader and fetch its first image again as the inference input.
# iter() runs MyDataLoader.__iter__, which resets the read position to 0.
test_image = next(iter(data_loader))

In [None]:
print("execute quantized model with dequantizing data")
print(test_image.shape)
# Run the quantized graph and convert the results back to float values
output = model.execute(test_image, quantize = True, dequantize = True)

# First output of the last layer; index 0 selects the first entry of its leading axis
detections = output[-1][0][0]
print(detections.shape)

# Print only the first 10 rows; the slice keeps the cell output short
# instead of dumping every row of the output tensor.
for i, (x, y, w, h, confidence, class_id) in enumerate(detections[:10]):
    print("- %d: %f %f %f %f %f %f" % (i, x, y, w, h, confidence, class_id))


In [None]:
# Show the input image and overlay every detection above the confidence threshold
plt.figure()
plt.imshow(test_image)
ax = plt.gca()

label = output[-1][0]
for cx, cy, box_w, box_h, confidence, class_id in label[0]:
    # Skip rows that do not exceed the 0.4 confidence threshold
    if not confidence > 0.4:
        continue

    # Scale the box centre and size by the image side length
    cx, box_w, cy, box_h = cx * dimension, box_w * dimension, cy * dimension, box_h * dimension

    # Rectangle expects the corner, so shift the centre by half the box size
    corner = (cx - box_w / 2, cy - box_h / 2)
    ax.add_patch(
        patches.Rectangle(corner, box_w, box_h, linewidth = 1, edgecolor = 'b', facecolor = 'none')
    )
plt.show()



In [None]:
# Run the quantized graph and keep the outputs as raw quantized values
quant_execution = model.execute(test_image, quantize = True, dequantize = False)
print([quant_execution[inp.step_idx][0].shape for inp in model.input_nodes()])

# Print the first 10 rows. Slicing replaces the former `if i == 10: break`,
# which ran after the print and therefore emitted 11 rows.
for i, (x, y, w, h, confidence, class_id) in enumerate(quant_execution[-1][0][0][:10]):
    print("- %d: %d %d %d %d %d %d" % (i, x, y, w, h, confidence, class_id))

In [None]:
from nntool.api.utils import model_settings

# Run the quantized graph again, keeping the outputs as raw quantized values
quant_execution = model.execute(test_image, quantize = True, dequantize = False)

# Quantized tensors seen by the graph inputs (reused for plotting below)
input_tensors = [quant_execution[inp.step_idx][0] for inp in model.input_nodes()]
print([tensor.shape for tensor in input_tensors])

print(len(quant_execution))
print(input_tensors[0].shape)

# Quantization records of the first input node and the first output node
input_qrec = model.quantization[model.input_nodes()[0].name]
output_qrec = model.quantization[model.output_nodes()[0].name]
print(input_qrec)
print(output_qrec)

# Scale and zero point of the first output, used to dequantize by hand
output_q = output_qrec.out_qs[0]
scale = output_q.scale[0]
zero_point = output_q.zero_point[0]

print(scale, zero_point)

plt.figure()
# Display the quantized input tensor rather than the float test_image
plt.imshow(input_tensors[0])
ax = plt.gca()

# Manual dequantization of the last layer output: (q - zero_point) * scale
label = (quant_execution[-1][0].astype(np.float32) - zero_point) * scale
for cx, cy, box_w, box_h, confidence, class_id in label[0]:
    # Skip rows that do not exceed the 0.4 confidence threshold
    if not confidence > 0.4:
        continue

    # Scale the box centre and size by the image side length
    cx, box_w, cy, box_h = cx * dimension, box_w * dimension, cy * dimension, box_h * dimension

    # Rectangle expects the corner, so shift the centre by half the box size
    corner = (cx - box_w / 2, cy - box_h / 2)
    ax.add_patch(
        patches.Rectangle(corner, box_w, box_h, linewidth = 1, edgecolor = 'b', facecolor = 'none')
    )
plt.show()


In [None]:
# Absolute path of the current working directory
current_dir = os.path.abspath("")

# Output directory of the generated project, one per input size, with the
# "../.." segments resolved by normpath
gen_dir = os.path.normpath(
    os.path.join(current_dir, f"../../src/networks/YOLOv5/generated_{dimension}x{dimension}/")
)
print(f"Using generated directory: {gen_dir}")

In [None]:
# Inputs of 448 pixels and above are placed in L3 default RAM, smaller ones in L2.
# NOTE(review): the 448 threshold is presumably tied to the available L2 size — confirm.
input_location = 'AT_MEM_L3_DEFAULTRAM' if dimension >= 448 else 'AT_MEM_L2'

# Quantized input tensors from the last quantized execution
project_inputs = [quant_execution[inp.step_idx][0] for inp in model.input_nodes()]

# Settings for the generated project
project_settings = model_settings(
    graph_async_fork = True,
    l3_flash_device = 'AT_MEM_L3_MRAMFLASH',
    default_input_home_location = input_location,
    default_input_exec_location = input_location,
    l3_ram_ext_managed=True,
    tensor_directory = './weights_tensors',
)

res = model.gen_project(
    directory = gen_dir,
    input_tensors = project_inputs,
    settings = project_settings,
    at_loglevel = 2,
)

In [None]:
# Running on the target is disabled by default; set the flag to True to enable it
RUN_ON_TARGET = False

if RUN_ON_TARGET:
    # Quantized input tensors from the last quantized execution
    target_inputs = [quant_execution[inp.step_idx][0] for inp in model.input_nodes()]

    # Settings for the on-target run, with explicit L1 / L2 sizes
    target_settings = model_settings(
        l1_size = 128000,
        l2_size = 300000,
        graph_async_fork = True,
        l3_flash_device = 'AT_MEM_L3_MRAMFLASH',
        tensor_directory = './weights_tensors',
    )

    res = model.execute_on_target(
        directory = gen_dir,
        input_tensors = target_inputs,
        check_on_target = False,
        settings = target_settings,
        # do_clean = True,
        print_output = True,
        at_loglevel = 2,
    )