# End-to-End FINN Flow for MobileNet-V1
-------------------------------------------------------------


In [5]:
from PIL import Image
import numpy as np
import brevitas.onnx as bo
import torch

# Load a single test image and preprocess it into the network's input format.
# NOTE(review): absolute path; presumably matches the FINN container layout.
pil_img = Image.open("/workspace/finn/tests/brevitas/king_charles.jpg")
# force three 8-bit channels so that grayscale / RGBA / CMYK files cannot
# break the (H, W, C) -> (C, H, W) transpose below
pil_img = pil_img.convert("RGB").resize((224, 224))
# widen to int32 so the mean subtraction can produce negative values
img = np.asarray(pil_img).astype(np.int32)
img = img.transpose(2, 0, 1)
# our network is trained with BGR instead of RGB images,
# so we need to invert the order of channels in the channel axis
# (.copy() turns the reversed view back into a contiguous array):
img = img[::-1, :, :].copy()
# finally, we need to subtract the mean per-channel pixel intensity
# since this is how this network has been trained; the values are listed
# in the (already reversed) channel order and broadcast over H and W
channel_means = np.array([104, 117, 123], dtype=np.int32).reshape(3, 1, 1)
img = img - channel_means
img = img.reshape(1, 3, 224, 224)
input_tensor = torch.from_numpy(img).float()
assert input_tensor.shape == (1, 3, 224, 224)

In [6]:
from finn.util.test import get_test_model_trained
# Obtain the MobileNet model through FINN's test helper.
# NOTE(review): the two 4s are presumably the weight and activation bit
# widths (the exported file is named "..._4b") - confirm against the helper.
mobilenet = get_test_model_trained("mobilenet", 4, 4)

In [7]:
# golden output
# do forward pass in PyTorch/Brevitas and record the top-5 result so it can
# be compared against the transformed ONNX model later on
mobilenet.eval()
# call the module itself rather than .forward() so registered hooks run,
# and skip autograd bookkeeping since this is inference only
with torch.no_grad():
    expected = mobilenet(input_tensor).detach().numpy()
expected_topk = expected.flatten()
# argsort is ascending: take the last five indices and reverse them so the
# highest-scoring class comes first
expected_top5 = np.flip(np.argsort(expected_topk)[-5:])
# scores belonging to those indices, in the same order, as a plain list
expected_top5_prob = [expected_topk[index] for index in expected_top5]

In [6]:
from finn.core.modelwrapper import ModelWrapper

# Export the Brevitas model to an ONNX file. The shape tuple matches the
# (1, 3, 224, 224) input tensor built earlier, which is also passed as input_t.
bo.export_finn_onnx(mobilenet, (1, 3, 224, 224), "quant_mobilenet_v1_4b.onnx", input_t=input_tensor)
# Load the exported file into a ModelWrapper for the transformation steps.
model = ModelWrapper("quant_mobilenet_v1_4b.onnx")

In [7]:
from finn.util.visualization import showInNetron
# Open the freshly exported model in the Netron viewer for inspection.
showInNetron("quant_mobilenet_v1_4b.onnx")

Serving 'quant_mobilenet_v1_4b.onnx' at http://0.0.0.0:8081


In [1]:
from finn.core.modelwrapper import ModelWrapper
from finn.util.visualization import showInNetron
# start from the exported model on disk
model = ModelWrapper("quant_mobilenet_v1_4b.onnx")

from finn.transformation.infer_shapes import InferShapes
from finn.transformation.infer_data_layouts import InferDataLayouts
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.general import (
    GiveReadableTensorNames,
    GiveUniqueNodeNames,
    GiveUniqueParameterTensors,
)
from finn.transformation.insert_topk import InsertTopK
import finn.transformation.streamline.absorb as absorb

# tidy-up transformations
model = model.transform(InferShapes())
model = model.transform(FoldConstants())
model = model.transform(InsertTopK())
# get initializer from Mul that will be absorbed into topk;
# this must happen after InsertTopK and before AbsorbScalarMulIntoTopK.
# The Mul is located purely by position (second-to-last node), so verify
# that assumption instead of silently picking up an unrelated tensor.
scale_mul = model.graph.node[-2]
assert scale_mul.op_type == "Mul", (
    "expected a Mul node in front of TopK, found %s" % scale_mul.op_type
)
a0 = model.get_initializer(scale_mul.input[1])
assert a0 is not None, "second input of the Mul in front of TopK has no initializer"
model = model.transform(absorb.AbsorbScalarMulIntoTopK())
# re-infer tensor metadata on the modified graph, then normalise names
model = model.transform(InferShapes())
model = model.transform(InferDataTypes())
model = model.transform(InferDataLayouts())
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveUniqueParameterTensors())
model = model.transform(GiveReadableTensorNames())

# checkpoint the tidied-up model and display it
model.save("quant_mobilenet_v1_4b_before.onnx")
showInNetron("quant_mobilenet_v1_4b_before.onnx")



Serving 'quant_mobilenet_v1_4b_before.onnx' at http://0.0.0.0:8081


In [2]:
from finn.transformation.streamline import Streamline
from finn.transformation.double_to_single_float import DoubleToSingleFloat
from finn.transformation.streamline.reorder import (
    MoveMulPastDWConv,
    MoveTransposePastScalarMul,
    MoveFlattenPastAffine,
    MoveFlattenPastTopK,
    MoveScalarMulPastMatMul,
)
from finn.transformation.change_datalayout import ChangeDataLayoutQuantAvgPool2d
from finn.transformation.streamline.collapse_repeated import CollapseRepeatedMul
from finn.transformation.streamline.remove import RemoveIdentityOps

# Streamlining: start from the tidied-up checkpoint on disk and apply the
# passes below strictly in the listed order.
model = ModelWrapper("quant_mobilenet_v1_4b_before.onnx")

streamlining_passes = (
    Streamline,
    DoubleToSingleFloat,
    MoveMulPastDWConv,
    absorb.AbsorbMulIntoMultiThreshold,
    ChangeDataLayoutQuantAvgPool2d,
    InferDataLayouts,
    MoveTransposePastScalarMul,
    absorb.AbsorbTransposeIntoFlatten,
    MoveFlattenPastAffine,
    MoveFlattenPastTopK,
    MoveScalarMulPastMatMul,
    CollapseRepeatedMul,
    RemoveIdentityOps,
)
# each pass object is created right before it is applied
for make_pass in streamlining_passes:
    model = model.transform(make_pass())

# checkpoint the streamlined model and display it
streamlined_path = "quant_mobilenet_v1_4b_streamlined.onnx"
model.save(streamlined_path)
showInNetron(streamlined_path)


Stopping http://0.0.0.0:8081
Serving 'quant_mobilenet_v1_4b_streamlined.onnx' at http://0.0.0.0:8081


In [3]:
from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from finn.transformation.streamline.absorb import AbsorbTransposeIntoMultiThreshold

# Lowering: start from the streamlined checkpoint, lower the convolutions to
# matrix multiplications, then refresh node/tensor names and datatypes.
model = ModelWrapper("quant_mobilenet_v1_4b_streamlined.onnx")

lowering_passes = (
    LowerConvsToMatMul,
    AbsorbTransposeIntoMultiThreshold,
    GiveUniqueNodeNames,
    GiveReadableTensorNames,
    InferDataTypes,
)
# each pass object is created right before it is applied
for make_pass in lowering_passes:
    model = model.transform(make_pass())

# NOTE(review): this file name is the same one the original export was
# written to, so the exported model on disk is replaced by the lowered one.
lowered_path = "quant_mobilenet_v1_4b.onnx"
model.save(lowered_path)
showInNetron(lowered_path)


Stopping http://0.0.0.0:8081
Serving 'quant_mobilenet_v1_4b.onnx' at http://0.0.0.0:8081


In [8]:
import finn.core.onnx_exec as oxe
# Run the transformed model on the same preprocessed image that was used for
# the PyTorch golden output.
model = ModelWrapper("quant_mobilenet_v1_4b.onnx")
# feed the image under the graph's first input name, converted to float32
idict = {model.graph.input[0].name: img.astype(np.float32)}
# NOTE(review): the positional True presumably asks for the full execution
# context (intermediate tensors included), which the "TopK_0_out0" lookup
# below relies on - confirm against execute_onnx's signature.
odict = oxe.execute_onnx(model, idict, True)
# value of the graph's first output
produced = odict[model.graph.output[0].name]
# "TopK_0_out0" is a hard-coded tensor name. It is multiplied by a0, the
# initializer of the Mul that was absorbed into TopK, presumably to restore
# the original output scale - TODO confirm.
produced_prob = odict["TopK_0_out0"] * a0

In [9]:
# Show the PyTorch golden reference next to the ONNX execution result.
print(expected_top5)
print(produced)
print(expected_top5_prob)
print(produced_prob)

# Fail loudly on a mismatch instead of relying on a visual comparison.
# Both results are flattened because they carry extra singleton dimensions.
assert (produced.flatten() == expected_top5).all(), "top-5 class indices differ"
# scores are floating point, so compare with a tolerance
assert np.isclose(
    produced_prob.flatten(), expected_top5_prob
).all(), "top-5 scores differ"

[219 220 213 365 156]
[[219 220 213 365 156]]
[14.313654, 12.159303, 11.355986, 10.625698, 9.238149]
[[[[14.313654 12.159303 11.355986 10.625697  9.23815 ]]]]


In [18]:
# Display the final model file in the Netron viewer once more.
showInNetron("quant_mobilenet_v1_4b.onnx")


Stopping http://0.0.0.0:8081
Serving 'quant_mobilenet_v1_4b.onnx' at http://0.0.0.0:8081
