# Overview
![Flow](img/evolution.jpg)


In [55]:
import torch
import torch.nn as nn
import brevitas.nn as qnn
from brevitas.export import export_qonnx
from finn.util.visualization import showInNetron

# Ports handed to every showInNetron(...) call in this notebook
# (iport -> `port`, fport -> `forwarded_port`).
iport, fport = 8081, 8082

# Dimensions of the single linear layer and of the example input.
BATCH_SIZE = 1
IN_CH = 3
OUT_CH = 128

# Seed the global torch RNG so the example input is reproducible across runs.
torch.manual_seed(0)
inp = torch.randn(BATCH_SIZE, IN_CH)

# Export targets for the float model and the quantized model.
linear_path = 'linear_qonnx.onnx'
qlinear_path = 'quant_linear_qonnx.onnx'

### Step 1: PyTorch model

Create a PyTorch model and export it to a standard ONNX graph. In this example, the linear layer is exported as a GEMM node.

In [57]:
# Floating-point reference network: one fully connected layer followed by ReLU.
linear = nn.Sequential(nn.Linear(IN_CH, OUT_CH, bias=True), nn.ReLU())

# Export using the example input, then open the resulting file in Netron.
exported_model = export_qonnx(
    linear,
    args=inp,
    export_path=linear_path,
    opset_version=13,
)
showInNetron(linear_path, localhost_url="localhost", port=iport, forwarded_port=fport)


Stopping http://0.0.0.0:8081


Serving 'linear_qonnx.onnx' at http://0.0.0.0:8081


### Step 2: Quantize

Convert the model to a quantized model; see the Brevitas documentation on QAT and PTQ methods for training.
The Brevitas layers stand in for their PyTorch `nn` counterparts, with the addition of bit-width parameters.


In [58]:

# Quantized counterpart of the float model: a 4-bit input quantizer, a linear
# layer with 4-bit weights, and a 4-bit quantized ReLU.
quant_layers = [
    qnn.QuantIdentity(bit_width=4, return_quant_tensor=True),
    qnn.QuantLinear(IN_CH, OUT_CH, bias=True, weight_bit_width=4),
    qnn.QuantReLU(bit_width=4),
]
qlinear = nn.Sequential(*quant_layers)

# Export with the same example input as the float model, then display it.
exported_model = export_qonnx(
    qlinear,
    args=inp,
    export_path=qlinear_path,
    opset_version=13,
)
showInNetron(qlinear_path, localhost_url="localhost", port=iport, forwarded_port=fport)

Stopping http://0.0.0.0:8081
Serving 'quant_linear_qonnx.onnx' at http://0.0.0.0:8081


The next step is to feed this model into FINN.

# QONNX To FINN ONNX

In [59]:
from qonnx.core.modelwrapper import ModelWrapper
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from qonnx.util.cleanup import cleanup_model
from qonnx.core.datatype import DataType

# Load the exported quantized graph from disk.
model = ModelWrapper(qlinear_path)

# Annotate the first graph input as UINT4.
# NOTE(review): the QuantIdentity feeding this model is built with only
# bit_width=4 and the example input comes from torch.randn - confirm that an
# unsigned 4-bit annotation is what is intended here.
inp_name = model.graph.input[0].name
model.set_tensor_datatype(inp_name, DataType["UINT4"])

# Clean up the graph, then convert it from QONNX to the FINN-ONNX dialect.
model = cleanup_model(model).transform(ConvertQONNXtoFINN())
model.save("qonnx_2_finn.onnx")

showInNetron("qonnx_2_finn.onnx", localhost_url="localhost", port=iport, forwarded_port=fport)

Stopping http://0.0.0.0:8081




Serving 'qonnx_2_finn.onnx' at http://0.0.0.0:8081


# Streamline 

At this point it is possible to run C++ simulation (cppsim) to generate reference values.

In [60]:
from qonnx.transformation.general import GiveUniqueNodeNames
from qonnx.transformation.infer_datatypes import InferDataTypes
from qonnx.transformation.infer_shapes import InferShapes
from finn.builder.build_dataflow_steps import step_streamline

import finn.builder.build_dataflow_config as build_cfg

# Build configuration handed to the FINN builder steps used in this notebook.
# generate_outputs is empty: no build outputs are requested through the config;
# the individual step functions are invoked by hand instead.
cfg = build_cfg.DataflowBuildConfig(
    verbose=True,
    output_dir="output",
    fpga_part="xcvm1802-vsvd1760-2MP-e-S",
    synth_clk_period_ns=3.0,
    generate_outputs=[],
    standalone_thresholds=True,
)

# Refresh shape/datatype annotations and give every node a unique name
# before running the streamlining step.
model = model.transform(InferShapes())
model = model.transform(InferDataTypes())
model = model.transform(GiveUniqueNodeNames())

model = step_streamline(model, cfg)
model.save("prep.onnx")

# No explicit netron.stop(...) here: `netron` is never imported in this
# notebook, so that call raises NameError on a fresh kernel. It is also
# unnecessary - the recorded output of the earlier cells shows the previous
# server on this port being stopped by showInNetron itself.
showInNetron("prep.onnx", localhost_url="localhost", port=iport, forwarded_port=fport)

Stopping http://0.0.0.0:8081
Serving 'prep.onnx' at http://0.0.0.0:8081




# Convert to HW

In [61]:
from finn.transformation.fpgadataflow.convert_to_hw_layers import InferThresholdingLayer

# Apply the InferThresholdingLayer transformation and save the result.
model = model.transform(InferThresholdingLayer())
model.save("infer_threshold.onnx")

# No explicit netron.stop(...) here: `netron` is never imported in this
# notebook, so that call raises NameError on a fresh kernel. The recorded
# output of the earlier cells shows showInNetron already stopping the
# previous server on this port before serving the new file.
showInNetron("infer_threshold.onnx", localhost_url="localhost", port=iport, forwarded_port=fport)

Stopping http://0.0.0.0:8081
Serving 'infer_threshold2.onnx' at http://0.0.0.0:8081


# Convert all OPS to FINN HW Ops

In [62]:
from finn.builder.build_dataflow_steps import step_convert_to_hw

# Run the builder's convert-to-HW step with the shared build config and save
# the resulting graph.
model = step_convert_to_hw(model, cfg)
model.save("step_convert_to_hw.onnx")

# No explicit netron.stop(...) here: `netron` is never imported in this
# notebook, so that call raises NameError on a fresh kernel. The recorded
# output of the earlier cells shows showInNetron already stopping the
# previous server on this port before serving the new file.
showInNetron("step_convert_to_hw.onnx", localhost_url="localhost", port=iport, forwarded_port=fport)

Stopping http://0.0.0.0:8081
Serving 'step_convert_to_hw.onnx' at http://0.0.0.0:8081


# Specialize Layers

Select RTL module