In [1]:
import torch.nn as nn
import torch.nn.functional as F
import brevitas.nn as qnn
import numpy as np

In [2]:
class BrevitasIEEE(nn.Module):
    """Quantized CNN assembled from Brevitas layers.

    ``features`` is an identity layer followed by two groups of
    (QuantConv2d, QuantReLU) x 2 + MaxPool2d. Every kernel and pooling window
    has height 1, so only the last axis is convolved and pooled.
    ``classifier`` is a single QuantLinear that maps the flattened
    ``16 * 13`` features to ``num_classes`` outputs. With an input width of
    128 the last axis shrinks 128 -> 124 -> 120 -> 30 -> 28 -> 26 -> 13, which
    is where the 13 comes from.

    Args:
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, num_classes=8):
        super().__init__()
        w_bits = 2  # weight bit width shared by all quantized layers

        def conv(in_ch, out_ch, width):
            # Height-1 kernel of the given width.
            return qnn.QuantConv2d(in_ch, out_ch, kernel_size=(1, width),
                                   weight_bit_width=w_bits)

        def relu():
            # NOTE(review): QuantReLU has no weights, so `weight_bit_width`
            # presumably does not control the activation precision here
            # (Brevitas activations use `bit_width`) -- confirm before changing,
            # since the trained checkpoint depends on the current behaviour.
            return qnn.QuantReLU(inplace=True, weight_bit_width=w_bits)

        def pool(width):
            # Non-overlapping pooling along the last axis.
            return nn.MaxPool2d(kernel_size=(1, width), stride=(1, width))

        # The position of each module inside the Sequential is its name in the
        # state dict, so the order below must not change.
        layers = [
            nn.Identity(),
            conv(1, 16, 5),
            relu(),
            conv(16, 32, 5),
            relu(),
            pool(4),
            conv(32, 16, 3),
            relu(),
            conv(16, 16, 3),
            relu(),
            pool(2),
        ]
        self.features = nn.Sequential(*layers)

        self.classifier = nn.Sequential(
            qnn.QuantLinear(16 * 13, num_classes, bias=True,
                            weight_bit_width=w_bits),
        )

    def forward(self, x):
        """Run the feature extractor, flatten per sample, then classify."""
        feature_maps = self.features(x)
        # Keep the batch axis and collapse everything else into one axis.
        flat = feature_maps.reshape(feature_maps.shape[0], -1)
        return self.classifier(flat)


In [3]:
# Instantiate the network with its default num_classes=8.
model = BrevitasIEEE()

In [4]:
import warnings

import torch

# NOTE: torch.load unpickles the file, which can execute arbitrary code --
# only load checkpoints that come from a trusted source.
# map_location="cpu" allows a checkpoint that was saved from a GPU session to
# be loaded on a machine without CUDA.
# trained_state_dict = torch.load("brevitas.pth")["models_state_dict"][0]
checkpoint = torch.load("brevitas09.pth", map_location="cpu")
trained_state_dict = checkpoint["state_dict"]

# strict=False tolerates key mismatches, so report them explicitly instead of
# silently continuing with a partially initialised model.
load_result = model.load_state_dict(trained_state_dict, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    warnings.warn(
        "Checkpoint does not fully match the model: "
        "missing keys %s, unexpected keys %s"
        % (list(load_result.missing_keys), list(load_result.unexpected_keys))
    )

# Leave the load result as the cell output.
load_result

<All keys matched successfully>

In [5]:
from copy import deepcopy

# Independent copy of the loaded network; `model` itself is not modified here.
modified_model = deepcopy(model)

# features[0] is the nn.Identity layer, so features[1] is the first
# QuantConv2d. Expose its weights as a NumPy array and display the shape.
W_orig = modified_model.features[1].weight.detach().numpy()
W_orig.shape

(16, 1, 1, 5)

In [6]:
from brevitas.core.quant import QuantType
from brevitas.nn import QuantIdentity


class BrevitasIEEEForExport(nn.Module):
    """Export wrapper: a pretrained model followed by an output quantizer.

    Args:
        my_pretrained_model: Module whose output is fed to the quantizer;
            stored as ``self.pretrained``.
    """

    def __init__(self, my_pretrained_model):
        super().__init__()
        self.pretrained = my_pretrained_model
        # Identity layer configured with QuantType.INT and bit_width=8; it is
        # applied to whatever the wrapped model returns.
        self.qnt_output = QuantIdentity(quant_type=QuantType.INT, bit_width=8)

    def forward(self, x):
        """Return ``qnt_output(pretrained(x))``; ``x`` is passed on unchanged."""
        return self.qnt_output(self.pretrained(x))

# Wrap the loaded `model` (not the `modified_model` copy) for export.
model_for_export = BrevitasIEEEForExport(model)

In [7]:
# import brevitas.onnx as bo
# from brevitas.quant_tensor import QuantTensor

# ready_model_filename = "brevitas07-IEEE-ready.onnx"
# input_shape = (1,1,1,128)
# # create a QuantTensor instance to mark input as bipolar during export
# input_a = np.random.randint(0, 1, size=input_shape).astype(np.float32)
# # input_a = 2 * input_a - 1
# scale = 1.0
# input_t = torch.from_numpy(input_a * scale)
# input_qt = QuantTensor(
#     input_t, scale=torch.tensor(scale), bit_width=torch.tensor(32.0), signed=True
# )

# bo.export_finn_onnx(
#     model, export_path=ready_model_filename, input_t=input_qt
# )

# print("Model saved to %s" % ready_model_filename)
import brevitas.onnx as bo
from brevitas.quant_tensor import QuantTensor

ready_model_filename = "brevitas09-IEEE-ready.onnx"
input_shape = (1, 1, 1, 128)

# Dummy input for the export; `input_t` is also reused by later cells.
# The previous np.random.randint(0, 1, ...) call has an exclusive upper bound
# and therefore always produced zeros -- state that explicitly so the input is
# obviously constant and reproducible.
input_a = np.zeros(input_shape, dtype=np.float32)
scale = 1.0

input_t = torch.from_numpy(input_a * scale)
# Wrap the input in a QuantTensor so the export records it as a signed 8-bit
# tensor with the scale above (the exported graph input is reported as INT8).
input_qt = QuantTensor(
    input_t, scale=torch.tensor(scale), bit_width=torch.tensor(8.0), signed=True
)

# Export the wrapper (pretrained model + output quantizer) in FINN-ONNX format.
bo.export_finn_onnx(
    model_for_export, export_path=ready_model_filename, input_t=input_qt
)

print("Model saved to %s" % ready_model_filename)

Model saved to brevitas09-IEEE-ready.onnx


In [8]:
from finn.util.visualization import showInNetron

# Serve the exported ONNX file in the Netron viewer for visual inspection.
showInNetron(ready_model_filename)

Serving 'brevitas09-IEEE-ready.onnx' at http://0.0.0.0:8081


In [9]:
# Sanity-check forward pass of the PyTorch model on the export input.
# eval() switches every layer to inference behaviour and no_grad() skips
# autograd bookkeeping, so the result is a plain tensor without a grad_fn.
# Note that `model` stays in eval mode after this cell.
model.eval()
with torch.no_grad():
    pred = model(input_t)
print(pred)

tensor([[ 0.0193, -0.0887, -0.3648, -0.3569, -0.1951, -0.1704, -0.3849, -0.2674]],
       grad_fn=<AddmmBackward>)


In [10]:
from finn.core.modelwrapper import ModelWrapper

# Same filename that was assigned before the export call above.
ready_model_filename = "brevitas09-IEEE-ready.onnx"
# Load the exported ONNX file into a FINN ModelWrapper for inspection and
# graph transformations.
model_for_sim = ModelWrapper(ready_model_filename)
# dir(model_for_sim)

In [11]:
from finn.core.datatype import DataType

# Name, shape and FINN datatype of the graph's first input tensor ...
finnonnx_in_tensor_name = model_for_sim.graph.input[0].name
finnonnx_model_in_shape = model_for_sim.get_tensor_shape(finnonnx_in_tensor_name)
finnonnx_model_in_dt = model_for_sim.get_tensor_datatype(finnonnx_in_tensor_name)

# ... and of its first output tensor.
finnonnx_out_tensor_name = model_for_sim.graph.output[0].name
finnonnx_model_out_shape = model_for_sim.get_tensor_shape(finnonnx_out_tensor_name)
finnonnx_model_out_dt = model_for_sim.get_tensor_datatype(finnonnx_out_tensor_name)

# Report: names first, then shapes, then datatypes, then the op type of
# every node in graph order.
print(f"Input tensor name: {finnonnx_in_tensor_name}")
print(f"Output tensor name: {finnonnx_out_tensor_name}")
print(f"Input tensor shape: {finnonnx_model_in_shape}")
print(f"Output tensor shape: {finnonnx_model_out_shape}")
print(f"Input tensor datatype: {finnonnx_model_in_dt.name}")
print(f"Output tensor datatype: {finnonnx_model_out_dt.name}")
print("List of node operator types in the graph: ")
print([node.op_type for node in model_for_sim.graph.node])

Input tensor name: 0
Output tensor name: 78
Input tensor shape: [1, 1, 1, 128]
Output tensor shape: [1, 8]
Input tensor datatype: INT8
Output tensor datatype: FLOAT32
List of node operator types in the graph: 
['Mul', 'Conv', 'Mul', 'Add', 'MultiThreshold', 'Mul', 'Conv', 'Mul', 'Add', 'MultiThreshold', 'Mul', 'MaxPool', 'Conv', 'Mul', 'Add', 'MultiThreshold', 'Mul', 'Conv', 'Mul', 'Add', 'MultiThreshold', 'Mul', 'MaxPool', 'Shape', 'Gather', 'Unsqueeze', 'Concat', 'Reshape', 'MatMul', 'Mul', 'Add', 'MultiThreshold', 'Add', 'Mul']


In [12]:
from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.change_3d_tensors_to_4d import Change3DTo4DTensors

# Apply the tidy-up transformations one after another; each .transform() call
# returns the transformed model, which feeds the next call.
model_for_sim = (
    model_for_sim
    .transform(InferShapes())
    .transform(FoldConstants())
    .transform(GiveUniqueNodeNames())
    .transform(GiveReadableTensorNames())
    .transform(InferDataTypes())
    .transform(RemoveStaticGraphInputs())
    .transform(Change3DTo4DTensors())
)

# Persist the tidied-up graph under a separate filename.
verif_model_filename = "brevitas09-IEEE-verification.onnx"
model_for_sim.save(verif_model_filename)

In [13]:
from finn.util.visualization import showInNetron

# Serve the tidied-up verification model in the Netron viewer.
showInNetron(verif_model_filename)

Stopping http://0.0.0.0:8081
Serving 'brevitas09-IEEE-verification.onnx' at http://0.0.0.0:8081


In [14]:
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg
import os
import shutil

model_file = "brevitas09-IEEE-ready.onnx"
estimates_output_dir = "output_estimates_only"

# Remove the output directory of any previous run so old results do not linger.
if os.path.exists(estimates_output_dir):
    shutil.rmtree(estimates_output_dir)
    print("Previous run results deleted!")

# Estimate-only build: runs build_cfg.estimate_only_dataflow_steps and asks
# only for the estimate reports.
cfg_estimates = build.DataflowBuildConfig(
    output_dir=estimates_output_dir,
    steps=build_cfg.estimate_only_dataflow_steps,
    generate_outputs=[build_cfg.DataflowOutputType.ESTIMATE_REPORTS],
    # Performance targets.
    target_fps=10000,
    synth_clk_period_ns=10.0,
    mvau_wwidth_max=80,
    # Target device.
    board="Pynq-Z2",
    fpga_part="xc7z020clg400-1",
)

Previous run results deleted!


In [15]:
%%time
# Run the estimate-only build on the exported model using cfg_estimates.
build.build_dataflow_cfg(model_file, cfg_estimates)

Building dataflow accelerator from brevitas09-IEEE-ready.onnx
Intermediate outputs will be generated in /home/ian/build
Final outputs will be generated in output_estimates_only
Build log is at output_estimates_only/build_dataflow.log
Running step: step_qonnx_to_finn [1/8]
Running step: step_tidy_up [2/8]
Running step: step_streamline [3/8]
Running step: step_convert_to_hls [4/8]
Running step: step_create_dataflow_partition [5/8]
Running step: step_target_fps_parallelization [6/8]
Running step: step_apply_folding_config [7/8]
Running step: step_generate_estimate_reports [8/8]
Completed successfully
CPU times: user 3.19 s, sys: 16.1 ms, total: 3.21 s
Wall time: 3.1 s


0

In [16]:
# Print the network-level performance estimate report written by the build.
! cat {estimates_output_dir}/report/estimate_network_performance.json

{
  "critical_path_cycles": 36882,
  "max_cycles": 9984,
  "max_cycles_node_name": "StreamingFCLayer_Batch_3",
  "estimated_throughput_fps": 10016.02564102564,
  "estimated_latency_ns": 368820.0
}

In [17]:
import json
def read_json_dict(filename: str) -> dict:
    """Load a JSON file and return its parsed content.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The deserialized JSON document (a dict for the report files read in
        this notebook).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON text is UTF-8; decode it explicitly rather than relying on the
    # platform's locale encoding.
    with open(filename, "r", encoding="utf-8") as handle:
        return json.load(handle)

In [18]:
# Per-layer cycle estimates from the estimate-only build.
read_json_dict(estimates_output_dir + "/report/estimate_layer_cycles.json")

{'ConvolutionInputGenerator1D_0': 129,
 'StreamingFCLayer_Batch_0': 9920,
 'ConvolutionInputGenerator1D_1': 125,
 'StreamingFCLayer_Batch_1': 7680,
 'StreamingMaxPool_Batch_0': 124,
 'ConvolutionInputGenerator1D_2': 31,
 'StreamingFCLayer_Batch_2': 7168,
 'ConvolutionInputGenerator1D_3': 29,
 'StreamingFCLayer_Batch_3': 9984,
 'StreamingMaxPool_Batch_1': 28,
 'StreamingFCLayer_Batch_4': 1664}

In [19]:
# Per-layer resource estimates (BRAM, LUT, URAM, DSP) from the estimate-only build.
read_json_dict(estimates_output_dir + "/report/estimate_layer_resources.json")

{'ConvolutionInputGenerator1D_0': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 396,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'StreamingFCLayer_Batch_0': {'BRAM_18K': 1,
  'BRAM_efficiency': 0.008680555555555556,
  'LUT': 7651,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'ConvolutionInputGenerator1D_1': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 1836,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'StreamingFCLayer_Batch_1': {'BRAM_18K': 3,
  'BRAM_efficiency': 0.09259259259259259,
  'LUT': 12044,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'StreamingMaxPool_Batch_0': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 0,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'ConvolutionInputGenerator1D_2': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 1324,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'StreamingFCLayer_Batch_2': {'BRAM_18K': 1,
  'BRAM_efficiency': 0.16666666666666666,
  'LUT': 10174,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP'

In [20]:
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg
import os
import shutil

model_file = "brevitas09-IEEE-ready.onnx"
final_output_dir = "output_final"

# Remove the output directory of any previous run so old results do not linger.
if os.path.exists(final_output_dir):
    shutil.rmtree(final_output_dir)
    print("Previous run results deleted!")

# Full build: no explicit `steps`, so the builder's default step list is used;
# requests the bitfile, the PYNQ driver and the deployment package.
cfg = build.DataflowBuildConfig(
    output_dir=final_output_dir,
    generate_outputs=[
        build_cfg.DataflowOutputType.BITFILE,
        build_cfg.DataflowOutputType.PYNQ_DRIVER,
        build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
    ],
    # Performance targets (same values as the estimate-only configuration).
    target_fps=10000,
    synth_clk_period_ns=10.0,
    mvau_wwidth_max=80,
    # Target platform.
    board="Pynq-Z2",
    shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ,
)

Previous run results deleted!


In [21]:
%%time
# Run the full build (through bitfile, driver and deployment package) using cfg.
build.build_dataflow_cfg(model_file, cfg)

Building dataflow accelerator from brevitas09-IEEE-ready.onnx
Intermediate outputs will be generated in /home/ian/build
Final outputs will be generated in output_final
Build log is at output_final/build_dataflow.log
Running step: step_qonnx_to_finn [1/17]
Running step: step_tidy_up [2/17]
Running step: step_streamline [3/17]
Running step: step_convert_to_hls [4/17]
Running step: step_create_dataflow_partition [5/17]
Running step: step_target_fps_parallelization [6/17]
Running step: step_apply_folding_config [7/17]
Running step: step_generate_estimate_reports [8/17]
Running step: step_hls_codegen [9/17]
Running step: step_hls_ipgen [10/17]
Running step: step_set_fifo_depths [11/17]
Running step: step_create_stitched_ip [12/17]
Running step: step_measure_rtlsim_performance [13/17]
Running step: step_out_of_context_synthesis [14/17]
Running step: step_synthesize_bitfile [15/17]
Running step: step_make_pynq_driver [16/17]
Running step: step_deployment_package [17/17]
Completed successfully

0