 # 1. Define Quantized MLP in Brevitas

In [17]:
import onnx
import torch
from brevitas.nn import QuantLinear, QuantReLU, QuantIdentity
from brevitas.core.quant import QuantType
import torch.nn as nn

class QuantizedCybSecMLP(nn.Module):
    """Quantized multilayer perceptron with a binarized output, built from Brevitas layers.

    ``features`` holds three hidden stages, each made of a 2-bit-weight
    ``QuantLinear``, ``BatchNorm1d``, ``Dropout(0.5)`` and a 2-bit ``QuantReLU``,
    followed by a final ``QuantLinear`` with a single output. ``qnt_output`` is
    a ``QuantIdentity`` configured as a binary quantizer with ``min_val=-1.0``
    and ``max_val=1.0``.
    """

    def __init__(self):
        super().__init__()
        # NOTE: the index of each layer inside the Sequential becomes part of
        # its state-dict key ("features.0.weight", ..., "features.12.weight"),
        # so the layer order must stay as-is for saved checkpoints to load.
        self.features = nn.Sequential(
            QuantLinear(600, 64, bias=True, weight_bit_width=2),
            nn.BatchNorm1d(64),
            nn.Dropout(0.5),
            QuantReLU(bit_width=2),
            QuantLinear(64, 64, bias=True, weight_bit_width=2),
            nn.BatchNorm1d(64),
            nn.Dropout(0.5),
            QuantReLU(bit_width=2),
            QuantLinear(64, 64, bias=True, weight_bit_width=2),
            nn.BatchNorm1d(64),
            nn.Dropout(0.5),
            QuantReLU(bit_width=2),
            QuantLinear(64, 1, bias=True, weight_bit_width=2)
        )
        self.qnt_output = QuantIdentity(quant_type=QuantType.BINARY, bit_width=1, min_val=-1.0, max_val=1.0)

    def forward(self, x):
        """Apply the input shift, the MLP and the output quantizer to ``x``.

        Args:
            x: input tensor; the first linear layer expects 600 features per
                sample. Presumably the values are bipolar {-1, +1} -- confirm
                against the dataset.

        Returns:
            The output of ``qnt_output`` applied to the MLP result.
        """
        # (x + 1) / 2 maps -1 -> 0 and +1 -> 1. The constant is moved to the
        # input's device so the model also works when it is not on the CPU.
        x = (x + torch.tensor([1.0]).to(x.device)) / 2.0
        out_original = self.features(x)
        out_final = self.qnt_output(out_original)
        return out_final

# 2. Load dataset, measure accuracy with pretrained weights

In [25]:
brevitas_model = QuantizedCybSecMLP()
# map_location="cpu" lets a checkpoint that was saved from a GPU session be
# loaded on a CPU-only machine.
# NOTE(review): torch.load unpickles the file; only load checkpoints that come
# from a trusted source.
# NOTE(review): strict=False tolerates missing/unexpected keys, so check the
# returned key lists (the cell output) to confirm the weights were applied.
brevitas_model.load_state_dict(torch.load("state_dict_export.pth", map_location="cpu"), strict=False)

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [26]:
import numpy as np
from torch.utils.data import TensorDataset, DataLoader

# Number of samples per batch yielded by the loader below.
batch_size = 1000

# Load the input and output arrays from .npy files and wrap them as tensors.
# Presumably these are the binarized UNSW-NB15 features and labels (going by
# the file names) -- confirm against the data preparation step.
part_data_in = torch.from_numpy(np.load("unsw_nb15_binarized_inputs.npy"))
part_data_out = torch.from_numpy(np.load("unsw_nb15_binarized_outputs.npy"))
# Pair each input row with its label and iterate over them in fixed-size batches.
dataset = TensorDataset(part_data_in, part_data_out)
test_quantized_loader = DataLoader(dataset, batch_size=batch_size)

In [27]:
from sklearn.metrics import accuracy_score

def test_brevitas_model_on_dataset(model, test_loader):    
    # ensure model is in eval mode
    model.eval() 
    y_true = []
    y_pred = []
   
    with torch.no_grad():
        for data in test_loader:
            inputs, target = data
            output = model(inputs.float())
            y_pred.extend(list(output.flatten()))
            y_true.extend(list(target.detach().numpy().flatten()))
        
    return accuracy_score(y_true, y_pred)

In [28]:
# Accuracy of the Brevitas model with the loaded weights on the full loader.
test_brevitas_model_on_dataset(brevitas_model, test_quantized_loader)

0.918075596365933

# 3. Export to ONNX for FINN and visualize with Netron

In [34]:
import brevitas.onnx as bo

# Destination file for the exported model.
export_onnx_path = "cybsec-mlp-nids-demo.onnx"
# A single sample with 600 features, matching the first QuantLinear layer.
input_shape = (1, 600)
# Export the Brevitas model to ONNX using the FINN export entry point.
bo.export_finn_onnx(brevitas_model, input_shape, export_onnx_path)



In [35]:
from finn.core.modelwrapper import ModelWrapper
from finn.core.datatype import DataType

# Same file that the export cell wrote.
model_file = "cybsec-mlp-nids-demo.onnx"

# Re-open the exported ONNX file, annotate its first graph input with the
# BIPOLAR datatype, and save the result back to the same path.
model = ModelWrapper(model_file)
model.set_tensor_datatype(model.graph.input[0].name, DataType.BIPOLAR)
model.save(model_file)

In [36]:
from finn.util.visualization import showInNetron

# Serve the exported ONNX file in Netron for visual inspection.
showInNetron("cybsec-mlp-nids-demo.onnx")

Stopping http://0.0.0.0:8081
Serving 'cybsec-mlp-nids-demo.onnx' at http://0.0.0.0:8081


# 6. Build the Quantized MLP with FINN

In [3]:
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg

# ONNX model to build (the file written by the export section).
model_file = "cybsec-mlp-nids-demo.onnx"

# Directory that receives the build outputs; also read by later cells.
rtlsim_output_dir = "nids_demo_rtlsim"

# Build configuration: generate a stitched IP and measure its performance in
# RTL simulation.
cfg = build.DataflowBuildConfig(
    # target performance and clock frequency
    # (3.333 ns is roughly 300 MHz, so 300e6 inferences/s is about one per clock cycle)
    target_fps          = 300000000,
    synth_clk_period_ns = 3.333,
    # target FPGA part number (for ZCU104)
    fpga_part           = "xczu7ev-ffvc1156-2-e",
    # where outputs will be written
    output_dir          = rtlsim_output_dir,
    # embed parameters as constants in bitstream
    default_mem_mode    = build_cfg.ComputeEngineMemMode.CONST,
    # enable full unfolding
    mvau_wwidth_max     = 1000000,
    # number of inputs for rtlsim performance measurement
    rtlsim_perf_n_inputs = 1000,
    # which output products to generate
    generate_outputs=[
        build_cfg.DataflowOutputType.STITCHED_IP,
        build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE,
    ]
)

In [10]:
# Run the FINN dataflow build for model_file with the configuration above.
build.build_dataflow_cfg(model_file, cfg)

Building dataflow accelerator from cybsec-mlp-nids-demo.onnx
Intermediate outputs will be generated in /data/nids-mlp-fold1
Final outputs will be generated in nids_demo_rtlsim
Build log is at nids_demo_rtlsim/build_dataflow.log
Running step: step_tidy_up [1/16]
Running step: step_streamline [2/16]
Running step: step_convert_to_hls [3/16]
Running step: step_create_dataflow_partition [4/16]
Running step: step_target_fps_parallelization [5/16]
Running step: step_apply_folding_config [6/16]
Running step: step_generate_estimate_reports [7/16]
Running step: step_hls_codegen [8/16]
Running step: step_hls_ipgen [9/16]
Running step: step_set_fifo_depths [10/16]
Running step: step_create_stitched_ip [11/16]
Running step: step_measure_rtlsim_performance [12/16]
Running step: step_make_pynq_driver [13/16]
Running step: step_out_of_context_synthesis [14/16]
Running step: step_synthesize_bitfile [15/16]
Running step: step_deployment_package [16/16]
Completed successfully


0

# 7. Examine the generated outputs

In [4]:
# List the contents of the stitched_ip folder inside the build output directory.
! ls {rtlsim_output_dir}/stitched_ip

all_verilog_srcs.txt		       ip
finn_vivado_stitch_proj.cache	       make_project.sh
finn_vivado_stitch_proj.gen	       make_project.tcl
finn_vivado_stitch_proj.hw	       vivado.jou
finn_vivado_stitch_proj.ip_user_files  vivado.log
finn_vivado_stitch_proj.sim	       vivado_1314.backup.jou
finn_vivado_stitch_proj.srcs	       vivado_1314.backup.log
finn_vivado_stitch_proj.xpr	       vivado_pid28868.str


In [5]:
# Print the RTL-simulation performance report written by the build.
! cat {rtlsim_output_dir}/report/rtlsim_performance.json

{
  "cycles": 1025,
  "runtime[ms]": 0.0034163250000000004,
  "throughput[images/s]": 292712198.04907316,
  "DRAM_in_bandwidth[Mb/s]": 21953.414853680486,
  "DRAM_out_bandwidth[Mb/s]": 36.58902475613415,
  "fclk[mhz]": 300.0300030003,
  "N": 1000,
  "latency_cycles": 26
}

# 8. Verify generated RTL model's accuracy

In [6]:
from finn.core.modelwrapper import ModelWrapper
from finn.core.onnx_exec import execute_onnx
from finn.util.basic import gen_finn_dt_tensor

# Load the intermediate model saved after the stitched-IP build step (the
# "11_" prefix matches that step's position in the build log) and mark it
# for execution in rtlsim mode.
rtlsim_model = ModelWrapper(rtlsim_output_dir + "/intermediate_models/11_step_create_stitched_ip.onnx")
rtlsim_model.set_metadata_prop("exec_mode", "rtlsim")

In [14]:
def test_rtlsim_model_on_dataset(model, test_loader):    
    y_true = []
    y_pred = []
   
    for data in test_loader:
        inputs, target = data
        batch_size = inputs.shape[0]
        model.set_tensor_shape("global_in", (batch_size, 600))
        model.set_tensor_shape("global_out", (batch_size, 1))
        output = execute_onnx(model, {"global_in" : inputs.float().numpy()})["global_out"]
        y_pred.extend(list(output.flatten()))
        y_true.extend(list(target.detach().numpy().flatten()))
        
    return accuracy_score(y_true, y_pred)

In [16]:
# Accuracy of the rtlsim-executed model on the same loader used for the Brevitas model.
test_rtlsim_model_on_dataset(rtlsim_model, test_quantized_loader)

0.918075596365933