# Hardware Emit Pass
The `emit_verilog` transform pass generates a top-level RTL file and testbench file according to the `MaseGraph`, which includes a hardware implementation of each layer in the network. This top-level file instantiates modules from the `components` library in MASE and/or modules generated using [HLS](https://en.wikipedia.org/wiki/High-level_synthesis), when internal components are not available. The hardware can then be simulated using [Verilator](https://www.veripool.org/verilator/), or deployed on an FPGA.

First, add Machop to your system PATH (if you haven't already done so) and import the required libraries.

In [1]:
import os, sys
import torch
torch.manual_seed(0)

from chop.ir.graph.mase_graph import MaseGraph

from chop.passes.graph.analysis import (
    init_metadata_analysis_pass,
    add_common_metadata_analysis_pass,
    add_hardware_metadata_analysis_pass,
    add_software_metadata_analysis_pass,
    report_node_type_analysis_pass,
)

from chop.passes.graph.transforms import (
    emit_verilog_top_transform_pass,
    emit_internal_rtl_transform_pass,
    emit_bram_transform_pass,
    emit_cocotb_transform_pass,
    quantize_transform_pass,
)

from chop.tools.logger import set_logging_verbosity

set_logging_verbosity("debug")

import toml
import torch
import torch.nn as nn

# TO DO: remove
import os
os.environ["PATH"] = "/opt/homebrew/bin:" + os.environ["PATH"]
!verilator

  from .autonotebook import tqdm as notebook_tqdm
[32mINFO    [0m [34mSet logging level to debug[0m


Usage:
        verilator --help
        verilator --version
        verilator --binary -j 0 [options] [source_files.v]... [opt_c_files.cpp/c/cc/a/o/so]
        verilator --cc [options] [source_files.v]... [opt_c_files.cpp/c/cc/a/o/so]
        verilator --sc [options] [source_files.v]... [opt_c_files.cpp/c/cc/a/o/so]
        verilator --lint-only -Wall [source_files.v]...



# CNN Model

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
  def __init__(self, num_classes=10):
    # initialize the parent class—in this case, the nn.Module class from PyTorch.
    super().__init__()

    # Define layers in the model
    # When you instantiate a layer like nn.Conv2d inside the forward method,
    # it is created anew each time forward is called. Because that layer is constructed at runtime during every forward pass,
    # its weights are randomly re-initialized every time.
    # Hence the weights will not be tracked by PyTorch’s parameter-registration mechanism (they won’t show up in model.parameters()), and
    # Won’t get updated during backpropagation (since they’re re-created each time),
    # Are randomly re-initialized on every forward pass, which makes learning impossible.

    # Convolutional layers
    # kernel_size: the size of the CNN filter of small matrix that has weights
    # kernel_size=3: the kernel/filter will have both height and width of 3 (i.e., 3×3).
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
    self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)

    # Pooling layer
    self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    # Fully-connected layers
    # # After two 2x2 poolings on a 28x28 input, the spatial dimension is reduced to 7x7.
    # Hence, the input to the first linear layer is 64 * 7 * 7.
    self.fc1 = nn.Linear(in_features=64 * 7 * 7, out_features=128)
    self.fc2 = nn.Linear(in_features=128, out_features=num_classes)

  def forward(self, x):
    # Convolution -> ReLU -> Pool
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))

    # Flatten
    x = x.view(x.size(0), -1)  # same as x.view(-1, 64*7*7)

    # Fully-connected -> ReLU
    x = F.relu(self.fc1(x))

    # Final layer (logits)
    x = self.fc2(x)

    return x


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SingleConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        # A single 2D convolution layer
        self.conv1 = nn.Conv2d(
            in_channels=1,
            # input channel size of one example is 1, i.e. grey scale
            # output channel size becomes 2.
            # out_channels=2, that convolution layer learns two distinct filters
            out_channels=2,
            kernel_size=3,
            padding=1
        )

    def forward(self, x):
        # Apply convolution (and optionally an activation)
        # Here we show it with a ReLU, but you can remove F.relu(...) if you want pure convolution
        x=self.conv1(x)
        # x = F.relu(self.conv1(x))
        return x


In [4]:
if __name__ == "__main__":
    # Create a random batch of 1 image, 4×4 (grayscale)
    x = torch.randn((1, 1, 4, 4))

    model = SingleConvNet()
    output = model(x)

    print("Input shape:", x.shape)
    print("Input tensor:\n", x)        # Show the actual input values

    print("\nOutput shape:", output.shape)
    print("Output tensor:\n", output)  # Show the actual output values

    print("Weights:\n", model.conv1.weight)
    print("\nBias:\n", model.conv1.bias)

# Weights, bias are random

Input shape: torch.Size([1, 1, 4, 4])
Input tensor:
 tensor([[[[-1.1258, -1.1524, -0.2506, -0.4339],
          [ 0.8487,  0.6920, -0.3160, -2.1152],
          [ 0.3223, -1.2633,  0.3500,  0.3081],
          [ 0.1198,  1.2377,  1.1168, -0.2473]]]])

Output shape: torch.Size([1, 2, 4, 4])
Output tensor:
 tensor([[[[-0.5136, -0.1470, -0.2287, -0.7550],
          [ 0.2449, -1.0380, -0.9036, -0.5101],
          [-0.5668,  0.2264,  0.7911, -0.8494],
          [ 0.4742, -0.0806, -0.5119, -0.3172]],

         [[-0.3570,  0.1973,  0.5993,  0.1430],
          [-0.3330, -0.5056, -0.6247, -0.1729],
          [-0.3489, -0.3864,  0.6195, -0.6861],
          [ 0.0048, -1.0650, -0.6682, -0.4203]]]],
       grad_fn=<ConvolutionBackward0>)
Weights:
 Parameter containing:
tensor([[[[ 0.1318,  0.2000, -0.2260],
          [-0.1452,  0.1211,  0.2768],
          [-0.0686,  0.2494, -0.0537]]],


        [[[ 0.0353,  0.3018, -0.3092],
          [-0.2098, -0.0844, -0.1299],
          [ 0.2880, -0.2161, -0.1534]

[W301 22:49:30.350090492 NNPACK.cpp:62] Could not initialize NNPACK! Reason: Unsupported hardware.


In [5]:
cnn = SingleConvNet()
mg = MaseGraph(model=cnn)

# Provide a dummy input for the graph so it can use for tracing
batch_size = 1
# x = torch.randn((batch_size, 2, 2))
# batch_size: number of examples
# 1, 4,4,: each example has 1 channel, each channel has a size of 4x4
x = torch.randn((batch_size, 1, 3, 3))
print(x)
dummy_in = {"x": x}

mg, _ = init_metadata_analysis_pass(mg, None)
mg, _ = add_common_metadata_analysis_pass(
    mg, {"dummy_in": dummy_in, "add_value": False}
)



[36mDEBUG   [0m [34mgraph():
    %x : [num_users=1] = placeholder[target=x]
    %conv1 : [num_users=1] = call_module[target=conv1](args = (%x,), kwargs = {})
    return conv1[0m


tensor([[[[-0.0209, -0.7185,  0.5186],
          [-1.3125,  0.1920,  0.5428],
          [-2.2188,  0.2590, -1.0297]]]])
Hellos in add_common_metadata
sigoyi in add_common_metadata
wocao in add_common_metadata
wobucao in add_common_metadata
nihappp in add_common_metadata


# Quantize

In [6]:
config_file = os.path.join(
    os.path.abspath(""),
    "..",
    "..",
    "configs",
    "tests",
    "quantize",
    "fixed.toml",
)
# Fixed.toml used to quantize the model
with open(config_file, "r") as f:
    quan_args = toml.load(f)["passes"]["quantize"]
mg, _ = quantize_transform_pass(mg, quan_args)

_ = report_node_type_analysis_pass(mg)

# Update the metadata
for node in mg.fx_graph.nodes:
    for arg, arg_info in node.meta["mase"]["common"]["args"].items():
        if isinstance(arg_info, dict):
            arg_info["type"] = "fixed"
            arg_info["precision"] = [8, 5]
    for result, result_info in node.meta["mase"]["common"]["results"].items():
        if isinstance(result_info, dict):
            result_info["type"] = "fixed"
            result_info["precision"] = [8, 5]

[32mINFO    [0m [34mInspecting graph [add_common_node_type_analysis_pass][0m
[32mINFO    [0m [34m
Node name    Fx Node op    Mase type            Mase op      Value type
-----------  ------------  -------------------  -----------  ------------
x            placeholder   placeholder          placeholder  NA
conv1        call_module   module_related_func  conv2d       fixed
output       output        output               output       NA[0m


placeholder
node_config:  {'name': 'fixed', 'data_in_width': 8, 'data_in_frac_width': 5, 'weight_width': 8, 'weight_frac_width': 5, 'bias_width': 8, 'bias_frac_width': 5, 'data_out_width': 8, 'data_out_frac_width': 5, 'floor': True}
output


# Hardware Metapass

In [7]:
mg, _ = add_hardware_metadata_analysis_pass(mg)
for node in mg.nodes:
        mase_op = node.meta["mase"]["common"]["mase_op"]
        print ('mase_op:', mase_op)
        print ("common:",node.meta["mase"]["common"])
        print ("hardware:",node.meta["mase"]["hardware"])

for node in mg.fx_graph.nodes:
        if node.meta["mase"].parameters["hardware"]["is_implicit"]:
            continue
        # Only modules have internal parameters
        if node.meta["mase"].module is None:
            continue
        # print (node.meta["mase"].parameters["hardware"])
        # Only checks the hardware data that contains the key toolchain
        if "INTERNAL" in node.meta["mase"].parameters["hardware"]["toolchain"]:
                for param_name, parameter in node.meta["mase"].module.named_parameters():
                        print ("param_name in CNN.jynb:",param_name)
                        print ("parameter in CNN.jynb:", parameter)

"""
weights and bias in the Conv/linear in Maze
param_data = node.meta["mase"].module.get_parameter(param_name).data
print ("param_data: ", param_data)
"""


mase_op: conv2d
2222
vp:  {}
arg_info in add_verilog_param:  {'shape': [1, 1, 3, 3], 'torch_dtype': torch.float32, 'type': 'fixed', 'precision': [8, 5]}
arg_info shape: [1, 1, 3, 3]
length of arg_info: 4
arg_info in add_verilog_param:  {'type': 'fixed', 'precision': [8, 5], 'shape': [2, 1, 3, 3], 'from': None}
arg_info shape: [2, 1, 3, 3]
length of arg_info: 4
arg_info in add_verilog_param:  {'type': 'fixed', 'precision': [8, 5], 'shape': [1, 2], 'from': None}
arg_info shape: [1, 2]
length of arg_info: 2
mase_op: placeholder
common: {'mase_type': 'placeholder', 'mase_op': 'placeholder', 'args': {}, 'results': OrderedDict([('data_out_0', {'type': 'fixed', 'precision': [8, 5], 'shape': [1, 1, 3, 3], 'torch_dtype': torch.float32})])}
hardware: {'is_implicit': True, 'device_id': 0, 'max_parallelism': [4, 4, 4, 4]}
mase_op: conv2d
common: {'mase_type': 'module_related_func', 'mase_op': 'conv2d', 'args': OrderedDict([('data_in_0', {'shape': [1, 1, 3, 3], 'torch_dtype': torch.float32, 'type':

'\nweights and bias in the Conv/linear in Maze\nparam_data = node.meta["mase"].module.get_parameter(param_name).data\nprint ("param_data: ", param_data)\n'

# Emit for SV file

In [8]:
mg, _ = emit_verilog_top_transform_pass(mg)
mg, _ = emit_internal_rtl_transform_pass(mg)

[32mINFO    [0m [34mEmitting Verilog...[0m
[32mINFO    [0m [34mEmitting internal components...[0m


key in get_verilog_parameter: DATA_IN_0_PRECISION_0
key in get_verilog_parameter: DATA_IN_0_PRECISION_1
key in get_verilog_parameter: DATA_IN_0_TENSOR_SIZE_DIM_0
key in get_verilog_parameter: DATA_IN_0_PARALLELISM_DIM_0
key in get_verilog_parameter: DATA_IN_0_TENSOR_SIZE_DIM_1
key in get_verilog_parameter: DATA_IN_0_PARALLELISM_DIM_1
key in get_verilog_parameter: DATA_IN_0_TENSOR_SIZE_DIM_2
key in get_verilog_parameter: DATA_IN_0_PARALLELISM_DIM_2
key in get_verilog_parameter: DATA_IN_0_TENSOR_SIZE_DIM_3
key in get_verilog_parameter: DATA_IN_0_PARALLELISM_DIM_3
key in get_verilog_parameter: WEIGHT_PRECISION_0
key in get_verilog_parameter: WEIGHT_PRECISION_1
key in get_verilog_parameter: WEIGHT_TENSOR_SIZE_DIM_0
key in get_verilog_parameter: WEIGHT_PARALLELISM_DIM_0
key in get_verilog_parameter: WEIGHT_TENSOR_SIZE_DIM_1
key in get_verilog_parameter: WEIGHT_PARALLELISM_DIM_1
key in get_verilog_parameter: WEIGHT_TENSOR_SIZE_DIM_2
key in get_verilog_parameter: WEIGHT_PARALLELISM_DIM_2
key 

# Memory

In [9]:
mg, _ = emit_bram_transform_pass(mg)

"""
  param_data:  tensor([[[[-0.1946,  0.2865,  0.1487],
          [ 0.1616,  0.0175, -0.1709],
          [ 0.0564, -0.3112, -0.2409]]],


        [[[-0.1718,  0.2103,  0.1954],
          [-0.1478, -0.0120,  0.2132],
          [ 0.3314,  0.1323,  0.0450]]]])
  This would be weight, as I defined conv as (2,1,3,3)
  The kernel filter size is 3x3, and we have 2 filters, thus 9x2=18 elements
  Bias: depends on number of filter-> here 2 filters, thus 2 bias elements,
    as bias added after filter multiplies with section of pixel
"""

[32mINFO    [0m [34mEmitting BRAM...[0m
[36mDEBUG   [0m [34mEmitting DAT file for node: conv1, parameter: weight[0m
[36mDEBUG   [0m [34mROM module weight successfully written into /root/.mase/top/hardware/rtl/conv1_weight_source.sv[0m
[36mDEBUG   [0m [34mInit data weight successfully written into /root/.mase/top/hardware/rtl/conv1_weight_rom.dat[0m
[36mDEBUG   [0m [34mEmitting DAT file for node: conv1, parameter: bias[0m
[36mDEBUG   [0m [34mROM module bias successfully written into /root/.mase/top/hardware/rtl/conv1_bias_source.sv[0m
[36mDEBUG   [0m [34mInit data bias successfully written into /root/.mase/top/hardware/rtl/conv1_bias_rom.dat[0m


1 in emit_bram_transform_pass
/root/.mase/top/hardware/rtl
param_name in emit_bram_handshake: weight
parameter in emit_bram_handshake: Parameter containing:
tensor([[[[-0.1946,  0.2865,  0.1487],
          [ 0.1616,  0.0175, -0.1709],
          [ 0.0564, -0.3112, -0.2409]]],


        [[[-0.1718,  0.2103,  0.1954],
          [-0.1478, -0.0120,  0.2132],
          [ 0.3314,  0.1323,  0.0450]]]], requires_grad=True)
out_size in emit_parameters_in_mem_internal: 9
conv1
param_data in emit_parameters_in_dat_internal:  tensor([[[[-0.1946,  0.2865,  0.1487],
          [ 0.1616,  0.0175, -0.1709],
          [ 0.0564, -0.3112, -0.2409]]],


        [[[-0.1718,  0.2103,  0.1954],
          [-0.1478, -0.0120,  0.2132],
          [ 0.3314,  0.1323,  0.0450]]]])
out_depth:  2
data_buff:  fa09050501fb02f6f8
fb0706fb00070b0401

param_name in emit_bram_handshake: bias
parameter in emit_bram_handshake: Parameter containing:
tensor([-0.3299, -0.2162], requires_grad=True)
out_size in emit_parameters_in_m

'\n  param_data:  tensor([[[[-0.1946,  0.2865,  0.1487],\n          [ 0.1616,  0.0175, -0.1709],\n          [ 0.0564, -0.3112, -0.2409]]],\n\n\n        [[[-0.1718,  0.2103,  0.1954],\n          [-0.1478, -0.0120,  0.2132],\n          [ 0.3314,  0.1323,  0.0450]]]])\n  This would be weight, as I defined conv as (2,1,3,3)\n  The kernel filter size is 3x3, and we have 2 filters, thus 9x2=18 elements\n  Bias: depends on number of filter-> here 2 filters, thus 2 bias elements,\n    as bias added after filter multiplies with section of pixel\n'

In [10]:
mg, _ = emit_cocotb_transform_pass(mg)

[32mINFO    [0m [34mEmitting testbench...[0m


In [11]:
from chop.actions import simulate

simulate(skip_build=False, skip_test=False)

INFO: Running command perl /usr/local/bin/verilator -cc --exe -Mdir /workspace/docs/labs/sim_build -DCOCOTB_SIM=1 --top-module top --vpi --public-flat-rw --prefix Vtop -o top -LDFLAGS '-Wl,-rpath,/usr/local/lib/python3.11/dist-packages/cocotb/libs -L/usr/local/lib/python3.11/dist-packages/cocotb/libs -lcocotbvpi_verilator' -Wno-fatal -Wno-lint -Wno-style --trace-fst --trace-structs --trace-depth 3 -I/root/.mase/top/hardware/rtl -I/workspace/src/mase_components/interface/rtl -I/workspace/src/mase_components/language_models/rtl -I/workspace/src/mase_components/memory/rtl -I/workspace/src/mase_components/vivado/rtl -I/workspace/src/mase_components/convolution_layers/rtl -I/workspace/src/mase_components/cast/rtl -I/workspace/src/mase_components/systolic_arrays/rtl -I/workspace/src/mase_components/scalar_operators/rtl -I/workspace/src/mase_components/transformer_layers/rtl -I/workspace/src/mase_components/common/rtl -I/workspace/src/mase_components/hls/rtl -I/workspace/src/mase_components/v

%Error: /root/.mase/top/hardware/rtl/top.sv:151:6: Slices of arrays in assignments have different unpacked dimensions, 18 versus 2
  151 |     .data_out_0(conv1_data_out_0),
      |      ^~~~~~~~~~
%Error: /root/.mase/top/hardware/rtl/top.sv:143:13: Slices of arrays in assignments have different unpacked dimensions, 8 versus 18
  143 |     .weight(conv1_weight),
      |             ^~~~~~~~~~~~
%Error: /root/.mase/top/hardware/rtl/top.sv:139:16: Slices of arrays in assignments have different unpacked dimensions, 2 versus 9
  139 |     .data_in_0(conv1_data_in_0),
      |                ^~~~~~~~~~~~~~~
%Error: /root/.mase/top/hardware/rtl/top.sv:170:6: Slices of arrays in assignments have different unpacked dimensions, 18 versus 9
  170 |     .data_out(conv1_weight),
      |      ^~~~~~~~
%Error: Exiting due to 4 error(s)


SystemExit: Process 'perl' terminated with error 1