In [1]:
import sys
sys.path.insert(0, "../../")

# Co-Verification tricks

In some cases, you might want to prove that a python function, executed natively, does the same as its synthesized counterpart. This is in particular the case for functions that are reused often for various signal processing targets, be it micro code or pure cascaded hardware elements.

Take a `@rtl_function` example:

In [2]:
from cyhdl import *

In [3]:
class often_used:
    """Holder for an RTL helper that is used from two execution domains."""

    # Conditional inverter: `b` is driven with the bitwise inverse of `a`
    # while `en` is set, and with `a` unchanged otherwise.
    # Per the accompanying text this function is called like a plain
    # function from a hardware context and via `yield from` from native
    # Python.
    # NOTE(review): `rtl` is presumably the calling context supplied by
    # `@rtl_function`; the callers in this file pass only (en, a, b).
    @rtl_function
    def funclet(rtl, en, a, b):
        if en == True:
            b.next = ~a
        else:
            b.next = a

We will be calling this function twice:
1. From the hardware context, like a function
2. From a native context, using `yield from`

We create a specific co-simulation design. It will only function with a cosimulation backend, such as CXXRTL.

In [4]:
from yosys.simulator import CXXRTL
from myirl.library.basictypes import Bool

class CoSimDesign(cyrite_factory.Module):
    """Co-simulation design that checks `often_used.funclet` against itself.

    The function is evaluated once inside the compiled `unit` and once
    natively inside the test bench; the test bench compares both outputs.
    """

    def __init__(self):
        # Module name "cosim", simulated through the CXXRTL backend.
        super().__init__("cosim", CXXRTL)
        
    # Unit under test: on every rising clock edge, `often_used.funclet`
    # is applied to (en, a) and drives the output `b`.
    @cyrite_factory.block_component
    def unit(self,
             clk : ClkSignal,
             en: Bool,
             a : Signal,
             b : Signal.Output):
        
        @always(clk.posedge)
        def worker():
            # Call @rtl_function like a function from a rtl context:
            often_used.funclet(en, a, b)

        return worker

    # Test bench (time unit "ns"): instantiates `unit`, runs the same
    # funclet natively on the mirrored signals `A`/`B` and asserts that
    # both outputs agree once `verify` is set.
    @cyrite_factory.testbench("ns")
    def testbench(self):
        clk = self.ClkSignal(name = 'clk')
        en = self.Signal(bool(), name = 'en')
        # 8-bit stimulus/result pair wired to the unit under test ...
        a, b = [ self.Signal(intbv()[8:], name = n) for n in "ab" ]
        # ... and the 8-bit pair used by the native co-process.
        co_a, co_b = [ self.Signal(intbv()[8:], name = n) for n in "AB" ]
        # Gates the comparison in `verification` below.
        verify = self.Signal(bool())
        
        # `co_a` follows `a`, so both implementations see the same input.
        continuous_assignments = [
            co_a   @assign@  a
        ]

        uut = self.unit(clk, en, a, b)

        # # Co-Unit:
        @self.always(clk.posedge)
        def co_unit():
            # Call the same funclet using `yield from`:
            yield from often_used.funclet(en, co_a, co_b)

        # Compare the native result `co_b` with the unit's output `b` on
        # every rising clock edge while `verify` is set.
        @self.always(clk.posedge)
        def verification():
            if verify:
                print("Verify:", en, self.now(), co_b, b)
                assert co_b == b
        
        # Clock generator: toggles `clk` after every delay(1).
        @self.always(delay(1))
        def clkgen():
            clk.next = ~clk

        # Stimulus: keep verification off for delay(20), then walk through
        # the test values. `en` is taken from bit 0 of the value before
        # the falling clock edge, `a` is updated after it.
        @self.sequence
        def main():
            verify.next = False
            yield delay(20)
            verify.next = True
            for v in [0, 0xaa, 0x55, 0x2d, 0x85]:
                en.next = (v & 1) != 0
                yield clk.negedge
                a.next = v

            raise StopSimulation

        return instances()

We instantiate the design and run the test bench.
If you do not make changes to the hardware function, you can set `recompile` to False. This can be useful if you develop test benches for large hardware designs.

In [5]:
# Instantiate the co-simulation design and build its test bench.
d = CoSimDesign()
tb = d.testbench()
# `recompile = True` rebuilds the backend module; per the accompanying text
# it can be set to False as long as the hardware function is unchanged.
# NOTE(review): 2000 is presumably the maximum simulation time in the test
# bench's "ns" unit, and `wavetrace` the VCD output file — confirm against
# the `run()` API.
tb.run(2000, wavetrace = 'test.vcd', recompile = True)

[7;35m Declare obj 'unit' in context '(CoSimDesign 'cosim')'(<class '__main__.CoSimDesign'>) [0m
DEBUG LIB ALL ELEM (CoSimDesign 'cosim')
DEBUG MAIN ELAB [Instance unit I/F: [// ID: unit_0 ]]
 DEBUG components ['unit_obj_CoSimDesignu_1u_1u_8u_8'] (CoSimDesign 'cosim') 
[32m Adding module with name `unit` [0m
[7;34m FINALIZE implementation `unit` of `unit` [0m
Compiling /tmp/myirl_cosim_l8en8v73/unit_8133.pyx because it changed.
[1/1] Cythonizing /tmp/myirl_cosim_l8en8v73/unit_8133.pyx
running build_ext
building 'runtime.unit_8133' extension
creating build/temp.linux-x86_64-3.10/tmp/myirl_cosim_l8en8v73
gcc -pthread -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -fPIC -DCOSIM_NAMESPACE=unit_8133 -Iruntime -I/tmp/myirl_cosim_l8en8v73/ -I/usr/share/yosys/include/backends/cxxrtl/runtime -I/usr/local/include/python3.10 -c /tmp/myirl_cosim_l8en8v73/unit_8133.cpp -o build/temp.linux-x86_64-3.10/tmp/myirl_cosim_l8en8v73/unit_8133.o
gcc -pthread -Wno-unused-result -Wsign-

[32mUsing '/tmp/myirl_cosim_l8en8v73/' for output[0m
[32mCosimulation: co_a not connected to backend[0m
[32mCosimulation: co_b not connected to backend[0m
[32mCosimulation: verify not connected to backend[0m
[7;34mSTOP SIMULATION @30[0m


0

## Detailed explanations

What happens here is the following:
* `self.unit` is compiled as CXXRTL back end module and is imported ad-hoc
* The signals passed to its interface are connected to the backend, i.e. they are consumed and driven by the latter.
* The `co_*` signals are not connected to the back end and are thus handled by the cosimulation layer upon occurring events, such as a `clk.posedge`.
* All functionality in the `testbench()` function runs as native Python. Therefore one has to ensure that calls to macros are evaluated explicitly or that the `yield from` construct is used for context-sensitive `@rtl_function`s or `@cyrite_method`s.

To determine from the code in which domain a piece of code is executed or evaluated, the following rule of thumb applies:
* `self.always(...)` runs here in native execution
* `always(...)` is transpiled to hardware

Note that co-processes running in the native Python context allow pretty much any kind of Python extension to be looped in, whereas only a small built-in subset of Python can be transpiled to HDL.

## Performance aspects: Event signals

A Co-Simulator is normally the driving force in a design, creating the external stimuli for a pure digital design without functional delay simulation.

All the above `@self.always` co-processes will only react to external stimuli caused by `EventSignal` types. The reason for this is performance: all combinatorial signal dependencies of their sources are sorted out by the faster back end. The co-simulation front end should only have to set a few signals, provide a clock and a reset.

This implies certain restrictions:
* No clock generators inside the synthesized RTL, for instance, PLLs can not be simulated this way.
* co-processes can only use EventSignal types in their sensitivity list
* `@sequence` functions can only wait for events of EventSignals as well, i.e. constructs such as `s.posedge` will only work for a clock signal type.

Non-event signals, such as outputs of the compiled backend unit's simulation, will thus have to be polled explicitly for changes.

### Native simulation issues

Note that the co-simulation layer is very rudimentary and does not allow simply running a hardware design on the native co-simulation side.

In particular, the support for co-processes is limited to:
* Direct assignments of signals, no combinatorial logic
* `@self.always()` only allowed for:
    * Event types caused by EventSignal types (ClkSignal, ResetSignal, ..)
    * Delay arguments (`delay(cycles)`) where cycles must be integer
    * Co-processes inside the testbench top, i.e. no hierarchy

Also keep in mind that the current Co-Simulator only allows **one** unit under test instance.

## Co-verification of existing modules

Sometimes, a verified V*HDL module is to be ported to CyHDL and requires enhancements. In order to verify these enhancements do not break existing setups, it might be useful to automatically create a wrapper to run both (the new and the verified) unit alongside each other.

This is also referred to as 'virtual lock step', as there is the option to create dedicated hardware verification units within such a wrapper.

We import the `create_instances` function which auto-wraps both units under test.

In [6]:
from myirl.library.verification.lockstep import create_instances as lockstep_create_instances

### DSP ALU element co-verification

We import a VHDL ALU component from the example DSP ALU library:

In [7]:
from cyrite.examples import libdspalu

Separately, we developed a CyHDL class consisting of individual `@rtl_function`s. Those can be called from dual contexts in order to perform a verification for different execution domains (DSP architecture versus pure HDL versus Python, ...)

In [8]:
class DSPEmu:
	"""The two DSP ALU processing steps, each written as an `@rtl_function`."""

	# Stage 0: multiply the UPPER slices and the LOWER slices of `a` and
	# `b`, each interpreted as signed, into `ru` and `rl` respectively.
	# UPPER and LOWER are the slice objects used to index `a` and `b`.
	@rtl_function
	def stage0(rtl, a, b, ru, rl, UPPER, LOWER):
		ru.next = a[UPPER].signed() * b[UPPER].signed()
		rl.next = a[LOWER].signed() * b[LOWER].signed()

	# Stage 1: update the accumulators `a0`/`a1` from the products
	# `ru`/`rl` according to `mode`:
	#   A_SUB    : subtract both products
	#   A_ADD    : add both products
	#   A_ADDSUB : add `ru` to `a0`, subtract `rl` from `a1`
	#   otherwise: load the products without accumulating
	# The A_* constants are looked up on the `rtl` argument.
	@rtl_function
	def stage1(rtl, mode, ru, rl, a0, a1):
		if mode == rtl.A_SUB:
			a0.next = a0 - ru
			a1.next = a1 - rl
		elif mode == rtl.A_ADD:
			a0.next = a0 + ru
			a1.next = a1 + rl
		elif mode == rtl.A_ADDSUB:
			a0.next = a0 + ru
			a1.next = a1 - rl
		else:
			a0.next = ru
			a1.next = rl

The actual hardware component in CyHDL is composed as a library class containing auxiliaries and a block component implementation. It also implements a specific attribute getter `__getattr__`, to forward the above `rtl` class member requests to the calling context.

In [9]:
from cyrite.library.hls import mypipe

class DSPImpl(LibraryModule):
    # Library class providing the CyHDL implementation of the dual 16 bit
    # MAC, assembled from the `DSPEmu` stages.
    
    # Put local type definitions into class header:
    PS = mypipe.pipelined(Signal)
    PSModeSignal = PS.Type(intbv, 2)
    Bool = PS.Type(bool)

    def __getattr__(self, name):
        """Forward attributes not found here (e.g. the `A_*` mode
        constants) to `libdspalu.DSPAlu`, for use by the rtl context."""
        return getattr(libdspalu.DSPAlu, name)

    # A clear hardware component. We don't do latency checking due to accumulator values
    #
    # Interface:
    #   clk           : clock driving the pipeline
    #   mode          : accumulation mode, evaluated by `DSPEmu.stage1`
    #   ce            : enable input, handed to `mypipe.pipe`
    #   a, b          : packed operands; slice [2*W_WIDTH:W_WIDTH] is the
    #                   upper word and slice [W_WIDTH:0] the lower word
    #   rval          : output handed to `mypipe.pipe` (the test benches
    #                   use it as result-valid flag)
    #   resu, resl    : accumulator outputs of the upper / lower word
    #   W_WIDTH       : operand word width in bits
    #   HEADROOM_BITS : extra accumulator bits on top of the 2 * W_WIDTH
    #                   bit wide product
    @block_component
    def dual_mac16(self,
        clk  : ClkSignal,
        mode : libdspalu.DSPTypes.ModeSignal,
        ce   : Signal.Type(bool),
        a    : Signal,
        b    : Signal,
        rval : Bool.Output,
        resu : Signal.Output,
        resl : Signal.Output,
        W_WIDTH : int = 16,
        HEADROOM_BITS : int = 8
    ):

        # Signed products of the lower / upper operand words.
        r0l, r0u = [ self.PS(intbv()[W_WIDTH * 2:].signed()) for _ in range(2) ]
        # Accumulators: product width plus the configured headroom. This
        # used to be a hard-coded `+ 8`, which ignored HEADROOM_BITS.
        a0, a1 = [ self.PS(intbv()[2 * W_WIDTH + HEADROOM_BITS:].signed()) for _ in range(2) ]

        LOWER = slice(W_WIDTH, 0)
        UPPER = slice(2 * W_WIDTH, W_WIDTH)

        mode0 = self.PS(intbv()[2:])
        # The mode consumed by stage 1 is delayed by one clock
        # (presumably to line it up with the stage 0 products).
        mode1 = mode0.delayed(clk, 1)

        @mypipe.pipe(clk, None, ce, None, rval)
        def pipeline_worker(ctx):
            yield DSPEmu.stage0(a, b, r0u, r0l, UPPER, LOWER)(ctx).logic
            yield DSPEmu.stage1(mode1, r0u, r0l, a0, a1)(ctx).logic

        # Connect the external mode input and the accumulator outputs.
        wires = [
            mode0.wireup(mode),
            resu.wireup(a0), resl.wireup(a1)
        ]

        return instances()

A test design with a test bench making use of this library:

In [10]:
class DSPTest(cyrite_factory.Module):
    """The DSP test for co-simulation mode"""

    # Put a lib into the header for auto-registration
    lib_dspemu = DSPImpl("emulation")

    # Component instantiated by the test bench as `self.dual_mac16(...)`;
    # `CoDSPTest` replaces it with a lock-step wrapper.
    dual_mac16 = lib_dspemu.dual_mac16

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.mylib = self.lib_dspemu
        self.mylib.clear() # To reuse for several targets

    # Test bench (time unit "ns"): feeds one operand pair into
    # `dual_mac16` and checks the two results. N is the operand word
    # width, passed on as W_WIDTH.
    @cyrite_factory.testbench("ns")
    def testbench(self, N : int = 16):
        PS = mypipe.pipelined(self.Signal)
        
        clk = self.ClkSignal()
        mode = PS(intbv()[2:])
        ce, valid = [ PS(bool()) for _ in range(2) ]
        # Packed operands: two N bit words each.
        a, b = [ PS(intbv()[2 * N:]) for _ in range(2) ]
        # Signed results, 2 * N + 8 bits wide.
        ru, rv = [ PS(intbv()[2 * N + 8:].signed()) for _ in range(2) ]

        uut = self.dual_mac16(
            clk = clk,
            mode = mode,
            ce = ce,
            a = a,
            b = b,
            rval = valid,
            resu = ru,
            resl = rv,
            W_WIDTH = N,
        )


        # Clock generator: toggles `clk` after every delay(1).
        @self.always(delay(1))
        def clkgen():
            clk.next = ~clk
            
        @self.sequence
        def main():
            # Apply the operands with `ce` held low first ...
            ce.next = False
            a.next = 0x20008000
            b.next = 0x10003000
            yield clk.negedge
            # ... then enable the unit in A_ASSIGN mode.
            ce.next = True
            mode.next = libdspalu.DSPAlu.A_ASSIGN
            a.next = 0x20008000
            b.next = 0x10003000

            yield clk.negedge

            # Not valid yet one falling edge after enabling ...
            assert valid == False
            yield clk.negedge
            yield clk.negedge

            # ... but valid two falling edges later.
            assert valid == True
            print("RESULT:", ru, rv)

            # Upper words: 0x2000 * 0x1000 = 0x2000000.
            # Lower words: 0x8000 is -0x8000 when read as signed 16 bit,
            # and -0x8000 * 0x3000 = -0x18000000, i.e. 0xFFE8000000 when
            # viewed as an unsigned 40 bit value.
            assert ru.unsigned() == 0x0002000000
            assert rv.unsigned() == 0xFFE8000000

            yield delay(20)

            raise StopSimulation

        return instances()


### Running the test bench

We run the above test bench with the Cyrite implementation of the DSP elements first.

In [11]:
from yosys.simulator import CXXRTL
from cyrite.simulation import ghdl

# Build the DSP test design "test" with GHDL as simulator.
d = DSPTest("test", ghdl.GHDL)

tb = d.testbench()

# NOTE(review): 200 is presumably the maximum simulation time in the test
# bench's "ns" unit, and `wavetrace` the VCD output file — confirm against
# the `run()` API.
tb.run(200, debug = True, wavetrace = "test1.vcd", recompile = False)

[7;35m Declare obj 'testbench' in context '(DSPTest 'test')'(<class '__main__.DSPTest'>) [0m
 N: use default 16 
[7;35m Declare obj 'dual_mac16' in context '(LIB: DSPImpl 'emulation')'(<class '__main__.DSPImpl'>) [0m
 HEADROOM_BITS: use default 8 
[7;35m Declare obj 'sigdelay' in context '(LIB: DSPImpl 'emulation')'(<class '__main__.DSPImpl'>) [0m
[32m DEBUG Inline instance [CompInline 'sigdelay/sigdelay'] [0m
 Writing 'sigdelay' to file ./sigdelay.vhdl 
 Writing 'dual_mac16' to file ./dual_mac16.vhdl 
 Not emitting design types library 
[7;35m Skip registration of (LIB: DSPImpl 'emulation')/<class '__main__.DSPImpl'> [0m
 Writing 'testbench' to file /tmp/testbench.vhdl 
 Creating library file /tmp/module_defs.vhdl 
DEBUG_FILES ['/tmp/testbench.vhdl', '/tmp/module_defs.vhdl', './sigdelay.vhdl', './dual_mac16.vhdl', '/home/pyosys/src/myhdl2/myirl/targets/vhdl/libmyirl.vhdl', '/home/pyosys/src/myhdl2/myirl/targets/vhdl/txt_util.vhdl']
==== COSIM stdout ====
analyze /home/pyosys

0

### Co-Simulation testbench

We derive a class from the above, this time creating a wrapper for `dual_mac16` using `.create_instances`:

In [12]:
class CoDSPTest(DSPTest):
    """DSP test running the CyHDL and the VHDL `dual_mac16` in lock step."""

    # CyHDL implementation; reaches the wrapper below as `self.mylib`.
    lib_dspemu = DSPImpl("emulation")

    # VHDL ALU component from the example library.
    vhdl_lib = libdspalu.DSPAlu("co_vhdl")

    def dual_mac16(self, *args, **kwargs):
        """Wrap both `dual_mac16` implementations for the inherited test bench.

        The arguments are handed unchanged to `lockstep_create_instances`,
        whose result is returned.
        """
        cyhdl_unit = self.mylib.dual_mac16
        vhdl_unit = self.vhdl_lib.dual_mac16
        return lockstep_create_instances(cyhdl_unit, vhdl_unit, args, kwargs)


Finally, we run the same test bench with the wrapper:

In [13]:
# Same test bench as before, this time with the lock-step wrapper from
# `CoDSPTest` in place of the plain `dual_mac16`.
d = CoDSPTest("test", ghdl.GHDL)

tb = d.testbench()

# NOTE(review): this uses the same wavetrace file name ("test1.vcd") as the
# previous run, so that trace is presumably overwritten — confirm this is
# intended.
tb.run(200, debug = True, wavetrace = "test1.vcd", recompile = False)

[7;35m Declare obj 'testbench' in context '(CoDSPTest 'test')'(<class '__main__.CoDSPTest'>) [0m
 N: use default 16 
[7;35m Declare obj 'dual_mac16' in context '(LIB: DSPImpl 'emulation')'(<class '__main__.DSPImpl'>) [0m
 HEADROOM_BITS: use default 8 
[7;35m Declare obj 'sigdelay' in context '(LIB: DSPImpl 'emulation')'(<class '__main__.DSPImpl'>) [0m
[32m DEBUG Inline instance [CompInline 'sigdelay/sigdelay'] [0m
[7;34m use default parameter HEADROOM_BITS : 8 [0m
[7;35m [_blackbox_method 'dual_mac16/dual_mac16'] blackbox not returning instances [0m
 W_WIDTH: use default 16 
 HEADROOM_BITS: use default 8 
 Writing 'sigdelay' to file ./sigdelay.vhdl 
 Writing 'uut_dual_mac16' to file ./uut_dual_mac16.vhdl 
 Not emitting design types library 
[7;35m Skip registration of (LIB: DSPImpl 'emulation')/<class '__main__.DSPImpl'> [0m
[7;35m Skip registration of <cyrite.examples.libdspalu.DSPAlu object at 0x7f2b0c299c90>/<class 'cyrite.examples.libdspalu.DSPAlu'> [0m
[7;35m Skip



0

Since no `reset` is used in this element, the internal `MyLockstep` base class uses a simple dead time of four clock cycles by default before starting to compare the signals. A class derived from this base class can be passed to `.create_instances` using the `lockstep_factory` keyword.

If a reset is present, it is automatically recognized if part of the UUT interface. Otherwise, it can be specified using the `resetsig` argument to `.create_instances`.

**Note** This verification method only works for HDL targets and is not simply portable to CXXRTL. 

## Automated random stimulation

When a unit has a rather simple behaviour, a randomized I/O stimulus may suffice. In this case, you can use the `output_randomizer` to generate deterministic random values using a simple LFSR whose length is determined by the total length of the signals passed. In this implementation, this composed vector cannot be longer than 32 bits.

The start values of the signals are used as start values for the LFSR, when `STARTVAL` is not specified to the randomizer. This can be important when the unit has no reset and may create undefined values ('X').

In [14]:
from myirl.library.verification.randomize import output_randomizer


class RandomTest(CoDSPTest):
    """Lock-step DSP test driven by `output_randomizer` stimuli."""

    lib_dspemu = DSPImpl("emulation")
    vhdl_lib = libdspalu.DSPAlu("co_vhdl")

    # Test bench (time unit 'ns').
    # sel_signals: names of local signals of this test bench (strings);
    #              they are looked up via `locals()` and handed to
    #              `output_randomizer` as keyword arguments.
    # N:           operand word width, passed on as W_WIDTH.
    @cyrite_factory.testbench('ns')
    def random_tb(self, sel_signals, N = 16):
        PS = mypipe.pipelined(self.Signal)

        clk = ClkSignal()
        reset = ResetSignal(False, True)

        # Passed to the randomizer as `cycle_strobe`; `main` waits for it.
        cycle = Signal(bool())

        # Use initial value A_ASSIGN to avoid creating undefined 'X' values:
        mode, imode = [ PS(intbv(self.vhdl_lib.A_ASSIGN)[2:]) for _ in range(2) ]
        ce, ce1, ce0, valid = [ PS(bool(True)) for _ in range(4) ]
        a, b = [ PS(intbv()[2 * N:]) for _ in range(2) ]
        ru, rv = [ PS(intbv()[2 * N + 8:].signed()) for _ in range(2) ]
        # Source random values to create data words:
        srca, srcb = [ Signal(intbv()[8:]) for _ in range(2) ]

        # Each operand is assembled from four 8 bit terms derived from the
        # two random sources.
        # NOTE(review): that is 32 bits, which matches the 2 * N wide
        # operands only for N = 16 — confirm before using another N.
        datagen = [
            a.wireup(concat(srca, (srca ^ srcb), ~srcb, srcb)),
            b.wireup(concat(srcb, (srca ^ srcb), srca, (srcb ^ ~srca))),
        ]
        
        uut = self.dual_mac16(
            clk = clk,
            mode = imode,
            ce = ce1,
            a = a,
            b = b,
            rval = valid,
            resu = ru,
            resl = rv,
            W_WIDTH = N,
        )
        
        # Clock generator: toggles `clk` after every delay(2).
        @self.always(delay(2))
        def clkgen():
            clk.next = ~clk

        # `ce0` is `ce` registered on the rising clock edge.
        @self.always(clk.posedge)
        def ce_gen():
            ce0.next = ce

        # Hold the unit's mode at A_ASSIGN during reset, otherwise follow
        # `mode`.
        @self.always(clk.posedge)
        def muxer():
            if reset:
                imode.next = self.vhdl_lib.A_ASSIGN
            else:
                imode.next = mode

        # The unit's enable is `ce` OR its registered copy `ce0`.
        wires = [
            ce1.wireup(ce0 | ce)
        ]
        
        # Resolve the requested signal names against the locals defined
        # above.
        selection = {}
        for n in sel_signals:
            selection[n] = locals()[n]

        # Instance a randomizer for this signal selection:
        r = output_randomizer(clk, reset, cycle_strobe = cycle, **selection)

        @self.sequence
        def main():
            # Hold reset for delay(20), then release it.
            reset.next = True
            yield delay(20)
            reset.next = False
            # ce.next = True

            # Poll `cycle` on rising clock edges until it is set.
            while cycle == False:
                yield clk.posedge
 
            yield delay(200)
            raise StopSimulation

        return instances()


In [15]:
# Build the randomized lock-step test "random" with GHDL as simulator.
p = RandomTest("random", ghdl.GHDL)

# Randomize the mode, the clock enable and both 8 bit data sources.
tb = p.random_tb(['mode', 'ce', 'srca', 'srcb'])
tb.run(200, debug = True, wavetrace = "rand.vcd")

[7;35m Declare obj 'random_tb' in context '(RandomTest 'random')'(<class '__main__.RandomTest'>) [0m
 N: use default 16 
[7;35m Declare obj 'dual_mac16' in context '(LIB: DSPImpl 'emulation')'(<class '__main__.DSPImpl'>) [0m
 HEADROOM_BITS: use default 8 
[7;35m Declare obj 'sigdelay' in context '(LIB: DSPImpl 'emulation')'(<class '__main__.DSPImpl'>) [0m
[32m DEBUG Inline instance [CompInline 'sigdelay/sigdelay'] [0m
[7;34m use default parameter HEADROOM_BITS : 8 [0m
[7;35m [_blackbox_method 'dual_mac16/dual_mac16'] blackbox not returning instances [0m
 W_WIDTH: use default 16 
 HEADROOM_BITS: use default 8 
TOTAL VECTOR LENGTH 19
DEBUG: SET START VALUE 7
[7;35m Declare obj 'counter_logic' in context '(RandomTest 'random')'(<class '__main__.RandomTest'>) [0m
[32m DEBUG Inline instance [CompInline 'counter_logic/counter_logic'] [0m
 Writing 'sigdelay' to file ./sigdelay.vhdl 
 Writing 'uut_dual_mac16' to file ./uut_dual_mac16.vhdl 
 Not emitting design types library 
[



0