In [1]:
# OPCODES
# instruction:     00000---
# addressing mode: -----000
from collections import namedtuple

# Addressing modes: the value ends up in the low three bits of an opcode byte.
Adm = namedtuple('Adm', 'IMP IMM ABS REL IDX')

# Instruction mnemonics: the value ends up in the upper five bits of an opcode byte.
Opc = namedtuple('Opc', (
    'LDA STA PHA PLA ASL ASR TXA TAX '
    'INX DEX ADD SUB AND OR XOR CMP '
    'RTS JNZ JZ JSR JMP'
))

# Singleton "enums": every field simply holds its own position (0, 1, 2, ...).
opc = Opc._make(range(len(Opc._fields)))
adm = Adm._make(range(len(Adm._fields)))
                          
import pandas as pd
import numpy as np

# One row per addressing mode, one column per instruction; each cell is the
# hex string of (instruction << 3) + addressing mode.
shifted_opcodes = np.left_shift(pd.Series(opc._asdict()), 3)
df = pd.DataFrame(columns=list(opc._fields))
for mode_name, mode_value in adm._asdict().items():
    df.loc[mode_name] = (shifted_opcodes + mode_value).apply(hex)

# Opcode bytes highlighted green in the table; every other combination is red.
allowed = [0x1, 0x2, 0x4, 0xa, 0xc, 0x10, 0x18, 0x21, 0x29, 0x30, 0x38, 0x40, 0x48, 0x51, 0x59, 0x61, 0x69, 0x71, 0x79, 0x80, 0x8b, 0x93, 0x9a, 0xa2]


def _cell_colour(cell):
    """Return the CSS background for one hex-string cell of the opcode table."""
    if int(cell, 16) in allowed:
        return 'background-color: #5f5'
    return 'background-color: #f55'


df.style.applymap(_cell_colour)


Unnamed: 0,LDA,STA,PHA,PLA,ASL,ASR,TXA,TAX,INX,DEX,ADD,SUB,AND,OR,XOR,CMP,RTS,JNZ,JZ,JSR,JMP
IMP,0x0,0x8,0x10,0x18,0x20,0x28,0x30,0x38,0x40,0x48,0x50,0x58,0x60,0x68,0x70,0x78,0x80,0x88,0x90,0x98,0xa0
IMM,0x1,0x9,0x11,0x19,0x21,0x29,0x31,0x39,0x41,0x49,0x51,0x59,0x61,0x69,0x71,0x79,0x81,0x89,0x91,0x99,0xa1
ABS,0x2,0xa,0x12,0x1a,0x22,0x2a,0x32,0x3a,0x42,0x4a,0x52,0x5a,0x62,0x6a,0x72,0x7a,0x82,0x8a,0x92,0x9a,0xa2
REL,0x3,0xb,0x13,0x1b,0x23,0x2b,0x33,0x3b,0x43,0x4b,0x53,0x5b,0x63,0x6b,0x73,0x7b,0x83,0x8b,0x93,0x9b,0xa3
IDX,0x4,0xc,0x14,0x1c,0x24,0x2c,0x34,0x3c,0x44,0x4c,0x54,0x5c,0x64,0x6c,0x74,0x7c,0x84,0x8c,0x94,0x9c,0xa4


In [2]:
# ROM source
# Assembly text that is assembled into the ROM image by Parser().parse(src).
# The program clears A and X, then loops: store A at range+X, increment X,
# copy X to A, compare with $10 and branch back to _1 while the result is
# non-zero. `range` is a one-byte data item placed at address $100.
src = """
; testing some basic instructions
lda #$0
tax
_1:
sta range,x
inx
txa
cmp #$10
jnz _1
at $100
range db $0

"""

In [3]:
# Assembler

from pyparsing import *

class Parser():
    """Assembler for the toy CPU: turns assembly text into a tuple of ROM bytes.

    State (kept per instance):
      pos    -- current assembly address; moved by `at $...` and by every emitted item
      labels -- label / data-item name -> address
      binary -- emitted items; label operands are stored as strings until
                `parse` replaces them with their resolved values
    """

    def __init__(self):
        # Per-instance state. `labels` and `binary` used to be mutable class
        # attributes, so every Parser instance (e.g. each re-run of the cell)
        # kept appending to the very same dict and list.
        self.pos = 0
        self.labels = {}
        self.binary = []

    def setOrig(self, tokens):
        """Parse action for `at $<hex>`: move the assembly position."""
        self.pos = int(tokens.adr, 16)

    def setLabel(self, tokens):
        """Parse action for `<name>:`: record the label.

        The stored value is one less than the current position (presumably
        because the CPU increments pc before it fetches -- TODO confirm).
        """
        self.labels[tokens.lbl[0]] = self.pos-1

    def setRef(self, tokens):
        """Parse action for `<name> db|dw $<hex>`: record the name and emit the data.

        `db` emits one byte; anything else (`dw`) emits two bytes, low byte first.
        """
        self.labels[tokens.lbl] = self.pos
        if tokens.size == 'db':
            self.binary.append(int(tokens.val, 16))
            self.pos += 1
        else:
            self.binary.append(int(tokens.val, 16) & 0xff)
            self.binary.append(int(tokens.val, 16) >> 8)
            self.pos += 2

    def _emitAddressed(self, c, tokens):
        """Emit a three-byte ABS/IDX instruction for a `$addr` or label operand.

        A literal address is written low byte first, the same order that the
        label fix-up in `parse` and `dw` use. A label operand is stored as its
        name (a string) and expanded to two bytes by `parse`. Nothing is
        emitted when the operand is neither.
        """
        c |= adm.IDX if tokens.idx else adm.ABS
        if tokens.am == '$':
            address = int(tokens.val, 16)
            self.pos += 3
            self.binary.append(c)
            self.binary.append(address & 0xff)
            self.binary.append(address >> 8)
        elif tokens.lbl:
            self.pos += 3
            self.binary.append(c)
            # tokens.lbl is a group; element 0 is the label name itself.
            self.binary.append(tokens.lbl[0])

    def setOp(self, tokens):
        """Parse action for an instruction: emit the opcode byte and its operand.

        The opcode byte is (instruction << 3) | addressing mode.
        """
        c = getattr(opc, tokens.op) << 3
        if tokens.op == 'LDA':
            if tokens.am == '#$':
                c = c | adm.IMM
                self.pos += 2
                self.binary.append(c)
                self.binary.append(int(tokens.val, 16))
            else:
                self._emitAddressed(c, tokens)
        elif tokens.op == 'STA':
            self._emitAddressed(c, tokens)
        elif tokens.op in ['TAX', 'TXA', 'PHA', 'PLA', 'RTS', 'ASL', 'ASR', 'INX', 'DEX']:
            c = c | adm.IMP
            self.pos += 1
            self.binary.append(c)
        elif tokens.op in ['ADD', 'SUB', 'AND', 'OR', 'XOR', 'CMP']:
            c = c | adm.IMM
            self.pos += 2
            self.binary.append(c)
            self.binary.append(int(tokens.val, 16))
        elif tokens.op in ['JNZ', 'JZ']:
            # Relative branch: the label becomes a one-byte offset in `parse`.
            c = c | adm.REL
            self.pos += 2
            self.binary.append(c)
            self.binary.append(tokens.lbl[0])
        elif tokens.op in ['JSR', 'JMP']:
            # Absolute jump: the label becomes a two-byte address in `parse`.
            c = c | adm.ABS
            self.pos += 3
            self.binary.append(c)
            self.binary.append(tokens.lbl[0])

    def parse(self, src):
        """Assemble `src` and return the ROM contents as a tuple.

        The grammar's parse actions fill `self.binary` while the text is
        parsed; afterwards every label name left in `self.binary` is replaced
        by its value. Unknown labels are reported with a printed
        'unresolved' message and left in place.
        """
        op = oneOf(' '.join(opc._asdict()), caseless=True)
        org = (Word('atAT') + '$' + Word(hexnums)('adr')).setParseAction(self.setOrig)
        val = Word('#$')('am') + Word(hexnums)('val')
        lbl = (~op + Word(alphanums + '_'))('lbl')
        prm = (val|lbl) + ~Literal(':') + Optional(Word(', x'))('idx')
        label = (lbl + ':').setParseAction(self.setLabel)
        comment = ';' + restOfLine
        instruction = (op('op') + Optional(prm('prm'))).setParseAction(self.setOp)
        ref = (Word(alphanums + '_')('lbl') + oneOf('db dw')('size') + '$' + Word(hexnums)('val')).setParseAction(self.setRef)
        asm = OneOrMore(org | label | instruction | comment | ref)

        asm.parseString(src, parseAll=True)

        # Label fix-up. The list may grow while it is being enumerated: an
        # absolute reference expands from one placeholder to two bytes.
        for i,b in enumerate(self.binary):
            if not isinstance(b, int):
                if b in self.labels:
                    # 0x8b / 0x93 are the JNZ / JZ opcodes in REL mode.
                    if self.binary[i-1] in [0x8b, 0x93]: #todo extract addressing mode
                        # Offset relative to this operand's index; may be negative.
                        self.binary[i] = self.labels[b] - i + 1
                    else:
                        # Absolute address, low byte first.
                        self.binary[i] = self.labels[b] & 0xff
                        self.binary.insert(i+1, self.labels[b] >> 8)
                else:
                    print('unresolved', b)

        return tuple(self.binary)

# Assemble the program; `rom` is read later by the memory model.
rom = Parser().parse(src)

# Dump the image as hex. Negative entries (backward relative branch offsets)
# are shown as their 8-bit two's-complement value.
print('Compiled ROM: ', end='')
for entry in rom:
    unsigned = 0x100 + entry if entry < 0 else entry
    print(f'{unsigned:02x}', end=' ')


Compiled ROM: 01 00 38 0c 00 01 40 30 79 10 8b f8 00 

In [4]:
# RAM & CPU

from myhdl import *

from cpu import processor

# 8k-entry RAM (0x2000 bytes) with the assembled ROM overlaid on the lowest addresses
@block
def mem(clk, adr, we, di, do):
    """Synchronous memory model: byte-wide RAM with the `rom` tuple mapped at address 0.

    clk -- clock; all accesses happen on its rising edge
    adr -- address
    we  -- write enable
    di  -- data to write
    do  -- read data (updated only on cycles where `we` is not asserted)

    Reads below len(rom) return the ROM byte, all other reads return RAM.
    Writes always go to RAM, so a write into the ROM range is never read back.
    """
    
    ram = [Signal(intbv(0)[8:]) for i in range(0x2000)] # 8k

    @always(clk.posedge)
    def logic():
        if we:
            ram[adr.val].next = di
        else:
            # `rom` is the module-level tuple produced by the assembler cell.
            if adr < len(rom):
                do.next = rom[adr.val]
            else:
                do.next = ram[adr.val]
        
    return logic

@block
def processor(clk, rst, data_in, data_out, adr, we):

    """
    Multi-cycle CPU core: one instruction takes the six states
    F1, F2 (fetch), D (decode), E (execute), M1, M2 (memory / write-back).

    Ports:
    clk      = clock (logic runs on the rising edge)
    rst      = reset for the sequential process
    data_in  = byte driven towards memory on writes
    data_out = byte read back from memory
    adr      = memory address
    we       = memory write enable

    Internal registers:
    IR = instruction register
    IM = immediate value
    RX = X register
    RW = W register used for status flags
    SR = status register
    AM = addressing mode
    SP = stack pointer

    Status register layout as written by this code: bit 7 is set when the
    result is >= 0x80, bit 6 is set when the result is zero; the lower six
    bits are carried over unchanged.
    """
    
    # (F1, F2, D, E, M1, M2) = range(0,6)
    s = enum('F1', 'F2', 'D', 'E', 'M1', 'M2')
    pc, incpc, incpc2 = [ Signal(modbv(0)[11:]) for _ in range(3) ]
    cyc = Signal(s.F1)
    ir, im, ra, rx, rw, sr, am = (Signal(modbv(0)[8:]) for _ in range(7))
    incim = Signal(modbv(0)[8:])
    sp, pushsp, popsp = [ Signal(modbv(0xff)[8:]) for _ in range(3) ]

    @always_comb
    def assign():
        # Combinational helpers; modbv makes the results wrap at the signal width.
        incim.next = im + rx        # immediate byte plus X, used for indexed addressing
        incpc.next = pc + 1
        incpc2.next = pc + 2
        pushsp.next = sp - 1
        popsp.next = sp + 1
    
    @always_seq(clk.posedge, rst)
    def logic():    

        if cyc == s.F1:
            # Advance pc and present the new address to memory.
            adr.next = incpc
            pc.next = incpc
            cyc.next = s.F2

        elif cyc == s.F2:
            # Latch the opcode byte and already address the following byte.
            adr.next = incpc
            ir.next = data_out
            cyc.next = s.D

        elif cyc == s.D:
            # Split the opcode byte: low three bits = addressing mode,
            # upper five bits = instruction index (kept in ir from here on).
            im.next = data_out
            am.next = ir & 7
            ir.next = (ir >> 3) & 0x1f
            if (ir >> 3) == opc.RTS: # rts
                adr.next = popsp
                sp.next = popsp  
            cyc.next = s.E

        elif cyc == s.E:
            if ir == opc.LDA: # lda
                if am == adm.IMM:
                    ra.next = im
                    pc.next = incpc
                elif am == adm.ABS:
                    # Address is formed low byte first: im is the first operand byte.
                    adr.next = concat(data_out, im)
                    pc.next = incpc2
                elif am == adm.IDX:
                    adr.next = concat(data_out, incim)
                    pc.next = incpc2
            elif ir == opc.STA: # sta
                if am == adm.ABS:
                    adr.next = concat(data_out, im)
                    we.next = 1
                    data_in.next = ra
                    pc.next = incpc2
                elif am == adm.IDX:
                    adr.next = concat(data_out, incim)
                    we.next = 1
                    data_in.next = ra
                    pc.next = incpc2
            elif ir == opc.TAX: # tax
                rx.next = ra
                rw.next = 1
            elif ir == opc.TXA: # txa
                ra.next = rx
            elif ir == opc.ADD: # add im
                ra.next = ra + im
                pc.next = incpc
            elif ir == opc.SUB: # sub im
                ra.next = ra - im
                pc.next = incpc
            elif ir == opc.AND: # and im
                ra.next = ra & im
                pc.next = incpc
            elif ir == opc.OR: # or im
                ra.next = ra | im
                pc.next = incpc
            elif ir == opc.XOR: # xor im
                ra.next = ra ^ im
                pc.next = incpc
            elif ir == opc.ASL: # asl im
                ra.next = ra << im
            elif ir == opc.ASR: # asr im
                ra.next = ra >> im
            elif ir == opc.JNZ: # jnz rel
                # Branch when the zero flag (sr bit 6) is clear.
                if sr[6] == 0:
                    pc.next = pc + im.signed()
                else:
                    pc.next = incpc
            elif ir == opc.JZ: # jz rel
                # Branch when the zero flag (sr bit 6) is set.
                if sr[6] != 0:
                    pc.next = pc + im.signed()
                else:
                    pc.next = incpc
            elif ir == opc.INX: # inx
                rx.next = rx + 1
                rw.next = 1
            elif ir == opc.DEX: # dex
                rx.next = rx - 1
                rw.next = 1
            elif ir == opc.PHA: # pha
                adr.next = sp
                sp.next = pushsp
                data_in.next = ra
                we.next = 1
            elif ir == opc.PLA: # pla
                sp.next = popsp
                adr.next = popsp
            elif ir == opc.CMP: # cmp im
                # rw = 2 keeps M2 from overwriting the flags set here.
                rw.next = 2
                sr.next = concat((ra-im)>=0x80, (ra-im)==0, sr[6:0])
                pc.next = incpc
            elif ir == opc.JSR: # jsr abs
                # Push the upper bits of the return address first.
                adr.next = sp
                sp.next = pushsp
                data_in.next = incpc2[11:8]
                we.next = 1
            elif ir == opc.RTS: # rts
                adr.next = popsp
                sp.next = popsp
            elif ir == opc.JMP: # jmp abs
                pc.next = concat(data_out[3:], im)
            else:
                if __debug__:
                    raise ValueError("Illegal insn")
            cyc.next = s.M1

        elif cyc == s.M1:
            # The addressing-mode test is parenthesised so that it only applies
            # to LDA. `and` binds tighter than `or`, so the unparenthesised
            # form also matched STA in IDX mode, which overwrote ra with
            # data_out and skipped the `else` branch below.
            if (ir == opc.PLA) or (ir == opc.LDA and (am == adm.ABS or am == adm.IDX)):
                ra.next = data_out
            elif ir == opc.JSR:
                # Push the low byte of the return address and jump.
                adr.next = sp
                sp.next = pushsp

                data_in.next = incpc2[8:]
                we.next = 1
                pc.next = concat(data_out[3:], im)
            elif ir == opc.RTS:
                pc.next = data_out
            else:
                we.next = 0
                adr.next = pc
            cyc.next = s.M2

        elif cyc == s.M2:
            # NOTE(review): at this point ir holds the 5-bit instruction index,
            # so 0x11 equals opc.JNZ (17) and 0x17 (23) lies beyond the 21
            # defined instructions. Both constants look like leftovers from a
            # different opcode numbering -- confirm which instructions are meant.
            if ir == 0x11:
                ra.next = data_out
                sr.next = concat(data_out>=0x80, data_out==0, sr[6:0])
            elif rw == 0:
                # Default: flags follow the accumulator.
                sr.next = concat(ra>=0x80, ra==0, sr[6:0])
            elif rw == 1:
                # Flags follow X (set by TAX / INX / DEX).
                sr.next = concat(rx>=0x80, rx==0, sr[6:0])
            if ir == 0x17:
                pc.next = concat(data_out[3:], pc[8:])
                adr.next = concat(data_out, pc[8:])
            else:
                adr.next = pc
            we.next = 0
            rw.next = 0
            cyc.next = s.F1
        else:
            cyc.next = s.F1 # Should be an exception

    return instances()


## Synthesizing the processor

The pCPU processor core from above is wrapped in a black-box top-level implementation
so that it can be combined flexibly with different implementations. A parameter can also be passed to co-simulate
two processor instances in parallel.

In [5]:
import sys
# Extend the module search path with a machine-specific examples directory
# (presumably where the `ys_aux` helper imported in the next cell lives --
# TODO confirm). This absolute path must be adjusted on any other machine.
sys.path.append("/home/pyosys/work/myhdl-synthesis/examples")

In [16]:
from myhdl.conversion.yshelper import Design
from ys_aux import setupCosimulation

# ANSI escape sequences for console output:
# REDBG switches to reverse-video red, OFF resets all attributes.
REDBG = "\033[7;31m"
OFF = "\033[0m"

class MyImpl:
    """Implementation class handed to the `@inference` decorators of the top level."""

    def __init__(self):
        """Announce the implementation on the console (cyan reverse-video banner)."""
        print("\033[7;36mMy HW implementation\033[0m")

    def instance(self, name):
        """Return a fresh `Design` object named `name`."""
        return Design(name)

class top_signals:
    """Bundle of the memory-bus signals shared by one CPU and its memory."""

    def __init__(self):
        # Write enable, single bit.
        self.we = Signal(bool(0))
        # 16-bit address bus.
        self.adr = Signal(modbv(0)[16:])
        # Two independent 8-bit data buses, created in the order data_in, data_out.
        self.data_in, self.data_out = [Signal(modbv(0)[8:]) for _ in range(2)]
    
@blackbox
def top_implementation(CO_SIMULATE = False, proc = processor):
    """The top level implementation blackbox. When CO_SIMULATE is true, the passed `proc` unit is
instanced in parallel to the original model

    CO_SIMULATE -- when true, a second memory + CPU pair ("cpu_b") is built from `proc`
    proc        -- block factory with the same port list as `processor`

    Returns the MyHDL instances of the test bench together with the two
    inference rules `implement` and `cosimulation`.
    """

    # Two independent bus bundles: `a` for the reference model, `b` for the unit under comparison.
    a, b = [ top_signals() for i in range(2) ]
    
    clk = Signal(bool(0))
    # MyHDL ResetSignal(val, active, isasync): starts asserted, active high, asynchronous.
    global_reset = ResetSignal(1, 1, True)
    
    mi = mem(clk, a.adr, a.we, a.data_in, a.data_out)
    cpu = processor(clk, global_reset, a.data_in, a.data_out, a.adr, a.we)
    cpu.name = 'cpu_a'
    
    if CO_SIMULATE:
        # Second memory + CPU pair sharing clock and reset with the first one.
        mi_ref = mem(clk, b.adr, b.we, b.data_in, b.data_out)
        cpu_ref = proc(clk, global_reset, b.data_in, b.data_out, b.adr, b.we)
        cpu_ref.name = 'cpu_b'

    def convert(unit, design, top_name, create_trace = False):
        # Shared helper of both rules: convert `unit` into `design`, run the
        # yosys "check; opt;" passes and finalize under `top_name`.
        unit.convert("yosys_module", design, trace=create_trace)
        design.run("check; opt;")
        design.finalize(top_name)

    @inference(MyImpl)
    def implement(design, top_name):
        "This implements and synthesizes the design"
        convert(cpu, design, top_name)
        design.test_synth()

        return design    

    @inference(MyImpl)
    def cosimulation(design, top_name):
        "This creates a cosimulation object"
        convert(cpu, design, top_name, True)
        design.run("hierarchy -check; deminout; opt_expr; memory; check")
        # NOTE(review): the output name "rv32" does not match this design's
        # name -- possibly carried over from another project; confirm.
        design.write_ilang("rv32")
        #design.run("hierarchy -check; deminout; opt_expr; check")
        design.write_verilog(design.name, rename_default=True, rename_signals=True)

        # Make sure to find included files
        EXTRA_FILES = ['-I', '/usr/share/yosys/ecp5/' ]
        
        @block
        def cosim_wrapper(clk, rst, data_in, data_out, adr, we):
            "Wrapper for cosimulation object"
            # locals() is taken before any other local is bound, so `args`
            # holds exactly the port signals passed to this wrapper.
            args = locals()
            name = design.name + "_mapped"
            return setupCosimulation(name, use_assert=False, interface=args, debug=True, extra_files=EXTRA_FILES)

        return cosim_wrapper, design

    # Simulation test bench part
    @instance
    def startup():
        # Release the reset after 11 time steps.
        yield delay(11)
        global_reset.next = 0

    @always(delay(2))
    def stimulus():
        # Toggle the clock every 2 time steps.
        clk.next = not clk


    @always(clk.posedge)
    def verify():
        # Compare both memories' read data on every rising clock edge.
        # NOTE(review): this process exists even when CO_SIMULATE is false, in
        # which case nothing in this function drives the `b` signals.
        if a.data_out == b.data_out:
            pass
            #print("%02x == %02x" % (a.data_out, b.data_out))
        else:
            print(REDBG + "%02x != %02x" % (a.data_out, b.data_out) + OFF)
            raise ValueError("Value mismatch")

        # An address difference is only reported, not treated as an error.
        if a.adr != b.adr:
             print("NEXT MAY MISMATCH >>> %04x -- %04x" % (a.adr, b.adr))


    return instances(), implement, cosimulation

## Co-Simulation

We have to re-instance `impl` because the previous run has stored information in its signals.
This time we run the `cosimulation` implementation rule and receive a simulation object in return.

In [17]:
# Build the top level without co-simulation and run its `cosimulation` rule
# with "processor" as the top-level name. The rule returns the co-simulation
# wrapper block (`simobj`) and the design object (`rtl`).
impl = top_implementation()
simobj, rtl = impl.implement("cosimulation", "processor")

[7;34mSYNTHESIS[0m: Wrapping for inference: MyImpl()
[7;34mSYNTHESIS[0m: Wrapping for inference: MyImpl()
[7;36mMy HW implementation[0m
[7;34mSYNTHESIS[0m: Implementing unit 'cosimulation'
[32mCREATE Module: 'processor'[0m
[32mAdding module with name:[0m processor_1_1_8_8_16_1
CONVERT_RTL tree >>>>>> 'PROCESSOR_ASSIGN' 
[7;31mTRUNC[0m: <ipython-input-4-b06d18a86536>:48 Implicit carry truncate: incim[8:], src[9:]
[7;31mTRUNC[0m: <ipython-input-4-b06d18a86536>:49 Implicit carry truncate: incpc[11:], src[12:]
[7;31mTRUNC[0m: <ipython-input-4-b06d18a86536>:50 Implicit carry truncate: incpc2[11:], src[12:]
[7;31mTRUNC[0m: <ipython-input-4-b06d18a86536>:52 Implicit carry truncate: popsp[8:], src[9:]
CONVERT_RTL tree >>>>>> 'PROCESSOR_LOGIC' 
[7;31mTRUNC[0m: <ipython-input-4-b06d18a86536>:107 Implicit carry truncate: ra[8:], src[9:]
A > B
[7;31mTRUNC[0m: <ipython-input-4-b06d18a86536>:127 Implicit carry truncate: pc[11:], src[12:]
A > B
[7;31mTRUNC[0m: <ipython-input

Then we pass this created simulation object back into the test bench as parameter:

In [18]:
# from cpu import processor as p

# Build the test bench again, this time with CO_SIMULATE=True and the
# co-simulation wrapper from the previous cell as the second processor.
tb = top_implementation(True, simobj)
# Enable signal tracing; the trace is written under the base name 'dump_mapped'.
tb.config_sim(
    timescale="1ps", 
    trace=True,
    tracebackup=False,
    filename='dump_mapped'
)
# Run for 1800 time steps, then shut the simulation down.
# NOTE(review): the exact meaning of timeunit_suffix='000' is not visible here -- confirm.
tb.run_sim(1800, timeunit_suffix='000')
tb.quit_sim()

Analyze command: iverilog -g2012 -o processor_mapped.o processor_mapped.v tb_processor_mapped.v -I /usr/share/yosys/ecp5/
Simulation command: vvp -m /home/pyosys/src/myhdl/myhdl-yosys/cosimulation/icarus/myhdl.vpi processor_mapped.o
[7;34mSYNTHESIS[0m: Wrapping for inference: MyImpl()
[7;34mSYNTHESIS[0m: Wrapping for inference: MyImpl()


<class 'myhdl._SuspendSimulation'>: Simulated 1800 timesteps


In [15]:
# Display the design object returned by the `cosimulation` rule, requesting
# the "dot" format (presumably a Graphviz rendering of the RTL -- TODO confirm).
rtl.display_rtl("", fmt="dot")

Display...


### Discussion

During synthesis, we see a number of warnings pass by. These are due to the fact that some signals are left implicitly undefined in some states of the processor. This is acceptable in most cases and not a problem when the driving process is clock synchronous (`@always(clk.posedge)`).

However, under certain circumstances, this leads to mismatches between simulation of the MyHDL functional model (which always works with defined initial values) and the model inferred by the synthesis, due to floating drivers.
Therefore the original CPU model was slightly modified, also to avoid errors due to implicit truncation (being reported by the RTL conversion).

The correct functionality of this CPU core has not yet been verified further, so it is left at this stage as a study.

After simulation, traces are available for download (and local examining using GTKwave):

* [dump_mapped.vcd](dump_mapped.vcd) Co-simulated two CPUs with only top level interface signals traced for co-simulated post-map model
* [processor.vcd](processor.vcd) Trace of synthesized processor internal signals

You might use twinwave to compare these traces:

```
twinwave dump_mapped.vcd dump_mapped.sav + processor.vcd processor.sav
```

In [9]:
# List the VCD trace files present in the working directory (shell command).
! ls -l *.vcd

-rw-r--r-- 1 pyosys users 755030 Jul 15 15:57 dump_mapped.vcd
-rw-r--r-- 1 pyosys users 221836 Jul 15 14:03 processor.vcd
