# 6502 Opcodes and Addressing modes

In [3]:
# Opcodes
# for each, we store the mnemonic and mode id (see 'modes' dict below)
opcodes = {
    0: [ 'BRK', 9 ],
    16: [ 'BPL', 8 ],
    32: [ 'JSR', 0 ],
    48: [ 'BMI', 8 ],
    64: [ 'RTI', 9 ],
    80: [ 'BVC', 8 ],
    96: [ 'RTS', 9 ],
    112: [ 'BVS', 8 ],
    128: [ 'BRA', 8 ],
    144: [ 'BCC', 8 ],
    160: [ 'LDY', 6 ],
    176: [ 'BCS', 8 ],
    192: [ 'CPY', 6 ],
    208: [ 'BNE', 8 ],
    224: [ 'CPX', 6 ],
    240: [ 'BEQ', 8 ],
    1: [ 'ORA', 11 ],
    17: [ 'ORA', 15 ],
    33: [ 'AND', 11 ],
    49: [ 'AND', 15 ],
    65: [ 'EOR', 11 ],
    81: [ 'EOR', 15 ],
    97: [ 'ADC', 11 ],
    113: [ 'ADC', 15 ],
    129: [ 'STA', 11 ],
    145: [ 'STA', 15 ],
    161: [ 'LDA', 11 ],
    177: [ 'LDA', 15 ],
    193: [ 'CMP', 11 ],
    209: [ 'CMP', 15 ],
    225: [ 'SBC', 11 ],
    241: [ 'SBC', 15 ],
    18: [ 'ORA', 14 ],
    50: [ 'AND', 14 ],
    82: [ 'EOR', 14 ],
    114: [ 'ADC', 14 ],
    146: [ 'STA', 14 ],
    162: [ 'LDX', 6 ],
    178: [ 'LDA', 14 ],
    210: [ 'CMP', 14 ],
    242: [ 'SBC', 14 ],
    4: [ 'TSB', 10 ],
    20: [ 'TRB', 10 ],
    36: [ 'BIT', 10 ],
    52: [ 'BIT', 12 ],
    100: [ 'STZ', 10 ],
    116: [ 'STZ', 12 ],
    132: [ 'STY', 10 ],
    148: [ 'STY', 12 ],
    164: [ 'LDY', 10 ],
    180: [ 'LDY', 12 ],
    196: [ 'CPY', 10 ],
    228: [ 'CPX', 10 ],
    5: [ 'ORA', 10 ],
    21: [ 'ORA', 12 ],
    37: [ 'AND', 10 ],
    53: [ 'AND', 12 ],
    69: [ 'EOR', 10 ],
    85: [ 'EOR', 12 ],
    101: [ 'ADC', 10 ],
    117: [ 'ADC', 12 ],
    133: [ 'STA', 10 ],
    149: [ 'STA', 12 ],
    165: [ 'LDA', 10 ],
    181: [ 'LDA', 12 ],
    197: [ 'CMP', 10 ],
    213: [ 'CMP', 12 ],
    229: [ 'SBC', 10 ],
    245: [ 'SBC', 12 ],
    6: [ 'ASL', 10 ],
    22: [ 'ASL', 12 ],
    38: [ 'ROL', 10 ],
    54: [ 'ROL', 12 ],
    70: [ 'LSR', 10 ],
    86: [ 'LSR', 12 ],
    102: [ 'ROR', 10 ],
    118: [ 'ROR', 12 ],
    134: [ 'STX', 10 ],
    150: [ 'STX', 13 ],
    166: [ 'LDX', 10 ],
    182: [ 'LDX', 13 ],
    198: [ 'DEC', 10 ],
    214: [ 'DEC', 12 ],
    230: [ 'INC', 10 ],
    246: [ 'INC', 12 ],
    7: [ 'RMB0', 10 ],
    23: [ 'RMB1', 10 ],
    39: [ 'RMB2', 10 ],
    55: [ 'RMB3', 10 ],
    71: [ 'RMB4', 10 ],
    87: [ 'RMB5', 10 ],
    103: [ 'RMB6', 10 ],
    119: [ 'RMB7', 10 ],
    135: [ 'SMB0', 10 ],
    151: [ 'SMB1', 10 ],
    167: [ 'SMB2', 10 ],
    183: [ 'SMB3', 10 ],
    199: [ 'SMB4', 10 ],
    215: [ 'SMB5', 10 ],
    231: [ 'SMB6', 10 ],
    247: [ 'SMB7', 10 ],
    8: [ 'PHP', 9 ],
    24: [ 'CLC', 7 ],
    40: [ 'PLP', 9 ],
    56: [ 'SEC', 7 ],
    72: [ 'PHA', 9 ],
    88: [ 'CLI', 7 ],
    104: [ 'PLA', 9 ],
    120: [ 'SEI', 7 ],
    136: [ 'DEY', 7 ],
    152: [ 'TYA', 7 ],
    168: [ 'TAY', 7 ],
    184: [ 'CLV', 7 ],
    200: [ 'INY', 7 ],
    216: [ 'CLD', 7 ],
    232: [ 'INX', 7 ],
    248: [ 'SED', 7 ],
    9: [ 'ORA', 6 ],
    25: [ 'ORA', 3 ],
    41: [ 'AND', 6 ],
    57: [ 'AND', 3 ],
    73: [ 'EOR', 6 ],
    89: [ 'EOR', 3 ],
    105: [ 'ADC', 6 ],
    121: [ 'ADC', 3 ],
    137: [ 'BIT', 6 ],
    153: [ 'STA', 3 ],
    169: [ 'LDA', 6 ],
    185: [ 'LDA', 3 ],
    201: [ 'CMP', 6 ],
    217: [ 'CMP', 3 ],
    233: [ 'SBC', 6 ],
    249: [ 'SBC', 3 ],
    10: [ 'ASL', 5 ],
    26: [ 'INC', 5 ],
    42: [ 'ROL', 5 ],
    58: [ 'DEC', 5 ],
    74: [ 'LSR', 5 ],
    90: [ 'PHY', 9 ],
    106: [ 'ROR', 5 ],
    122: [ 'PLY', 9 ],
    138: [ 'TXA', 7 ],
    154: [ 'TXS', 7 ],
    170: [ 'TAX', 7 ],
    186: [ 'TSX', 7 ],
    202: [ 'DEX', 7 ],
    218: [ 'PHX', 9 ],
    234: [ 'NOP', 7 ],
    250: [ 'PLX', 9 ],
    203: [ 'WAI', 7 ],
    219: [ 'STP', 7 ],
    12: [ 'TSB', 0 ],
    28: [ 'TRB', 0 ],
    44: [ 'BIT', 0 ],
    60: [ 'BIT', 2 ],
    76: [ 'JMP', 0 ],
    108: [ 'JMP', 4 ],
    124: [ 'JMP', 1 ],
    140: [ 'STY', 0 ],
    156: [ 'STZ', 0 ],
    172: [ 'LDY', 0 ],
    188: [ 'LDY', 2 ],
    204: [ 'CPY', 0 ],
    236: [ 'CPX', 0 ],
    13: [ 'ORA', 0 ],
    29: [ 'ORA', 2 ],
    45: [ 'AND', 0 ],
    61: [ 'AND', 2 ],
    77: [ 'EOR', 0 ],
    93: [ 'EOR', 2 ],
    109: [ 'ADC', 0 ],
    125: [ 'ADC', 2 ],
    141: [ 'STA', 0 ],
    157: [ 'STA', 2 ],
    173: [ 'LDA', 0 ],
    189: [ 'LDA', 2 ],
    205: [ 'CMP', 0 ],
    221: [ 'CMP', 2 ],
    237: [ 'SBC', 0 ],
    253: [ 'SBC', 2 ],
    14: [ 'ASL', 0 ],
    30: [ 'ASL', 2 ],
    46: [ 'ROL', 0 ],
    62: [ 'ROL', 2 ],
    78: [ 'LSR', 0 ],
    94: [ 'LSR', 2 ],
    110: [ 'ROR', 0 ],
    126: [ 'ROR', 2 ],
    142: [ 'STX', 0 ],
    158: [ 'STZ', 2 ],
    174: [ 'LDX', 0 ],
    190: [ 'LDX', 3 ],
    206: [ 'DEC', 0 ],
    222: [ 'DEC', 2 ],
    238: [ 'INC', 0 ],
    254: [ 'INC', 2 ],
    15: [ 'BBR0', 8 ],
    31: [ 'BBR1', 8 ],
    47: [ 'BBR2', 8 ],
    63: [ 'BBR3', 8 ],
    79: [ 'BBR4', 8 ],
    95: [ 'BBR5', 8 ],
    111: [ 'BBR6', 8 ],
    127: [ 'BBR7', 8 ],
    143: [ 'BBS0', 8 ],
    159: [ 'BBS1', 8 ],
    175: [ 'BBS2', 8 ],
    191: [ 'BBS3', 8 ],
    207: [ 'BBS4', 8 ],
    223: [ 'BBS5', 8 ],
    239: [ 'BBS6', 8 ],
    255: [ 'BBS7', 8 ],
}

# Addressing modes
# for each, we store the mode name and instructions length

modes = {
    0: [ 'a', 3 ],
    1: [ '(a,x)', 3 ],
    2: [ 'a,x', 3 ],
    3: [ 'a,y', 3 ],
    4: [ '(a)', 3 ],
    5: [ 'A', 1 ],
    6: [ '#', 2 ],
    7: [ 'i', 1 ],
    8: [ 'r', 2 ],
    9: [ 's', 1 ],
    10: [ 'zp', 2 ],
    11: [ '(zp,x)', 2 ],
    12: [ 'zp,x', 2 ],
    13: [ 'zp,y', 2 ],
    14: [ '(zp)', 2 ],
    15: [ '(zp),y', 2 ],
}

In [11]:
# Order and clean formating for the opcodes dict
print("opcodes = {")
for i in sorted(opcodes.keys()):
    o,m = opcodes[i]
    print("    %3d: [ '%s'%s, %2s ],  #   %s  %s" % (i, o.lower(), (" "*(4-len(o))), m, (o.lower()+" "*(4-len(o))), modes[m][0]))
print("}")

opcodes = {
      0: [ 'brk' ,  9 ],  #   brk   s
      1: [ 'ora' , 11 ],  #   ora   (zp,x)
      4: [ 'tsb' , 10 ],  #   tsb   zp
      5: [ 'ora' , 10 ],  #   ora   zp
      6: [ 'asl' , 10 ],  #   asl   zp
      7: [ 'rmb0', 10 ],  #   rmb0  zp
      8: [ 'php' ,  9 ],  #   php   s
      9: [ 'ora' ,  6 ],  #   ora   #
     10: [ 'asl' ,  5 ],  #   asl   A
     12: [ 'tsb' ,  0 ],  #   tsb   a
     13: [ 'ora' ,  0 ],  #   ora   a
     14: [ 'asl' ,  0 ],  #   asl   a
     15: [ 'bbr0',  8 ],  #   bbr0  r
     16: [ 'bpl' ,  8 ],  #   bpl   r
     17: [ 'ora' , 15 ],  #   ora   (zp),y
     18: [ 'ora' , 14 ],  #   ora   (zp)
     20: [ 'trb' , 10 ],  #   trb   zp
     21: [ 'ora' , 12 ],  #   ora   zp,x
     22: [ 'asl' , 12 ],  #   asl   zp,x
     23: [ 'rmb1', 10 ],  #   rmb1  zp
     24: [ 'clc' ,  7 ],  #   clc   i
     25: [ 'ora' ,  3 ],  #   ora   a,y
     26: [ 'inc' ,  5 ],  #   inc   A
     28: [ 'trb' ,  0 ],  #   trb   a
     29: [ 'ora' ,  2 ],  #   ora   a,x
     30: 

# Some functions

In [8]:
def isValidOpcode(b):
    return b in opcodes.keys()

# decode an opcode byte b
# returns mnemonic, mode, length
def decode(b):
    if isinstance(b,str):
        b=hex2dec(b)
    if isValidOpcode(b):
        o, m = opcodes[b]
        m, l = modes[m]
        return o, m, l
    else:
        return None,None,None

def hex2dec(s):
    return int(s,16)

Here we have a program:

In [4]:
prog = "a2 00 a0 00 8a 99 00 02 48 e8 c8 c0 10 d0 f5 68 99 00 02 c8 c0 20 d0 f7"

prog = iter( prog.split(" "))

First attempt to disassemble. Largely inspired from [skilldrick/easy6502](https://github.com/skilldrick/easy6502/blob/3261b868631e65896c1d4d2b9111053d643178a2/simulator/assembler.js#L2537)

In [160]:
print(" Addr  Hexdump      Instruction      Mode")
print("-----  -----------  -----------      ----")
# infinite loop
addr_cursor = hex2dec("0600") # start addr
while True:
    try:
        n = next(prog)
        hex = n
        addr = addr_cursor
        addr_cursor += 1
        b = hex2dec( n )
        comment = ""

        if isValidOpcode(b):
            o,m,l = decode(b)
            #print(o,m,l)
            operand=""
            for i in range(l-1):
                n = next(prog)
                addr_cursor += 1
                hex = hex + " " + n
                operand = n + operand
            
            #if l>1 and hex2dec(operand) >= 0x0100 and hex2dec(operand) <= 0x01ff:
            #    comment = "Stack"

            if o.startswith("b") and o not in ["bit", "brk"] :
                dest = addr + 2
                operand = hex2dec(operand)
                if operand > 0x7f:
                    dest -= 0x100 - operand
                else:
                    dest += operand
                operand = "%04x" % dest
            
            if l>1:
                operand = "$"+operand

            if m == "Imm":
                operand = "#"+operand
            if m.endswith('X'):
                operand += ",X"
            if m.startswith('IND'):
                operand = "(" + operand + ")"
            if m.endswith('Y'):
                operand += ",Y"
            if b in [ 10, 74, 42, 106 ]:
                operand = "A"
            
            print("$%04x  %-12s %s %-10s   %-4s %s" % (addr, hex, o, operand, m, comment))

    except StopIteration:
        break


 Addr  Hexdump      Instruction      Mode
-----  -----------  -----------      ----
$0600  a2 00        ldx #$00         Imm  
$0602  a0 00        ldy #$00         Imm  
$0604  8a           txa              SNGL 
$0605  99 00 02     sta $0200,Y      ABSY 
$0608  48           pha              SNGL 
$0609  e8           inx              SNGL 
$060a  c8           iny              SNGL 
$060b  c0 10        cpy #$10         Imm  
$060d  d0 f5        bne $0604        BRA  
$060f  68           pla              SNGL 
$0610  99 00 02     sta $0200,Y      ABSY 
$0613  c8           iny              SNGL 
$0614  c0 20        cpy #$20         Imm  
$0616  d0 f7        bne $060f        BRA  


# 2nd attempt

Now we have a stream of incoming bytes. let's first assume the first one is a SYNC (opcode fetch)

In [7]:
prog = "a2 00 a0 00 8a 99 00 02 48 e8 c8 c0 10 d0 f5 68 99 00 02 c8 c0 20 d0 f7"

#prog = iter( prog.split(" "))

In [8]:
stream = []
addr_cursor = hex2dec("0600") # start addr

# sync represent how many byte we're missing to complete the instruction

sync = 0

for b in prog.split(" "):
    # b is the incoming byte

    stream.append([ addr_cursor, b , sync==0 ])
    if sync == 0:
        # we just fetched a new opcode
        _,_,l = decode(b)
        sync = l

    sync -= 1
    addr_cursor += 1

# we generate a simulated stream of [addr, data, sync] signals
stream

[[1536, 'a2', True],
 [1537, '00', False],
 [1538, 'a0', True],
 [1539, '00', False],
 [1540, '8a', True],
 [1541, '99', True],
 [1542, '00', False],
 [1543, '02', False],
 [1544, '48', True],
 [1545, 'e8', True],
 [1546, 'c8', True],
 [1547, 'c0', True],
 [1548, '10', False],
 [1549, 'd0', True],
 [1550, 'f5', False],
 [1551, '68', True],
 [1552, '99', True],
 [1553, '00', False],
 [1554, '02', False],
 [1555, 'c8', True],
 [1556, 'c0', True],
 [1557, '20', False],
 [1558, 'd0', True],
 [1559, 'f7', False]]

In [14]:
def render_instr(_args):
    # args is a list: addr, mnemonic, [op1, op2]
    # addr: the instruction's address. int or hex str
    # mnemonic, op1 & op2 are strings in hex

    # we create a copy of the _args list, because we will modify it
    args=_args[:]

    # we revers the list, so we can pop each item
    # and at the end, the operand's byte are correctly ordered
    addr = args.pop(0)
    if isinstance(addr,str):
        addr=hex2dec(addr)

    opcode = args.pop(0)

    args.reverse()
    operand="".join(args)
    args.reverse() # now we need it again in the chronological order, for hexdump

    comment = ""

    unknown = ".."

    o,m,l = decode(opcode)

    miss = l - len(args) - 1 # how many operands bytes we are missing

    operand = unknown*miss + operand

    hexdump = " ".join( [opcode] + args + [ unknown for _ in range(miss) ])

    #if o.startswith("b") and o not in ["bit", "brk"] :
    if m == "r" :
        if miss==0 :
            dest = addr + 2
            operand = hex2dec(operand)
            if operand > 0x7f:
                dest -= 0x100 - operand
            else:
                dest += operand
            operand = "%04X" % dest
        else:
            # add an extra unknown byte
            operand = unknown + operand
    
    if l>1:
        operand = "$"+operand

    if m == "#":
        operand = "#"+operand
    if ',x' in m:
        operand += ",x"
    if m.startswith('('):
        operand = "(" + operand + ")"
    if m.endswith('y'):
        operand += ",y"
    if m == "A":
        operand = "A"
    
    return ("%04X  %-12s %s %-10s   %-4s %s" % (addr, hexdump.upper(), o, operand, m, comment))   

In [5]:
stream = [[1536, 'a2', True],
 [1537, '08', False],
 [1538, 'ca', True],
 [1539, '8e', True],
 [1540, '00', False],
 [1541, '02', False],
 [1542, 'e0', True],
 [1543, '03', False],
 [1544, 'd0', True],
 [1545, 'f8', False],
 [1546, '8e', True],
 [1547, '01', False],
 [1548, '02', False],
 [1549, '00', True]]

In [70]:
stream = [
 [1544, 'd0', True],
 [1545, 'f8', False],
]

In [15]:
iter_stream = iter(stream)

# count how many bytes we still miss
# to complete the current instruction 
miss = 0
instr=[]
while True:
    try:
        addr_cursor, b, sync = next(iter_stream)

        if sync:
            # new instruction started
            # opcode fetched
            instr=[addr_cursor, b]
            _,_,l = decode(b)
            miss = l
        else:
            instr.append(b)

        miss -= 1
        if miss >= 0:
            print("%04X %s | %s" % (addr_cursor, b, render_instr(instr) ))
        else:
            print("%04X %s | %s" % (addr_cursor, b, 36*" "+ "Not an instruction" ))
        
    except StopIteration:
        break

0600 a2 | 0600  A2 ..        LDX #$..         #    
0601 00 | 0600  A2 00        LDX #$00         #    
0602 a0 | 0602  A0 ..        LDY #$..         #    
0603 00 | 0602  A0 00        LDY #$00         #    
0604 8a | 0604  8A           TXA              i    
0605 99 | 0605  99 .. ..     STA $....,y      a,y  
0606 00 | 0605  99 00 ..     STA $..00,y      a,y  
0607 02 | 0605  99 00 02     STA $0200,y      a,y  
0608 48 | 0608  48           PHA              s    
0609 e8 | 0609  E8           INX              i    
060A c8 | 060A  C8           INY              i    
060B c0 | 060B  C0 ..        CPY #$..         #    
060C 10 | 060B  C0 10        CPY #$10         #    
060D d0 | 060D  D0 ..        BNE $....        r    
060E f5 | 060D  D0 F5        BNE $0604        r    
060F 68 | 060F  68           PLA              s    
0610 99 | 0610  99 .. ..     STA $....,y      a,y  
0611 00 | 0610  99 00 ..     STA $..00,y      a,y  
0612 02 | 0610  99 00 02     STA $0200,y      a,y  
0613 c8 | 06