# In [None]:
# TASK 3 – PS-PL GPIO CALCULATOR
#
# hw:
#   - ZYNQ PS + PL
#   - EMIO GPIO, 52-bit wide
#   - RTL: task3.v
#
# GPIO bit layout (LSB → MSB):
#   [23:0]   : inputs passthrough (in0-in1-in2 packed)
#   [47:24]  : output
#   [48]     : done
#   [51:49]  : opcode
#
# author: Alp Bolukbasi
#
# note: comments are intentionally explicit and verbose: they explain each non-obvious
#       statement and why it is required for controlling GPIO via MMIO on the PYNQ-Z2.

from pynq import Overlay, MMIO
# - Overlay lets you load a bitstream (.bit) and access the overlay object
# - MMIO gives direct memory-mapped io access to physical peripheral registers

from dataclasses import dataclass
# dataclass is used for the structured result type; it is a convenience, not a requirement.

import time

# download the bitstream into the PL (programmable logic); the PS keeps running linux and
# only talks to the freshly configured design through the GPIO registers below.
# Overlay("task3.bit") is given a relative path, so the .bit file is expected in the current
# working directory on the board.
# NOTE(review): PYNQ normally also wants the matching hardware handoff (.hwh) next to the
#               .bit file - confirm it is deployed alongside.
overlay = Overlay("task3.bit")

# ZYNQ GPIO controller register map (EMIO banks only)
# -------------------------------------------------------------------------
# the GPIO controller sits at a fixed physical address; every register below is an
# offset from that base and is accessed through MMIO.
#   bank 2 -> EMIO GPIO 0..31   (lower 32 bits of the 52-bit protocol word)
#   bank 3 -> EMIO GPIO 32..63  (upper bits of the 52-bit protocol word)
# note: Vivado's GUI shows MIO bank groupings differently; EMIO banks are fixed.

# physical base address of the GPIO controller in the ZYNQ address map.
GPIO_BASE = 0xE000A000

# --- bank 2 (EMIO 0-31) ---------------------------------------------------
# direction mode: 1 = pin is an output, 0 = pin is an input.
DIRM2 = 0x284
# output enable: 1 = output driver enabled. a pin whose direction bit is 1 may still be
# tri-stated until its OEN bit is set as well.
OEN2 = 0x288
# output data: bit n is the level driven on EMIO n (only when direction + OEN allow it).
DATA2 = 0x048
# read-only view of the current pin levels: bit0 -> EMIO0, bit1 -> EMIO1, ... bit31 -> EMIO31.
DATA2_RO = 0x068

# --- bank 3 (EMIO 32-63) --------------------------------------------------
# same four registers as bank 2, for the upper chunk of the protocol word.
DIRM3 = 0x2C4
OEN3 = 0x2C8
DATA3 = 0x04C
DATA3_RO = 0x06C


# PS<->PL protocol field layout inside the 52-bit GPIO word
# -------------------------------------------------------------------------
# these values are the application-level protocol agreed with the PL for task3;
# they are not hardware registers.

# field masks: 3 bits (0x7) for the opcode, 24 bits (0x00ffffff) for the result and the
# packed inputs.
MASK_3 = 0b111
MASK_24 = 0xFFFFFF

# result occupies bits [47:24]: shift right by RESULT_LSB, then apply MASK_24.
RESULT_LSB = 24

# single-bit 'done' flag, raised by the PL when the operation has finished.
DONE_BIT = 48

# opcode occupies the three bits directly above the done flag (49, 50, 51).
OPCODE_LSB = DONE_BIT + 1


# MMIO setup + GPIO pin configuration
# -------------------------------------------------------------------------
# map a 0x1000-byte window of the GPIO controller into this process; the window has to
# cover every register offset used in this file.
mmio = MMIO(GPIO_BASE, 0x1000)

# coarse configuration: mark all 32 pins of bank 2 and then of bank 3 as outputs.
# for each bank the direction register is written first (declares the pin an output),
# then the output-enable register (actually switches the driver on).
# NOTE(review): this also marks the PL-driven result/done bits as outputs even though the
#               PS only reads them back through DATA*_RO - confirm against the ZYNQ TRM
#               whether those bits should be left as inputs (DIRM = 0).
for _reg in (DIRM2, OEN2, DIRM3, OEN3):
    mmio.write(_reg, 0xffffffff)


# structured return type for results
# -------------------------------------------------------------------------
@dataclass
class CalcResult:
    """Outcome of a single execute_calculation() call.

    A simple container that makes results explicit and easy to print/inspect.
    """
    # opcode exactly as the caller passed it (it is not masked to 3 bits here)
    opcode: int
    # 24-bit result field extracted from bits [47:24]; 0 when the poll timed out
    result: int
    # True when the done bit was observed, False when the poll timed out
    done: bool

# main helper: execute_calculation
# -------------------------------------------------------------------------
def execute_calculation(opcode: int, in0: int, in1: int, in2: int,
                        *, timeout: float = 2.0) -> "CalcResult":
    """Run one calculation on the PL and return its outcome.

    Steps:
      1) pack the operands and opcode into the 52-bit protocol word
      2) write that word to the two bank data registers
      3) poll the read-only registers until the done bit is set (or timeout)
      4) extract and return the 24-bit result

    Args:
        opcode: operation selector; only the low 3 bits are sent to the PL.
        in0, in1, in2: operands; only the low 8 bits of each are sent. They are
            packed as (in2 << 16) | (in1 << 8) | in0 into bits [23:0].
        timeout: maximum time in seconds to wait for the done bit (default 2.0).
            The registers are always polled at least once.

    Returns:
        CalcResult(opcode, result, True) once done is observed, or
        CalcResult(opcode, 0, False) if the timeout expires first. The opcode
        field is the value passed in, not the masked value.
    """
    # a. pack inputs into the lower 24 bits; masking keeps only 8 bits per operand.
    inputs_packed = ((in2 & 0xff) << 16) | ((in1 & 0xff) << 8) | (in0 & 0xff)

    # b. build the full word: bits [23:0] = inputs, bits [51:49] = opcode.
    #    all other bits are left zero.
    word_out = (inputs_packed & MASK_24) | ((opcode & MASK_3) << OPCODE_LSB)

    # c. the controller exposes 32-bit registers, so split the word:
    #    bits [31:0] go to bank 2, bits [63:32] go to bank 3 (opcode lives there).
    mmio.write(DATA2, word_out & 0xffffffff)
    mmio.write(DATA3, (word_out >> 32) & 0xffffffff)

    # NOTE(review): the protocol has no start/ack bit visible here, so a 'done' still
    #               asserted from a previous operation would be accepted immediately -
    #               confirm in task3.v that done drops when inputs/opcode change.

    # d. poll for the 'done' flag with a timeout so a misbehaving design cannot hang
    #    the python process. a monotonic clock is used so wall-clock adjustments
    #    (e.g. NTP sync on the board) cannot shorten or stretch the timeout.
    deadline = time.monotonic() + timeout
    while True:
        # read the upper word (which carries 'done') BEFORE the lower word. the result
        # field spans both registers; if done is already set in this first read, the
        # lower word read afterwards belongs to the same finished operation.
        # this assumes the PL holds the result stable while done is high - TODO confirm.
        high_bits = mmio.read(DATA3_RO)
        low_bits = mmio.read(DATA2_RO)

        # rebuild the 64-bit view (only the lower 52 bits are meaningful).
        full_word = ((high_bits & 0xffffffff) << 32) | (low_bits & 0xffffffff)

        if (full_word >> DONE_BIT) & 0x1:
            # shift the result field down to bit 0 and keep 24 bits.
            result = (full_word >> RESULT_LSB) & MASK_24
            return CalcResult(opcode, result, True)

        if time.monotonic() >= deadline:
            break

    # timeout: report done=False with a zero result; the caller can retry or treat as error.
    return CalcResult(opcode, 0, False)

# functional tests
# one entry per opcode value 0..7, each laid out as (opcode, in0, in1, in2)
test_cases = [
    (0, 3, 5, 7),
    (1, 10, 4, 5),
    (2, 8, 1, 6),
    (3, 15, 3, 1),
    (4, 9, 0, 0),
    (5, 6, 2, 4),
    (6, 12, 3, 2),
    (7, 3, 2, 1),
]

print("Task 3 alu verification (all opcodes):")
print("\n")
for op, i0, i1, i2 in test_cases:
    # run the case on the hardware and report whether the done bit was seen in time
    res = execute_calculation(op, i0, i1, i2)
    if res.done:
        status = "SUCCESS"
    else:
        status = "TIMEOUT (check hardware wires)"
    print(f"opcode {op}: in0={i0}, in1={i1}, in2={i2} -> result={res.result} | status={status}")
    