In [None]:
from pynq import Overlay, allocate
import numpy as np

# --- Matrix geometry (must mirror the parameters fixed in the RTL) ---
I_OUTER_DIM, W_OUTER_DIM = 8, 6
INNER_DIM = 4
BLOCK_SIZE = 2
NUM_CORES = 2

# --- Stream widths, in bits ---
DATA_UNIT_BITS = 32  # one uint32 element
WEIGHT_WIDTH_BITS = 64
INPUT_WIDTH_BITS = NUM_CORES * 64
OUTPUT_WIDTH_BITS = NUM_CORES * 64
# Number of uint32 elements that make up one output word.
WORDS_PER_OUTPUT = OUTPUT_WIDTH_BITS // DATA_UNIT_BITS

# --- Output sizing ---
# Outer dimensions expressed in units of BLOCK_SIZE.
ROWS, COLS = I_OUTER_DIM // BLOCK_SIZE, W_OUTER_DIM // BLOCK_SIZE
TOTAL_OUTPUT_WORDS = (ROWS * COLS) // NUM_CORES
# Length of the receive buffer, counted in uint32 elements.
OUTPUT_BUFFER_WORDS = WORDS_PER_OUTPUT * TOTAL_OUTPUT_WORDS

# --- Memory-image file names ---
INPUT_MEM_FILE, WEIGHT_MEM_FILE, OUTPUT_MEM_FILE = "i.mem", "w.mem", "o.mem"

# --- Program the FPGA ---
# Build the Overlay object from the design's bitstream; the DMA engines used
# in the following cells are looked up as attributes of this object.
BITSTREAM_PATH = "/home/xilinx/jupyter_notebooks/Matrix_Multiplier/design_1.bit"
overlay = Overlay(BITSTREAM_PATH)
print("Overlay loaded.")

In [None]:
# One DMA engine per stream: dma_i sends the input buffer, dma_w sends the
# weight buffer, and dma_o receives the output buffer (see the transfer cells).
dma_i, dma_w, dma_o = overlay.axi_dma_0, overlay.axi_dma_1, overlay.axi_dma_2

In [None]:
# --- Load HEX .mem File ---
def load_mem_file(filename, word_bits):
    """Read a hex memory image and return it as a flat array of uint32 values.

    The file's hex digits are concatenated (all whitespace is ignored) and cut
    into words of ``word_bits // 4`` hex digits. Each word is then split into
    32-bit pieces that are emitted least-significant piece first, so a 64-bit
    word ``0000000200000001`` yields ``[1, 2]``.

    Args:
        filename: Path of the text file containing hexadecimal digits.
        word_bits: Width of one memory word in bits. Widths that are not a
            multiple of 32 are accepted; the most-significant piece is then
            simply shorter than 32 bits.

    Returns:
        A one-dimensional ``np.uint32`` array (empty if the file has no digits).

    Raises:
        ValueError: If ``word_bits`` is smaller than 4, or the file contains
            characters that are not hexadecimal digits.
    """
    with open(filename, "r") as f:
        # split()/join drops *every* whitespace character (newlines, spaces,
        # tabs), so trailing blanks or space-separated words cannot shift the
        # fixed-width chunking below.
        hex_data = "".join(f.read().split())

    word_hex_len = word_bits // 4
    if word_hex_len <= 0:
        raise ValueError(f"word_bits must be at least 4, got {word_bits}")

    data = []
    for start in range(0, len(hex_data), word_hex_len):
        # A short final chunk is left-padded with zeros to a full word.
        word = hex_data[start:start + word_hex_len].zfill(word_hex_len)
        # Walk the word from its low end in 8-digit (32-bit) steps; clamping
        # the slice start at 0 keeps a partial top piece instead of producing
        # an empty slice.
        for end in range(len(word), 0, -8):
            data.append(int(word[max(0, end - 8):end], 16))
    return np.array(data, dtype=np.uint32)

In [None]:
# --- Load Stimulus and Allocate Buffers ---
# Read the memory images here so this cell does not rely on `input_data` /
# `weight_data` surviving from an earlier kernel session: without these two
# lines the notebook raises NameError under Restart & Run All.
# NOTE(review): the word widths passed below are taken from the stream-width
# constants defined in the configuration cell -- confirm they match how
# i.mem / w.mem were generated.
input_data = load_mem_file(INPUT_MEM_FILE, INPUT_WIDTH_BITS)
weight_data = load_mem_file(WEIGHT_MEM_FILE, WEIGHT_WIDTH_BITS)

# Buffers handed to the DMA engines; the output buffer is sized from the
# configuration constants rather than from any loaded data.
input_buffer = allocate(shape=input_data.shape, dtype=np.uint32)
weight_buffer = allocate(shape=weight_data.shape, dtype=np.uint32)
output_buffer = allocate(shape=(OUTPUT_BUFFER_WORDS,), dtype=np.uint32)

# Copy the stimulus into the DMA buffers, then flush both so the data
# is written out of the CPU cache before the DMA engines read it.
np.copyto(input_buffer, input_data)
np.copyto(weight_buffer, weight_data)
input_buffer.flush()
weight_buffer.flush()

In [None]:
# --- Kick Off the Send Transfers ---
# Start the input stream first, then the weight stream (same order as before);
# completion is awaited in the next cell.
for _dma, _buf in ((dma_i, input_buffer), (dma_w, weight_buffer)):
    _dma.sendchannel.transfer(_buf)

In [None]:
# Wait for the input send to finish, then the weight send.
for _dma in (dma_i, dma_w):
    _dma.sendchannel.wait()

In [None]:
# Start the receive transfer on the output DMA, targeting output_buffer.
# NOTE(review): this is issued only after both send transfers have been waited
# on in the previous cell. If the design cannot hold its complete result
# internally, arming the receive before the sends may be required -- confirm
# against the RTL.
dma_o.recvchannel.transfer(output_buffer)

In [None]:
# Wait for the receive transfer started in the previous cell to complete.
dma_o.recvchannel.wait()
# Invalidate the buffer's cached contents before the CPU reads it, so the
# following cells see the data written by the DMA (order matters: wait first).
output_buffer.invalidate()

In [None]:
# --- Reconstruct Output Data ---
# View the flat uint32 buffer as rows of WORDS_PER_OUTPUT elements, one row per
# output word.
output_chunks = output_buffer.reshape((-1, WORDS_PER_OUTPUT))
# Within a row, element 0 is printed last (rightmost), i.e. it is treated as
# the least-significant 32 bits -- the same ordering load_mem_file uses when
# it splits a hex word.
output_words = [
    ''.join(f"{x:08x}" for x in reversed(chunk)) for chunk in output_chunks
]

# Report the word width from the configuration instead of hard-coding it, so
# the banner stays correct if NUM_CORES (and hence OUTPUT_WIDTH_BITS) changes.
print(f"Output received ({OUTPUT_WIDTH_BITS}-bit hex):")
for i, word in enumerate(output_words):
    print(f"Output {i}: {word}")


In [None]:
# Persist the reconstructed output words, one hex string per line.
with open(OUTPUT_MEM_FILE, "w") as f:
    f.writelines(f"{word}\n" for word in output_words)

print(f"Saved output to {OUTPUT_MEM_FILE}")