In [1]:
# Import necessary Library
import numpy as np
import random
import os
import sys
from FPU import cmul, cadd
from utils import read_binary, binary_to_fp16, store_binary, generate_fp16

In [2]:
def generate_matrix(rows, cols, complex=False, seed=123):
    """
    Build a (rows x cols) nested list from one flat generate_fp16() draw.

    A single vector of rows * cols elements is requested and then read back
    in row-major order, i.e. result[r][c] is vec[r * cols + c].

    Args:
        rows: number of rows in the result
        cols: number of entries per row
        complex: forwarded unchanged to generate_fp16()
        seed: forwarded unchanged to generate_fp16()

    Returns:
        list of `rows` lists, each holding `cols` consecutive elements of vec
    """
    vec = generate_fp16(size=rows * cols, complex=complex, seed=seed)
    # Index element by element (no slicing) so each row is always a plain list.
    return [
        [vec[r * cols + c] for c in range(cols)]
        for r in range(rows)
    ]

def matmul(A_mat, B_mat, use_complex=True):
    """
    Multiply A(MxK) by B(KxN) column-by-column.
    For each column of B, produce the corresponding column of C.
    Complex multiply/add uses cmul and cadd; the real path rounds every
    product and every partial sum to np.float16.

    Args:
        A_mat: list of list of elements (real or [re, im])
        B_mat: list of list of elements (real or [re, im])
        use_complex: True if A and B are complex-valued

    Returns:
        C_mat: list of list, same format as A/B (real or complex),
               with shape MxN

    Raises:
        ValueError: if the number of rows of B differs from the number of
                    columns of A (previously this either produced a silently
                    truncated result or failed with an IndexError).
    """
    M = len(A_mat)
    K = len(A_mat[0])
    if len(B_mat) != K:
        raise ValueError(
            f"Inner dimensions do not match: A is {M}x{K} but B has {len(B_mat)} rows"
        )
    N = len(B_mat[0])

    # Shape trace kept from the original implementation.
    print(M, K, N)

    # One (initially empty) output row per row of A; columns are appended below.
    C_mat = [[] for _ in range(M)]

    for col in range(N):
        # process one column of B at a time
        for i in range(M):
            if use_complex:
                acc = [np.float16(0.0), np.float16(0.0)]
                for k in range(K):
                    prod = cmul(A_mat[i][k], B_mat[k][col])
                    acc = cadd(acc, prod)
                C_mat[i].append([np.float16(acc[0]), np.float16(acc[1])])
            else:
                acc = np.float16(0.0)
                for k in range(K):
                    # Round the product first, then the running sum, so the
                    # accumulation order and precision stay exactly as before.
                    acc = np.float16(acc + np.float16(A_mat[i][k] * B_mat[k][col]))
                C_mat[i].append(np.float16(acc))
    return C_mat

def flatten_matrix_row_major(mat, complex=False):
    """
    Flatten a nested-list matrix row by row.

    The element A[i][j] ends up at position i*cols + j of the result.
    Elements are appended as-is, so with complex data each [re, im] pair
    stays a single entry. The `complex` flag is accepted but not used.
    """
    return [entry for row in mat for entry in row]

def flatten_matrix_col_major(mat, complex=False):
    """
    Flatten a nested-list matrix column by column.

    The element A[i][j] ends up at position j*rows + i of the result.
    Elements are appended as-is, so with complex data each [re, im] pair
    stays a single entry. The `complex` flag is accepted but not used.
    The column count is taken from the first row.
    """
    n_rows, n_cols = len(mat), len(mat[0])
    return [mat[i][j] for j in range(n_cols) for i in range(n_rows)]

# Single PE

## Vector-Vector Multiplication

In [100]:
# Vector-vector case: A is a 1 x K row vector and B is a K x 1 column vector,
# so C = A * B is a single (1 x 1) element.
M = 1   # Rows in A
K = 128   # Cols in A / Rows in B
N = 1   # Cols in B
use_complex = True  # Set False for real-valued matmul

# The two operands are generated with different seeds.
A_mat = generate_matrix(M, K, complex=use_complex, seed=101)
B_mat = generate_matrix(K, N, complex=use_complex, seed=202)

# Reference result; matmul() also prints the (M, K, N) shape it derived.
C_mat = matmul(A_mat, B_mat, use_complex=use_complex)

1 128 1


In [97]:
# Flatten the operands for export: A in row-major order, B in column-major order.
A_flat_row = flatten_matrix_row_major(A_mat, complex=use_complex)
B_flat_col = flatten_matrix_col_major(B_mat, complex=use_complex)
# Display the reference product computed by matmul().
C_mat

[[[-22.42, 45.88]]]

In [63]:
# Recomputed here without the `complex` argument; flatten_matrix_row_major()
# does not use that flag, so the resulting list is the same either way.
A_flat_row = flatten_matrix_row_major(A_mat)
# Write input, weight and expected-output vectors under ../PE/DATA.
# B_flat_col must already exist in the kernel (it is built in the flatten cell).
# C_mat[0] is the first row of C.
# NOTE(review): the on-disk format is defined by utils.store_binary, which is
# not visible in this notebook.
store_binary(A_flat_row, "../PE/DATA/mm_input.txt")
store_binary(B_flat_col, "../PE/DATA/mm_weight.txt")
store_binary(C_mat[0], "../PE/DATA/mm_output.txt")

In [117]:
# Inspect the first row of B (one entry per column of B).
# NOTE(review): the recorded output lists two [re, im] pairs, so it was
# presumably captured while B_mat had N = 2 columns -- re-run to confirm.
B_mat[0]

[[1.292, 0.9346], [0.2283, -0.4058]]

## Vector x Two-Column Multiplication in a Single PE

In [3]:
# Two-column case: A is a 1 x K row vector and B is K x 2, so C is 1 x 2.
M = 1   # Rows in A
K = 128   # Cols in A / Rows in B
N = 2   # Cols in B
use_complex = True  # Set False for real-valued matmul

# The two operands are generated with different seeds.
A_mat = generate_matrix(M, K, complex=use_complex, seed=101)
B_mat = generate_matrix(K, N, complex=use_complex, seed=202)

# Reference result; matmul() also prints the (M, K, N) shape it derived.
C_mat = matmul(A_mat, B_mat, use_complex=use_complex)

# A is exported row-major; B and C are exported column by column.
A_flat_row = flatten_matrix_row_major(A_mat, complex=use_complex)
B_flat_col = flatten_matrix_col_major(B_mat, complex=use_complex)
# Use the column-major helper so the variable name and layout agree for any M.
# For M == 1 this yields the same list as the row-major flatten used before.
C_flat_col = flatten_matrix_col_major(C_mat, complex=use_complex)

1 128 2


In [93]:
# Display the reference product for the two-column case (nested M x N list).
C_mat

[[[-10.57, 81.3], [0.8574, 57.66]]]

In [84]:
# Display the flattened C that gets written to mm2_output.txt.
C_flat_col

[[-10.57, 81.3], [0.8574, 57.66]]

In [76]:
# Write the two-column test vectors (input, weights, expected output) under
# ../PE/DATA using the mm2_ prefix.
# NOTE(review): the on-disk format is defined by utils.store_binary, which is
# not visible in this notebook.
store_binary(A_flat_row, "../PE/DATA/mm2_input.txt")
store_binary(B_flat_col, "../PE/DATA/mm2_weight.txt")
store_binary(C_flat_col, "../PE/DATA/mm2_output.txt")

# Multiple PE

## Vector-Matrix Multiplication (each row of the PE array)

In [3]:
# Each PE receives two consecutive columns of B, so N = 22 columns are split
# across 11 PEs by the save helpers defined further down.

M = 1       # Rows in A
K = 128     # Cols in A / Rows in B
N = 22      # Cols in B
use_complex = True  # Set False for real-valued matmul

# The two operands are generated with different seeds.
A_mat = generate_matrix(M, K, complex=use_complex, seed=101)
B_mat = generate_matrix(K, N, complex=use_complex, seed=202)

# Sanity check: print (rows, cols) of A and of B.
print(len(A_mat), len(A_mat[0]))
print(len(B_mat), len(B_mat[0]))

1 128
128 22


In [4]:
# Reference product for the current A_mat / B_mat; matmul() prints (M, K, N).
C_mat = matmul(A_mat, B_mat, use_complex=use_complex)

# NOTE: this bare expression is not the last statement of the cell, so the
# notebook does not display it.
C_mat

def save_Cmat_outputs(C_mat, out_dir="../PE/DATA"):
    """
    Write the expected outputs of every PE, one file per PE.

    C_mat (M x N, N must be 22) is cut into 11 pairs of consecutive columns.
    PE i receives columns 2*i and 2*i+1, stored back to back:
        [column 2*i (M entries), column 2*i+1 (M entries)]  -> length 2*M
    The payload is handed to store_binary() with the path
    {out_dir}/mm_{i}_output.txt.
    """
    n_rows = len(C_mat)
    n_cols = len(C_mat[0])
    assert n_cols == 22, "C_mat must have 22 columns (for 11 PEs)."

    for pe in range(n_cols // 2):
        first_col = 2 * pe
        # First column top to bottom, then the second column (no zero padding).
        payload = [
            C_mat[row][col]
            for col in (first_col, first_col + 1)
            for row in range(n_rows)
        ]

        out_path = f"{out_dir}/mm_{pe}_output.txt"
        store_binary(payload, out_path)

# Write one mm_{pe}_output.txt file per PE into the default ../PE/DATA directory.
save_Cmat_outputs(C_mat)

1 128 22


In [5]:
# Assumptions:
# - B_mat is a K x N 2D list
# - Each element is either:
#     (a) real-valued: a float (use_complex=False), or
#     (b) complex-valued: [re, im] pair (use_complex=True)
# - store_binary(vec, path) writes a 1D list `vec` to `path`
# - N == 22 so there are 11 PEs, and each PE gets two consecutive columns.

def save_Bmat_as_weights_for_PEs(B_mat, use_complex=True, out_dir="../PE/DATA"):
    """
    Split B_mat (K x N) into 11 PE weight files.

    For PE i, take columns (2*i) and (2*i+1) and store:
        [col0 (K), col1 (K), col1 (K), col0 (K)]  --> total length = 4*K
    The payload is handed to store_binary() with the path
    {out_dir}/mm_{i}_weight.txt.

    Args:
        B_mat: K x N nested list (N must be 22); elements are stored as-is
        use_complex: kept for interface compatibility; not used by the
                     current payload layout, which contains no zero padding
        out_dir: directory prefix of the generated file paths

    Raises:
        AssertionError: if B_mat is empty, ragged, or does not have 22 columns
    """
    # --- Basic shape checks ---
    K = len(B_mat)                 # rows
    assert K > 0, "B_mat must have at least 1 row."
    N = len(B_mat[0])              # cols
    for row in B_mat[1:]:
        assert len(row) == N, "All rows of B_mat must have the same length."

    assert N == 22, "This helper expects N == 22 (11 PEs, 2 cols each)."
    num_PEs = N // 2

    # --- Build and store payload per PE ---
    for pe in range(num_PEs):
        c0 = 2 * pe
        c1 = 2 * pe + 1

        # Extract two columns
        col0 = [B_mat[r][c0] for r in range(K)]   # length K
        col1 = [B_mat[r][c1] for r in range(K)]   # length K

        # Progress trace: last row index and the two column indices.
        # Uses K - 1 explicitly; the old code read a loop variable leaked from
        # the shape check, which was undefined (NameError) when K == 1.
        print(K - 1, c0, c1)

        # Layout: col0, col1, col1, col0  -> total length 4*K (512 when K=128).
        # NOTE(review): an earlier layout was col0, zeros, col1, zeros (K zero
        # entries after each column) -- confirm which one the PE expects.
        payload = col0 + col1 + col1 + col0

        # File path: {out_dir}/mm_{0..10}_weight.txt
        out_path = f"{out_dir}/mm_{pe}_weight.txt"
        store_binary(payload, out_path)


In [6]:
# Write one mm_{pe}_weight.txt file per PE into the default ../PE/DATA directory;
# the helper prints one trace line per PE.
save_Bmat_as_weights_for_PEs(B_mat)

127 0 1
127 2 3
127 4 5
127 6 7
127 8 9
127 10 11
127 12 13
127 14 15
127 16 17
127 18 19
127 20 21


In [7]:

# Flatten A (row-major) once and reuse the same vector for every PE input file.
A_mat_flat = flatten_matrix_row_major(A_mat)
# Earlier variant, kept for reference: files 0..10 with zero_padding = i.
# for i in range(11):
#     store_binary(A_mat_flat, f'../PE/DATA/mm_{i}_input.txt', zero_padding=i)

# Active variant: files mm_1_input.txt .. mm_10_input.txt with zero_padding = i - 1.
# NOTE(review): mm_0_input.txt is not written by this loop, although weight and
# output files are produced for PE 0..10 -- confirm this is intended.
# NOTE(review): the meaning of `zero_padding` is defined by utils.store_binary,
# which is not visible in this notebook.
for i in range(1, 11):
    print(i)
    store_binary(A_mat_flat, f'../PE/DATA/mm_{i}_input.txt', zero_padding=i-1)

1
2
3
4
5
6
7
8
9
10
