In [2]:
import numpy as np

def build_sincos_position_embedding(L, E, filename="pos_embed.mem"):
    """
    Build a sinusoidal position-embedding table, quantize it to signed
    16-bit fixed point and dump it to a text file as hex words.

    Row `pos` of the table holds, for k = 0 .. E/2 - 1:
        column 2k   : sin(pos * 10000 ** (-2k / E))
        column 2k+1 : cos(pos * 10000 ** (-2k / E))

    L: number of tokens (including [CLS], if any)
    E: embedding dimension (must be even)
    filename: path of the file to (over)write. It receives L*E lines in
        row-major order (all E words of token 0, then token 1, ...); each
        line is one word as 4 lowercase hex digits, negative values in
        two's complement.

    Returns None.
    Raises AssertionError if E is odd.
    """
    assert E % 2 == 0, "Embedding dim must be even"

    # 1. compute the raw float32 sin/cos table
    pe = np.zeros((L, E), dtype=np.float32)
    position = np.arange(L)[:, None]  # shape (L,1)
    div_term = np.exp(np.arange(0, E, 2) * -(np.log(10000.0) / E))  # shape (E/2,)
    angles = position * div_term  # shape (L, E/2); shared by the sin and cos halves
    pe[:, 0::2] = np.sin(angles)
    pe[:, 1::2] = np.cos(angles)

    # 2. quantize to signed Q1.15 (int16)
    # The scale is 32767 (not 32768) so that +1.0 maps to 0x7fff; because
    # |sin|, |cos| <= 1 the result lies in [-32767..32767] and never hits -32768.
    scale = (1 << 15) - 1
    pe_q = np.round(pe * scale).astype(np.int16)

    # 3. dump to hex memory file, one 16-bit word per line
    # Reinterpreting the int16 bits as uint16 yields the two's-complement
    # word (same as `q & 0xFFFF`); ravel() keeps the row-major token order.
    words = pe_q.view(np.uint16).ravel().tolist()
    with open(filename, "w") as f:
        f.writelines(f"{w:04x}\n" for w in words)

if __name__ == "__main__":
    # Token count for a 14x14 patch grid (use 197 when a [CLS] token is
    # prepended) and the embedding width.
    L, E = 196, 128
    build_sincos_position_embedding(L, E, filename="pos_embed.mem")
    print(f"Wrote {L*E} lines to pos_embed.mem")


Wrote 25088 lines to pos_embed.mem


In [3]:
import numpy as np

def build_fp16_sincos_pos_embed(L, E, filename="pos_embed_fp16.mem"):
    """
    Build a sinusoidal position-embedding table, convert it to IEEE
    half precision and dump the raw 16-bit patterns to a text file as hex.

    Row `pos` of the table holds, for k = 0 .. E/2 - 1:
        column 2k   : sin(pos * 10000 ** (-2k / E))
        column 2k+1 : cos(pos * 10000 ** (-2k / E))

    L: number of tokens (including [CLS], if any)
    E: embedding dimension (must be even)
    filename: path of the file to (over)write. It receives L*E lines in
        row-major order (all E words of token 0, then token 1, ...); each
        line is the float16 bit pattern of one entry as 4 lowercase hex
        digits.

    Returns None.
    Raises AssertionError if E is odd.
    """
    assert E % 2 == 0, "Embedding dim must be even"
    # 1) float32 sin/cos table
    pe = np.zeros((L, E), dtype=np.float32)
    pos = np.arange(L)[:, None]  # shape (L,1)
    div_term = np.exp(np.arange(0, E, 2) * -(np.log(10000.0) / E))  # shape (E/2,)
    angles = pos * div_term  # shape (L, E/2); shared by the sin and cos halves
    pe[:, 0::2] = np.sin(angles)
    pe[:, 1::2] = np.cos(angles)

    # 2) cast to float16
    pe16 = pe.astype(np.float16)

    # 3) view bits as uint16 (pure reinterpretation, no numeric conversion);
    #    ravel() flattens in row-major order, matching the token-by-token dump
    words = pe16.view(np.uint16).ravel().tolist()  # Python ints in 0..0xFFFF

    # 4) dump as hex, one word per line
    with open(filename, "w") as f:
        f.writelines(f"{w:04x}\n" for w in words)

if __name__ == "__main__":
    # Token count for a 14x14 patch grid (use 197 when a [CLS] token is
    # prepended) and the embedding width.
    L, E = 196, 128
    build_fp16_sincos_pos_embed(L, E, filename="pos_embed_fp16.mem")
    print(f"Wrote {L*E} lines to pos_embed_fp16.mem")


Wrote 25088 lines to pos_embed_fp16.mem
