In [56]:
import os
import numpy as np
import pandas as pd
import scipy.io as sio
import matplotlib.pyplot as plt
import math
import json
import re
from copy import deepcopy

import torch
import torch.nn as nn
import torch.nn.functional as F

import random
# Provisional seeds for NumPy's legacy global RNG and Python's `random` module.
# NOTE(review): the following cell re-seeds both with SEED = 1234 before any
# random numbers are drawn in the visible cells, so these look redundant — confirm.
np.random.seed(42)
random.seed(42)

In [57]:
# Global reproducibility setup: choose the compute device and seed every RNG
# used by later cells. `torch` is already imported in the imports cell, so it
# is not re-imported here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SEED = 1234

# Python & NumPy
random.seed(SEED)
np.random.seed(SEED)

# PyTorch: manual_seed_all seeds every visible GPU (the current one included),
# so a separate torch.cuda.manual_seed call is unnecessary. It is silently
# ignored when CUDA is unavailable.
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Determinism flags: request deterministic cuDNN kernels and disable the
# auto-tuner, which could otherwise pick different algorithms between runs.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [58]:
# Standard-normal 2-D sample tensors, created directly on `device`.
# Statement order matters: every torch.randn call advances the seeded
# generator, so reordering these lines changes all values drawn after it.
# NOTE(review): the names suggest each tensor feeds a particular network
# stage — confirm the shapes against whatever consumes the exported files.
encoder_input = torch.randn(1, 512, device=device)
bottleneck_input = torch.randn(256, 32, device=device)
decoder_first_input = torch.randn(256, 32, device=device)
decoder_last_input = torch.randn(64, 256, device=device)
out_input = torch.randn(16, 512, device=device)

In [59]:
# Supported fixed-point formats. For every entry:
#   frac_bits - number of fractional bits (values are scaled by 2**frac_bits)
#   int_bits  - remaining bits; float_to_q saturates to the signed range of
#               int_bits + frac_bits total bits
#   dtype     - NumPy integer type used to store the quantized words
# NOTE(review): "Q9.14" is configured with int_bits=10 — presumably 9 integer
# bits plus the sign bit; confirm against the hardware word format.
Q_CONFIGS = {
    "Q10.10": dict(frac_bits=10, int_bits=10, dtype=np.int32),
    "Q9.14": dict(frac_bits=14, int_bits=10, dtype=np.int32),
}

TYPE = "Q9.14"  # must be a key of Q_CONFIGS: "Q10.10" or "Q9.14"

# Fail with an actionable message instead of a bare KeyError when TYPE names a
# format that has no entry above.
if TYPE not in Q_CONFIGS:
    raise KeyError(
        f"Unsupported TYPE {TYPE!r}; choose one of {sorted(Q_CONFIGS)}"
    )

cfg = Q_CONFIGS[TYPE]
FRAC_BITS = cfg["frac_bits"]
INT_BITS  = cfg["int_bits"]
DTYPE     = cfg["dtype"]

def float_to_q(x, frac_bits, int_bits, dtype):
    """Quantize floating-point values to signed fixed-point integers.

    Values are scaled by ``2**frac_bits``, rounded with ``np.round`` (ties go
    to the nearest even integer), saturated to the signed range of
    ``int_bits + frac_bits`` total bits, and cast to ``dtype``.

    Args:
        x: NumPy array, scalar, or (nested) sequence of floats.
        frac_bits: number of fractional bits.
        int_bits: number of remaining bits; together with ``frac_bits`` it
            defines the saturation range ``[-2**(total-1), 2**(total-1) - 1]``.
        dtype: NumPy dtype of the result (e.g. ``np.int32``).

    Returns:
        The quantized values as ``dtype``, with the same shape as ``x``.

    Raises:
        ValueError: if ``dtype`` is an integer type that cannot hold the
            saturation range (the cast would otherwise wrap silently).
    """
    total_bits = int_bits + frac_bits
    min_val = -(1 << (total_bits - 1))
    max_val = (1 << (total_bits - 1)) - 1

    # Guard against silent wraparound in the final cast.
    if np.issubdtype(dtype, np.integer):
        limits = np.iinfo(dtype)
        if min_val < limits.min or max_val > limits.max:
            raise ValueError(
                f"{total_bits}-bit signed range does not fit in {np.dtype(dtype).name}"
            )

    scale = 1 << frac_bits
    # np.asarray lets plain sequences through; `list * int` would repeat the
    # list instead of scaling its elements.
    xq = np.round(np.asarray(x) * scale)
    xq = np.clip(xq, min_val, max_val)
    return xq.astype(dtype)

def q_to_float(x, frac_bits):
    """Convert fixed-point integers back to float32 values.

    Args:
        x: NumPy array of quantized integers.
        frac_bits: number of fractional bits used when quantizing.

    Returns:
        A float32 array equal to ``x / 2**frac_bits``.
    """
    scale = 1 << frac_bits
    as_float = x.astype(np.float32)
    return as_float / scale

In [60]:
# Output directory for the single-sample export; the folder name embeds the
# selected fixed-point format (TYPE). The relative "../models" prefix is
# resolved against the kernel's current working directory by os.path.abspath.
data_path = os.path.abspath(f"../models/input_sample_{TYPE}/")
os.makedirs(data_path, exist_ok=True)

In [61]:
def write_tensor_mem(tensor, name, path, y_first=True):
    """
    Quantize a 2-D tensor and dump it as two one-entry-per-line memory files.

    The tensor is converted with float_to_q using the notebook-level
    FRAC_BITS / INT_BITS / DTYPE settings, then written to:
      {name}_hex.mem → each value as 8 upper-case hex digits
                       (32-bit two's complement)
      {name}_raw.mem → each value as a signed decimal integer

    tensor:  torch.Tensor, shape [x, y]
    name:    file-name stem for both output files
    path:    existing directory the files are written into

    order:
      y_first=True  → x outer, y inner: [0,0], [0,1], ...  (row-major)
      y_first=False → y outer, x inner: [0,0], [1,0], ...  (column-major)

    NOTE(review): the printed labels ("y→x" / "x→y") name the index that
    changes fastest first. The emitted order is kept exactly as before
    because downstream consumers may depend on it; confirm it is the intended
    one.
    """
    assert tensor.ndim == 2, f"{name} must be 2D"

    # Move to CPU → numpy float
    arr_f = tensor.detach().cpu().numpy().astype(np.float32)

    # Quantize to the configured fixed-point format
    arr_q = float_to_q(
        arr_f,
        frac_bits=FRAC_BITS,
        int_bits=INT_BITS,
        dtype=DTYPE,
    )

    X, Y = arr_q.shape
    out_hex_path = os.path.join(path, f"{name}_hex.mem")
    out_raw_path = os.path.join(path, f"{name}_raw.mem")

    # A single flatten replaces two duplicated nested loops:
    # C order walks y fastest (y_first=True), F order walks x fastest.
    flat = arr_q.ravel(order="C" if y_first else "F")

    with open(out_hex_path, "w") as f_hex, open(out_raw_path, "w") as f_raw:
        for entry in flat.tolist():
            v = int(entry)
            # Mask to 32 bits so negatives print as two's complement.
            f_hex.write(f"{v & 0xFFFFFFFF:08X}\n")
            f_raw.write(f"{v}\n")

    order_str = "y→x" if y_first else "x→y"
    print(
        f"Wrote {name}.mem  shape=({X},{Y})  "
        f"entries={X*Y}  order={order_str}"
    )

In [62]:
# Export every sample tensor into data_path, in the order listed. Only
# decoder_last_input is written with y_first=False; the rest use y_first=True,
# which is also write_tensor_mem's default.
for export_name, export_tensor, export_y_first in (
    ("encoder_input",       encoder_input,       True),
    ("bottleneck_input",    bottleneck_input,    True),
    ("decoder_first_input", decoder_first_input, True),
    ("decoder_last_input",  decoder_last_input,  False),
    ("out_input",           out_input,           True),
):
    write_tensor_mem(export_tensor, export_name, data_path, y_first=export_y_first)

Wrote encoder_input.mem  shape=(1,512)  entries=512  order=y→x
Wrote bottleneck_input.mem  shape=(256,32)  entries=8192  order=y→x
Wrote decoder_first_input.mem  shape=(256,32)  entries=8192  order=y→x
Wrote decoder_last_input.mem  shape=(64,256)  entries=16384  order=x→y
Wrote out_input.mem  shape=(16,512)  entries=8192  order=y→x


# Group Input

In [63]:
def _seed_everything(seed):
    """Seed the Python, NumPy and PyTorch RNGs and request deterministic cuDNN."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers every visible GPU, including the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


def run_automated_export(seeds, base_data_path):
    """
    Generate and export one set of random input tensors per seed.

    For each seed the global RNGs are re-seeded (a side effect that persists
    after the call), a folder ``input_sample_<seed>/`` is created under
    ``base_data_path``, and every generated tensor is written there via
    write_tensor_mem with the seed appended to its name.

    seeds:          iterable of integer seeds
    base_data_path: directory that receives the per-seed folders
    """
    # The device does not depend on the seed, so look it up once.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for seed in seeds:
        print(f"\n--- Processing Seed: {seed} ---")

        # 1. Set seeds for reproducibility
        _seed_everything(seed)

        # 2. Create the per-seed output folder
        data_path = os.path.join(base_data_path, f"input_sample_{seed}/")
        os.makedirs(data_path, exist_ok=True)

        # 3. Generate tensors
        # Drawn after re-seeding, so each seed yields different random values.
        tensors_to_process = [
            ("decoder_last_input", torch.randn(64, 256, device=device), False), # x->y
        ]

        # 4. Export each tensor, with the seed appended to the file name
        for name, tensor, y_first in tensors_to_process:
            seeded_name = f"{name}_{seed}"
            write_tensor_mem(tensor, seeded_name, data_path, y_first=y_first)

In [64]:
# Seeds for the batch export; run_automated_export creates one
# input_sample_<seed>/ folder per seed underneath group_data_path.
seed_arr = [1234, 420, 67, 69, 13523100, 13223051, 13223075, 42, 21, 20]
# The group folder name records the fixed-point format and the number of seeds.
group_data_path = os.path.abspath(f"../models/input_sample_{TYPE}_{len(seed_arr)}/")
os.makedirs(group_data_path, exist_ok=True)

run_automated_export(seed_arr, group_data_path)


--- Processing Seed: 1234 ---
Wrote decoder_last_input_1234.mem  shape=(64,256)  entries=16384  order=x→y

--- Processing Seed: 420 ---
Wrote decoder_last_input_420.mem  shape=(64,256)  entries=16384  order=x→y

--- Processing Seed: 67 ---
Wrote decoder_last_input_67.mem  shape=(64,256)  entries=16384  order=x→y

--- Processing Seed: 69 ---
Wrote decoder_last_input_69.mem  shape=(64,256)  entries=16384  order=x→y

--- Processing Seed: 13523100 ---
Wrote decoder_last_input_13523100.mem  shape=(64,256)  entries=16384  order=x→y

--- Processing Seed: 13223051 ---
Wrote decoder_last_input_13223051.mem  shape=(64,256)  entries=16384  order=x→y

--- Processing Seed: 13223075 ---
Wrote decoder_last_input_13223075.mem  shape=(64,256)  entries=16384  order=x→y

--- Processing Seed: 42 ---
Wrote decoder_last_input_42.mem  shape=(64,256)  entries=16384  order=x→y

--- Processing Seed: 21 ---
Wrote decoder_last_input_21.mem  shape=(64,256)  entries=16384  order=x→y

--- Processing Seed: 20 ---
Wr

# Check

In [65]:
# Round-trip check: read encoder_input_hex.mem back, rebuild the [x, y] array
# in the order the writer used, and compare it with the original floats.

# --- LOAD ORIGINAL ---
orig = encoder_input.detach().cpu().numpy().astype(np.float32)
X, Y = orig.shape

# --- READ BACK MEM (HEX) ---
with open(os.path.join(data_path, "encoder_input_hex.mem")) as f:
    # parse hex → uint32 (blank lines, if any, are skipped)
    mem_vals = [int(line, 16) for line in map(str.strip, f) if line]

assert len(mem_vals) == X * Y, (
    f"expected {X * Y} entries, found {len(mem_vals)}"
)

# reinterpret as signed int32 (two's complement)
mem_vals = np.array(mem_vals, dtype=np.uint32).view(np.int32)

# --- RECONSTRUCT [x, y] IN THE WRITER'S ORDER ---
# encoder_input was exported with y_first=True, for which write_tensor_mem
# loops x outer / y inner, i.e. row-major order. Reading back y outer / x inner
# only happened to work because X == 1.
recon_q = mem_vals.reshape(X, Y).copy()

# --- CONVERT BACK TO FLOAT ---
recon_f = q_to_float(recon_q, FRAC_BITS)

# --- ERROR METRICS ---
abs_err = np.abs(orig - recon_f)

print("===== SANITY CHECK =====")
print("Original min/max:", orig.min(), orig.max())
print("Reconst  min/max:", recon_f.min(), recon_f.max())
print("Max abs error   :", abs_err.max())
print("Mean abs error  :", abs_err.mean())

# --- PRINT FIRST 10 VALUES (MEM ORDER) ---
print("\nFirst 10 entries (mem order):")
for i in range(min(10, X * Y)):
    # Row-major decoding of the flat file index.
    x, y = divmod(i, Y)
    print(
        f"[{x},{y}]  "
        f"orig={orig[x,y]:+.6f}  "
        f"Q={recon_q[x,y]:6d}  "
        f"recon={recon_f[x,y]:+.6f}"
    )

# Unsaturated round-to-nearest quantization is off by at most half an LSB.
half_lsb = 0.5 / (1 << FRAC_BITS)
assert abs_err.max() <= half_lsb + 1e-7, (
    f"round-trip error {abs_err.max()} exceeds half an LSB ({half_lsb}); "
    "values may have saturated or the read-back order is wrong"
)


===== SANITY CHECK =====
Original min/max: -2.780352 2.9948635
Reconst  min/max: -2.7803345 2.994873
Max abs error   : 3.0308962e-05
Mean abs error  : 1.4849333e-05

First 10 entries (mem order):
[0,0]  orig=-1.616490  Q=-26485  recon=-1.616516
[0,1]  orig=+0.568455  Q=  9314  recon=+0.568481
[0,2]  orig=-0.510225  Q= -8360  recon=-0.510254
[0,3]  orig=-0.911339  Q=-14931  recon=-0.911316
[0,4]  orig=-1.155516  Q=-18932  recon=-1.155518
[0,5]  orig=-0.226151  Q= -3705  recon=-0.226135
[0,6]  orig=-1.289133  Q=-21121  recon=-1.289124
[0,7]  orig=+1.065382  Q= 17455  recon=+1.065369
[0,8]  orig=-0.716660  Q=-11742  recon=-0.716675
[0,9]  orig=-0.533334  Q= -8738  recon=-0.533325
