In [17]:
import os
import numpy as np
import pandas as pd
import scipy.io as sio
import matplotlib.pyplot as plt
import math
import json
import re
from copy import deepcopy

import torch
import torch.nn as nn
import torch.nn.functional as F

import random
# Seed NumPy's and Python's global RNGs immediately after the imports.
# NOTE(review): the next cell seeds both again with SEED = 1234 before any random
# numbers are drawn in the visible code, so this value of 42 appears to have no
# effect on the generated samples — confirm and consider dropping it.
np.random.seed(42)
random.seed(42)

In [18]:
import torch

# Reproducibility setup: one seed shared by every RNG used in this notebook,
# plus the device on which the sample tensors will be created.
SEED = 1234
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed in a fixed order: Python, NumPy, then PyTorch (default generator,
# current CUDA device, all CUDA devices).
for seed_rng in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    seed_rng(SEED)
del seed_rng  # keep the notebook namespace free of the loop helper

# cuDNN determinism flags: turn the benchmark autotuner off and request
# deterministic algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [19]:
# Random 2-D sample tensors, one per named input, created directly on `device`.
# They are drawn in this fixed order so the seeded RNG stream is consumed the
# same way on every run. Shapes are given as (dim0, dim1) tuples.
encoder_input = torch.randn((1, 512), device=device)
bottleneck_input = torch.randn((256, 32), device=device)
decoder_first_input = torch.randn((256, 32), device=device)
decoder_last_input = torch.randn((64, 256), device=device)
out_input = torch.randn((16, 512), device=device)

In [20]:
# Output directory for the generated .mem files. The relative path is turned
# into an absolute one by os.path.abspath, i.e. resolved against the current
# working directory of the kernel.
data_path = os.path.abspath("../models/input_sample/")
# Create the directory (and any missing parents); no error if it already exists.
os.makedirs(data_path, exist_ok=True)

In [21]:
# Fixed-point formats known to this notebook, keyed by name. Each entry gives
# the number of fractional bits, the number of integer bits and the NumPy
# integer type used to hold the quantized words.
Q_CONFIGS = {
    "Q10.10": {"frac_bits": 10, "int_bits": 10, "dtype": np.int32},
}

# Active format and its parameters, unpacked into module-level constants.
TYPE = "Q10.10"
cfg = Q_CONFIGS[TYPE]
FRAC_BITS, INT_BITS, DTYPE = cfg["frac_bits"], cfg["int_bits"], cfg["dtype"]

def float_to_q(x, frac_bits, int_bits, dtype):
    """Quantize floating-point values to signed fixed-point integers.

    Each value is scaled by ``2**frac_bits``, rounded with ``np.round``
    (ties go to the nearest even integer) and saturated to the signed range of
    a word that is ``int_bits + frac_bits`` bits wide.

    Parameters
    ----------
    x : array-like of float
        Values to quantize (NumPy array, scalar, or nested Python sequence).
    frac_bits : int
        Number of fractional bits.
    int_bits : int
        Number of integer bits; the sign bit is counted inside
        ``int_bits + frac_bits``.
    dtype : NumPy dtype
        Type of the returned array.

    Returns
    -------
    Quantized values with the shape of ``x`` and type ``dtype``.

    Raises
    ------
    ValueError
        If ``x`` contains NaN, or if ``dtype`` is an integer type narrower than
        ``int_bits + frac_bits`` bits (the saturated values would wrap).
    """
    total_bits = int_bits + frac_bits

    # A word wider than the storage type would wrap silently in astype().
    out_dtype = np.dtype(dtype)
    if np.issubdtype(out_dtype, np.integer) and total_bits > np.iinfo(out_dtype).bits:
        raise ValueError(
            f"float_to_q: {total_bits}-bit fixed-point word does not fit in {out_dtype}"
        )

    min_val = -(1 << (total_bits - 1))
    max_val = (1 << (total_bits - 1)) - 1

    # np.asarray makes list input scale element-wise (list * int would repeat
    # the list). float64 keeps the saturation bounds exactly representable for
    # word widths up to 53 bits, which float32 cannot do for 32-bit formats.
    values = np.asarray(x, dtype=np.float64)
    if np.isnan(values).any():
        # NaN survives clip() and casts to an undefined integer.
        raise ValueError("float_to_q: input contains NaN")

    xq = np.round(values * (1 << frac_bits))
    xq = np.clip(xq, min_val, max_val)
    return xq.astype(dtype)

def q_to_float(x, frac_bits):
    """Convert fixed-point integers back to float32.

    ``x`` must provide ``astype`` (e.g. a NumPy array); each element is cast to
    float32 and divided by ``2**frac_bits``.
    """
    scale = 1 << frac_bits
    as_float32 = x.astype(np.float32)
    return as_float32 / scale

In [22]:
def write_tensor_mem(tensor, name, path):
    """
    Quantize a 2-D tensor and dump it as a hex memory file.

    tensor: torch.Tensor with two dimensions, indexed [x, y]
    name:   base file name; the file written is "<path>/<name>.mem"
    path:   directory that receives the file

    The tensor is quantized with float_to_q using the module-level
    FRAC_BITS / INT_BITS / DTYPE settings. Words are emitted one per line,
    walking y in the outer loop and x in the inner loop, each as 8 upper-case
    hex digits (the value masked to 32 bits, i.e. two's complement).
    """
    assert tensor.ndim == 2, f"{name} must be 2D"

    # Detach from autograd, bring to host memory, and work in float32
    values_f32 = tensor.detach().cpu().numpy().astype(np.float32)

    # Fixed-point conversion with the notebook-wide format
    quantized = float_to_q(
        values_f32, frac_bits=FRAC_BITS, int_bits=INT_BITS, dtype=DTYPE
    )

    n_x, n_y = quantized.shape
    mem_file = os.path.join(path, f"{name}.mem")

    with open(mem_file, "w") as handle:
        for col in range(n_y):
            # Mask to 32 bits so negative words print as two's complement
            words = (int(quantized[row, col]) & 0xFFFFFFFF for row in range(n_x))
            handle.writelines(f"{word:08X}\n" for word in words)

    print(f"Wrote {name}.mem  shape=({n_x},{n_y})  entries={n_x*n_y}")

In [23]:
# Dump every sample tensor to "<name>.mem" inside data_path, in a fixed order.
for mem_name, sample in (
    ("encoder_input", encoder_input),
    ("bottleneck_input", bottleneck_input),
    ("decoder_first_input", decoder_first_input),
    ("decoder_last_input", decoder_last_input),
    ("out_input", out_input),
):
    write_tensor_mem(sample, mem_name, data_path)

Wrote encoder_input.mem  shape=(1,512)  entries=512
Wrote bottleneck_input.mem  shape=(256,32)  entries=8192
Wrote decoder_first_input.mem  shape=(256,32)  entries=8192
Wrote decoder_last_input.mem  shape=(64,256)  entries=16384
Wrote out_input.mem  shape=(16,512)  entries=8192


# Check: read back `encoder_input.mem` and compare it with the original tensor

In [24]:
# Round-trip check for encoder_input: read the .mem file back, undo the
# quantization and compare with the tensor that was written.

# --- LOAD ORIGINAL ---
orig = encoder_input.detach().cpu().numpy().astype(np.float32)
X, Y = orig.shape

# --- READ BACK MEM (HEX) ---
# One hex word per line; blank lines (e.g. a trailing newline) are skipped.
with open(os.path.join(data_path, "encoder_input.mem")) as f:
    mem_words = [int(line, 16) for line in f if line.strip()]

# A truncated or over-long file must not be silently accepted.
assert len(mem_words) == X * Y, (
    f"encoder_input.mem holds {len(mem_words)} words, expected {X * Y}"
)

# reinterpret as signed int32 (two's complement)
mem_vals = np.array(mem_words, dtype=np.uint32).view(np.int32)

# --- RECONSTRUCT [x, y] USING SAME ORDER (y first, then x) ---
# The file stores word x + y*X for element [x, y], which is exactly
# column-major ("F") order for an (X, Y) array.
recon_q = mem_vals.reshape((X, Y), order="F")

# --- CONVERT BACK TO FLOAT ---
recon_f = q_to_float(recon_q, FRAC_BITS)

# --- ERROR METRICS ---
abs_err = np.abs(orig - recon_f)

print("===== SANITY CHECK =====")
print("Original min/max:", orig.min(), orig.max())
print("Reconst  min/max:", recon_f.min(), recon_f.max())
print("Max abs error   :", abs_err.max())
print("Mean abs error  :", abs_err.mean())

# --- PRINT FIRST 10 VALUES (MEM ORDER) ---
print("\nFirst 10 entries (mem order):")
for i in range(min(10, X * Y)):  # never index past the end of a small tensor
    y = i // X
    x = i % X
    print(
        f"[{x},{y}]  "
        f"orig={orig[x,y]:+.6f}  "
        f"Q={recon_q[x,y]:6d}  "
        f"recon={recon_f[x,y]:+.6f}"
    )

# --- PASS/FAIL ---
# Round-to-nearest quantization is off by at most half an LSB unless a value
# was saturated; checked last so the diagnostics above are always printed.
half_lsb = 0.5 / (1 << FRAC_BITS)
assert abs_err.max() <= half_lsb, (
    f"round-trip error {abs_err.max()} exceeds half an LSB ({half_lsb}); "
    "a value was saturated or the file order is wrong"
)


===== SANITY CHECK =====
Original min/max: -2.780352 2.9948635
Reconst  min/max: -2.7802734 2.9951172
Max abs error   : 0.00048825145
Mean abs error  : 0.00023887216

First 10 entries (mem order):
[0,0]  orig=-1.616490  Q= -1655  recon=-1.616211
[0,1]  orig=+0.568455  Q=   582  recon=+0.568359
[0,2]  orig=-0.510225  Q=  -522  recon=-0.509766
[0,3]  orig=-0.911339  Q=  -933  recon=-0.911133
[0,4]  orig=-1.155516  Q= -1183  recon=-1.155273
[0,5]  orig=-0.226151  Q=  -232  recon=-0.226562
[0,6]  orig=-1.289133  Q= -1320  recon=-1.289062
[0,7]  orig=+1.065382  Q=  1091  recon=+1.065430
[0,8]  orig=-0.716660  Q=  -734  recon=-0.716797
[0,9]  orig=-0.533334  Q=  -546  recon=-0.533203
