In [1]:
import os
import copy
from pathlib import Path
import json
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import itertools

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model, Sequential

import hls4ml
from hls4ml.converters.keras_to_hls import parse_default_keras_layer
from hls4ml.model.attributes import ConfigurableAttribute, TypeAttribute
from hls4ml.model.types import FixedPrecisionType, RoundingMode, SaturationMode
from hls4ml.model.attributes import Attribute

import h5py

import qkeras as qk
from qkeras.estimate import print_qstats
from qkeras.utils import model_quantize
from qkeras.utils import quantized_model_dump
from qkeras import QActivation, QDense, QConv2DBatchnorm

# Source the Vivado path
os.environ['PATH'] = os.environ['XILINX_VIVADO'] + '/bin:' + os.environ['PATH']

np.random.seed(0)
tf.random.set_seed(0)


2025-06-06 14:02:34.290735: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-06-06 14:02:34.377266: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-06 14:02:34.380370: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-06-06 14:02:34.380390: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudar





Helper functions

In [2]:
def print_quantization_info(model):
    for layer in model.layers:
        print(f"Layer Name: {layer.name}")
        print(f"Type: {layer.__class__.__name__}")
        
        # Helper function to handle both quantizer objects and config dicts
        def process_quantizer(quantizer, prefix=""):
            if quantizer:
                if isinstance(quantizer, dict):
                    # Handle dictionary config
                    class_name = quantizer.get("class_name", "UnknownQuantizer")
                    config = quantizer.get("config", {})
                else:
                    # Handle object with potential get_config()
                    class_name = quantizer.__class__.__name__
                    config = quantizer.get_config() if hasattr(quantizer, "get_config") else {}
                
                print(f"  {prefix}Quantizer: {class_name}")
                print(f"  {prefix}Config: {config}")
            else:
                print(f"  No {prefix}Quantizer")

        # Check for QKeras layers with kernel/bias quantizers
        if isinstance(layer, (qk.QDense, qk.QConv2D, qk.QConv1D, 
                            qk.QConv2DTranspose, qk.QDepthwiseConv2D)):
            # Kernel quantizer
            process_quantizer(layer.kernel_quantizer, "Kernel ")
            
            # Bias quantizer
            process_quantizer(layer.bias_quantizer, "Bias ")
            
            # Activation quantizer
            activation = layer.activation
            if activation:
                if isinstance(activation, dict) or hasattr(activation, "get_config"):
                    process_quantizer(activation, "Activation ")
                else:
                    print(f"  Activation: {activation} (Not Quantized)")
            else:
                print("  No Activation")

        # Check for QActivation layers
        elif isinstance(layer, qk.QActivation):
            process_quantizer(layer.quantizer, "Activation ")
        
        print("-" * 50)

def generate_binary_numbers(n_bits):
    return np.array([''.join(bits) for bits in itertools.product('01', repeat=n_bits)])

def bin_to_int(bin_str):
    int_val = 0
    for i, bit in enumerate(bin_str[::-1]):
        int_val += int(bit)*(2**i)
    return int_val

def bin_to_frac(bin_str):
    frac_val = 0
    for i, bit in enumerate(bin_str):
        frac_val += int(bit)*(2**(-i-1))
    return frac_val

def frac_to_bin(frac, n_bits=8):
    bin_str = ""
    for i in range(n_bits):
        if frac >= 2**(-i-1):
            bin_str += "1"
            frac -= 2**(-i-1)
        else:
            bin_str += "0"
    return bin_str

def hex_str_to_float_frac(hex_str):
    int_num = int(hex_str.replace("\n", "").strip(), 16)
    int_bin_str = np.binary_repr(int_num)
    float_frac = bin_to_frac(int_bin_str)
    return float_frac
    

# 1. Load Keras model

In [3]:
KERAS_DIR = "/home/aelabd/RHEED/keras_models"
RTL_DIR = "/home/aelabd/RHEED/rtl_models"

def dice_loss(y_true, y_pred, delta=0.6):
    error = y_true - y_pred
    is_small = tf.abs(error) <= delta
    squared_loss = 0.5 * tf.square(error)
    linear_loss = delta * (tf.abs(error) - 0.5 * delta)
    return tf.reduce_mean(tf.where(is_small, squared_loss, linear_loss))

fpath_model_keras = os.path.join(KERAS_DIR, "model.keras")
with tf.keras.utils.custom_object_scope({'dice_loss': dice_loss,
                                         'QConv2DBatchnorm': QConv2DBatchnorm,
                                         'QActivation': QActivation,
                                         'QDense': QDense
                                         }):
        model = tf.keras.models.load_model(fpath_model_keras)

print_quantization_info(model)

2025-06-06 14:02:36.977879: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-06-06 14:02:36.978051: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-06-06 14:02:36.978125: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2025-06-06 14:02:36.978177: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2025-06-06 14:02:36.978227: W tensorf

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Layer Name: q_conv2d_batchnorm_5
Type: QConv2DBatchnorm
  Kernel Quantizer: quantized_bits
  Kernel Config: {'bits': 8, 'integer': 0, 'symmetric': 0, 'alpha': 1, 'keep_negative': True, 'use_stochastic_rounding': False, 'qnoise_factor': 1.0}
  Bias Quantizer: quantized_bits
  Bias Config: {'bits': 8, 'integer': 0, 'symmetric': 0, 'alpha': 1, 'keep_negative': True, 'use_stochastic_rounding': False, 'qnoise_factor': 1.0}
  Activation: <function linear at 0x7f9c02478430> (Not Quantized)
--------------------------------------------------
Layer Name: q_activation_8
Type: QActivation
  Activation Quantizer: quantized_relu
  Activation Config: {'bits': 8, 'integer': 2, 'use_sigmoid': 0, 'negative_slope': 0.0, 'use_stochastic_rounding': False, 'relu_upper_bound': None, 'qnoise_factor': 1.0}
--------------------------------------------------
Layer Name: max_pooling2d_5
Type: MaxPooling2D
--------------------------------------------------
Layer Name: q_conv2d_batchnorm_6
Type: QConv2DBatchnorm
  

# 2. Load testbench input data

In [4]:
IN_ROWS = 100
IN_COLS = 160
OUT_ROWS = 48
OUT_COLS = 48
NUM_CROPS = 1
data_dir = f"tb_data_Mono8/{IN_ROWS}x{IN_COLS}_to_{OUT_ROWS}x{OUT_COLS}x{NUM_CROPS}"

CROP_X0 = [0, 13, 112]
CROP_Y0 = [0, 1, 52]
input_data = {}
for y0 in CROP_Y0:
    input_data[f"y1_{y0}"] = {}
    for x0 in CROP_X0:
        crop_data = []
        fpath = os.path.join(data_dir, f"Y1_{y0}/X1_{x0}/HDL_cropnorm_out.txt")
        with open(fpath, "r") as f:
            for line in f.readlines():

                line_float = np.zeros((OUT_COLS,))
                for i, hex_str in enumerate(line.split(" ")):
                    hex_str_clean = hex_str.replace("\n", "").replace(" ", "")
                    if len(hex_str_clean) < 1: continue
                    line_float[i] = hex_str_to_float_frac(hex_str_clean)

                crop_data.append(line_float)
        input_data[f"y1_{y0}"][f"x1_{x0}"] = np.expand_dims(np.expand_dims(np.array(crop_data), 0), 3)

# 3. Generate predictions

# NOTE: Output quantization is <8,2> not <8,0>

In [15]:
def frac_to_bin_pos(frac, n_bits=8):
    bin_str = "0"
    for i in range(n_bits-1):
        if frac >= 2**(-i-1):
            bin_str += "1"
            frac -= 2**(-i-1)
        else:
            bin_str += "0"
    # bin_str = "00" + bin_str[:-2]
    return bin_str

def frac_to_bin_neg(frac, n_bits=8): # Use 2's complement to invert
    bin_str_2c = frac_to_bin_pos(-frac, n_bits=n_bits)
    bin_str_minus_1 = ""
    for b in bin_str_2c:
        if b=="0": bin_str_minus_1 += "1"
        elif b=="1": bin_str_minus_1 += "0"

    int_of_bin_str = bin_to_int(bin_str_minus_1) + 1
    bin_str = np.binary_repr(int_of_bin_str)
    # print(f"frac: {frac}, bin_str_2c: {bin_str_2c}, bin_str_minus_1: {bin_str_minus_1}, int_of_bin_str: {int_of_bin_str}, bin_str: {bin_str}")
    return bin_str

def frac_to_bin(frac, n_bits=8):
    if frac >= 0: return frac_to_bin_pos(frac, n_bits=n_bits)
    else: return frac_to_bin_neg(frac, n_bits=n_bits)
    

In [None]:
pred_data = {}
output_data = {}
for y0 in CROP_Y0:
    pred_data[f"y1_{y0}"] = {}
    for x0 in CROP_X0:
        print(f"y0={y0}, x0={x0}")
        pred = model.predict(input_data[f"y1_{y0}"][f"x1_{x0}"])[0]
        pred_data[f"y1_{y0}"][f"x1_{x0}"] = pred
        fpath_txt = os.path.join(data_dir, f"Y1_{y0}/X1_{x0}/QKeras_pred.txt")
        with open(fpath_txt, "w") as f:
            for val in pred:
                bin_str = frac_to_bin(val)
                int_num = bin_to_int(bin_str)
                hex_str = hex(int_num).replace("0x", "").upper()
                if len(hex_str) < 2: hex_str = "0" + hex_str
                f.write(f"{hex_str} ")
                print(f"val: {val}, bin_str: {bin_str}")

y0=0, x0=0
val: 0.451171875, bin_str: 00111001
val: 0.5185546875, bin_str: 01000010
val: -0.63134765625, bin_str: 10110000
val: -0.43115234375, bin_str: 11001001
val: -0.12060546875, bin_str: 11110001
y0=0, x0=13
val: 0.45068359375, bin_str: 00111001
val: 0.51904296875, bin_str: 01000010
val: -0.63720703125, bin_str: 10101111
val: -0.43017578125, bin_str: 11001001
val: -0.11669921875, bin_str: 11110010
y0=0, x0=112
val: 0.451171875, bin_str: 00111001
val: 0.5185546875, bin_str: 01000010
val: -0.63134765625, bin_str: 10110000
val: -0.43115234375, bin_str: 11001001
val: -0.12060546875, bin_str: 11110001
y0=1, x0=0
val: 0.45556640625, bin_str: 00111010
val: 0.52734375, bin_str: 01000011
val: -0.66015625, bin_str: 10101100
val: -0.45556640625, bin_str: 11000110
val: -0.11865234375, bin_str: 11110001
y0=1, x0=13
val: 0.45654296875, bin_str: 00111010
val: 0.52392578125, bin_str: 01000011
val: -0.66552734375, bin_str: 10101011
val: -0.45361328125, bin_str: 11000110
val: -0.11083984375, bin_st

In [14]:
pred_data = {}
output_data = {}
for y0 in CROP_Y0:
    pred_data[f"y1_{y0}"] = {}
    for x0 in CROP_X0:
        print(f"y0={y0}, x0={x0}")
        pred = model.predict(input_data[f"y1_{y0}"][f"x1_{x0}"])[0]
        pred_data[f"y1_{y0}"][f"x1_{x0}"] = pred
        fpath_txt = os.path.join(data_dir, f"Y1_{y0}/X1_{x0}/QKeras_pred.txt")
        with open(fpath_txt, "w") as f:
            for val in pred:
                bin_str = frac_to_bin(val)
                int_num = bin_to_int(bin_str)
                hex_str = hex(int_num).replace("0x", "").upper()
                if len(hex_str) < 2: hex_str = "0" + hex_str
                f.write(f"{hex_str} ")
                print(f"val: {val}, bin_str: {bin_str}")



y0=0, x0=0
val: 0.451171875, bin_str: 00001110
val: 0.5185546875, bin_str: 00010000
val: -0.63134765625, bin_str: 11101100
val: -0.43115234375, bin_str: 11110011
val: -0.12060546875, bin_str: 11111101
y0=0, x0=13
val: 0.45068359375, bin_str: 00001110
val: 0.51904296875, bin_str: 00010000
val: -0.63720703125, bin_str: 11101100
val: -0.43017578125, bin_str: 11110011
val: -0.11669921875, bin_str: 11111101
y0=0, x0=112
val: 0.451171875, bin_str: 00001110
val: 0.5185546875, bin_str: 00010000
val: -0.63134765625, bin_str: 11101100
val: -0.43115234375, bin_str: 11110011
val: -0.12060546875, bin_str: 11111101
y0=1, x0=0
val: 0.45556640625, bin_str: 00001110
val: 0.52734375, bin_str: 00010000
val: -0.66015625, bin_str: 11101011
val: -0.45556640625, bin_str: 11110010
val: -0.11865234375, bin_str: 11111101
y0=1, x0=13
val: 0.45654296875, bin_str: 00001110
val: 0.52392578125, bin_str: 00010000
val: -0.66552734375, bin_str: 11101011
val: -0.45361328125, bin_str: 11110010
val: -0.11083984375, bin_st

In [78]:
def ap_fixed_8_2_to_float(bin_str_yo):
    out_float = 0
    if bin_str_yo[0]=="1": out_float += 2
        
    if bin_str_yo[1]=="1": out_float += 1        

    for i in range(6):
        if bin_str_yo[i+2]=="1": 
            out_float += 2**(-(i+1))
        
    return out_float

In [2]:
import os
vdir = "/home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/myproject_syn_verilog"
for f in os.listdir(vdir):
    print(f"remove_files {os.path.join(vdir, f)}")

remove_files /home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/myproject_syn_verilog/myproject_dense_resource_ap_fixed_8_0_5_3_0_ap_fixed_8_2_5_3_0_config2_mult_s_w2_ROM_AUeOg.v
remove_files /home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/myproject_syn_verilog/myproject_relu_array_ap_fixed_16u_array_ap_fixed_8_2_5_3_0_16u_relu_config8_s.v
remove_files /home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/myproject_syn_verilog/myproject_start_for_relu_array_ap_fixed_16u_array_ap_fixed_8_2_5_3_0_16u_relu_config8_U0.v
remove_files /home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/myproject_syn_verilog/myproject_dense_resource_ap_fixed_8_2_5_3_0_ap_fixed_8_2_5_3_0_config2_mult_s_outidx_ROdEe.dat
remove_files /home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/myproject_syn_verilog/myproject_flow_control_loop_pipe_no_ap_cont.v
remove_files /home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/myproject_syn_verilog/myproject_start_for_relu_array_ap_fixed_8u_array_

In [3]:
vdir_src = "/home/aelabd/RHEED/rtl_models/vivado_2022.2/yuhao_model/myproject_prj/solution1/syn/verilog"
vdir_dest = "/home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/myproject_syn_verilog"
print(f"cp -r {vdir_src} {vdir_dest}")

cp -r /home/aelabd/RHEED/rtl_models/vivado_2022.2/yuhao_model/myproject_prj/solution1/syn/verilog /home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/myproject_syn_verilog
