In [1]:
import os
import copy
from pathlib import Path
import json
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import itertools

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model, Sequential

import hls4ml
from hls4ml.converters.keras_to_hls import parse_default_keras_layer
from hls4ml.model.attributes import ConfigurableAttribute, TypeAttribute
from hls4ml.model.types import FixedPrecisionType, RoundingMode, SaturationMode
from hls4ml.model.attributes import Attribute

import h5py

import qkeras as qk
from qkeras.estimate import print_qstats
from qkeras.utils import model_quantize
from qkeras.utils import quantized_model_dump
from qkeras import QActivation, QDense, QConv2DBatchnorm

# Put the Vivado binaries at the front of PATH.
# XILINX_VIVADO must be set; a missing variable raises KeyError here so the
# problem is reported before any tool invocation.
vivado_bin = os.path.join(os.environ['XILINX_VIVADO'], 'bin')
current_path = os.environ.get('PATH', '')
# Only prepend once, so re-running this cell does not keep growing PATH.
if vivado_bin not in current_path.split(os.pathsep):
    os.environ['PATH'] = (
        vivado_bin + os.pathsep + current_path if current_path else vivado_bin
    )

# Fix the NumPy and TensorFlow random seeds for reproducible runs.
np.random.seed(0)
tf.random.set_seed(0)


2025-07-04 17:50:08.987221: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-07-04 17:50:09.071010: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-04 17:50:09.074315: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-07-04 17:50:09.074337: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudar





Helper functions

In [2]:
def ap_fixed_bin_to_float(bin_str: str, X: int, Y: int) -> np.float32:
    """
    Decode a two's complement ap_fixed<X,Y> bit pattern into a float.

    Parameters:
        bin_str (str): The bit pattern, most significant bit first; must be
            exactly X characters long.
        X (int): Total word width in bits.
        Y (int): Number of integer bits (the sign bit counts as one of them).

    Returns:
        np.float32: The decoded value.

    Raises:
        ValueError: If the length of bin_str differs from X.
    """
    width = len(bin_str)
    if width != X:
        raise ValueError(f"Binary string length ({width}) must match X ({X})")

    # Read the pattern as an unsigned integer, then wrap it into the signed
    # range when the leading (sign) bit is set.
    unsigned = int(bin_str, 2)
    signed = unsigned - (1 << X) if bin_str[:1] == '1' else unsigned

    # The X - Y least significant bits are fractional, so divide them out.
    return np.float32(signed / (2 ** (X - Y)))

print(ap_fixed_bin_to_float('00110100', 8, 4))  # 3.25  (raw 52 / 2**4)
print(ap_fixed_bin_to_float('11100000', 8, 4))  # -2.0  (raw 224 - 256 = -32, / 2**4)

3.25
-2.0


# Load QKeras output (benchmark)

In [3]:
# Dimensions encoded in the test-bench data directory name.
IN_ROWS = 300
IN_COLS = 320
OUT_ROWS = 48
OUT_COLS = 48
NUM_CROPS = 5
data_dir = f"tb_data_Mono8/{IN_ROWS}x{IN_COLS}_to_{OUT_ROWS}x{OUT_COLS}x{NUM_CROPS}"

# Fixed-point format of the dumped values: ap_fixed<TOTAL_BITS, INT_BITS>.
TOTAL_BITS = 22
INT_BITS = 11

# (y0, x0) pair of each crop; every pair selects a sub-directory Y1_{y0}_X1_{x0}.
CROP_Y0_X0 = [(0,0), (0,30), (20,0), (40,40), (252, 272)]
assert len(CROP_Y0_X0) == NUM_CROPS, "CROP_Y0_X0 must list exactly NUM_CROPS crops"

# Number of values (one bit string per line) expected in each prediction file.
VALUES_PER_CROP = 5

# Row i holds the values read for crop i.
qkeras_out = np.zeros((NUM_CROPS, VALUES_PER_CROP))

for i, (y0, x0) in enumerate(CROP_Y0_X0):
    fpath = os.path.join(
        data_dir,
        f"Y1_{y0}_X1_{x0}",
        f"QKeras_mg1_pred_ap_fixed_{TOTAL_BITS}_{INT_BITS}.txt",
    )
    with open(fpath, "r") as f:
        # Ignore blank lines (e.g. a trailing newline at end of file).
        bit_strings = [line.strip() for line in f if line.strip()]
    # A short file would otherwise leave zeros in the row, which could hide a
    # mismatch when this array is later compared against another one.
    if len(bit_strings) != VALUES_PER_CROP:
        raise ValueError(
            f"{fpath}: expected {VALUES_PER_CROP} values, found {len(bit_strings)}"
        )
    qkeras_out[i] = [
        ap_fixed_bin_to_float(bits, TOTAL_BITS, INT_BITS) for bits in bit_strings
    ]

print(qkeras_out)

[[0.39941406 0.58105469 0.72265625 0.50097656 0.02880859]
 [0.39355469 0.60644531 0.734375   0.49902344 0.02685547]
 [0.31835938 0.61376953 0.84277344 0.58789062 0.16748047]
 [0.52929688 0.44433594 0.79394531 0.57666016 0.07861328]
 [0.42431641 0.38525391 0.82470703 0.62451172 0.3203125 ]]


# Load RTL full pipeline output

In [4]:
# The RTL output is compared element-wise against qkeras_out, so it uses the
# same shape: one row per crop, one column per value in the file.
full_rtl_out = np.zeros_like(qkeras_out)

for i, (y0, x0) in enumerate(CROP_Y0_X0):
    fpath = os.path.join(data_dir, f"Y1_{y0}_X1_{x0}", "full_RTL_out.txt")
    with open(fpath, "r") as f:
        # Ignore blank lines (e.g. a trailing newline at end of file).
        bit_strings = [line.strip() for line in f if line.strip()]
    # A short file would otherwise leave zeros in the row, which could hide a
    # mismatch in the comparison against the benchmark.
    if len(bit_strings) != full_rtl_out.shape[1]:
        raise ValueError(
            f"{fpath}: expected {full_rtl_out.shape[1]} values, found {len(bit_strings)}"
        )
    full_rtl_out[i] = [
        ap_fixed_bin_to_float(bits, TOTAL_BITS, INT_BITS) for bits in bit_strings
    ]

print(full_rtl_out)

[[0.39941406 0.58105469 0.72265625 0.50097656 0.02880859]
 [0.39355469 0.60644531 0.734375   0.49902344 0.02685547]
 [0.31835938 0.61376953 0.84277344 0.58789062 0.16748047]
 [0.52929688 0.44433594 0.79394531 0.57666016 0.07861328]
 [0.42431641 0.38525391 0.82470703 0.62451172 0.3203125 ]]


# Compare and contrast

In [5]:
# Element-wise difference: QKeras CNN output minus RTL full-pipeline output.
# An all-zero matrix means the two sets of values are identical.
print(np.subtract(qkeras_out, full_rtl_out))

[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
