In [1]:
import os
import copy
from pathlib import Path
import json
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import itertools

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model, Sequential

import hls4ml
from hls4ml.converters.keras_to_hls import parse_default_keras_layer
from hls4ml.model.attributes import ConfigurableAttribute, TypeAttribute
from hls4ml.model.types import FixedPrecisionType, RoundingMode, SaturationMode
from hls4ml.model.attributes import Attribute

import h5py

import qkeras as qk
from qkeras.estimate import print_qstats
from qkeras.utils import model_quantize
from qkeras.utils import quantized_model_dump
from qkeras import QActivation, QDense, QConv2DBatchnorm

# Source the Vivado path
os.environ['PATH'] = os.environ['XILINX_VIVADO'] + '/bin:' + os.environ['PATH']

np.random.seed(0)
tf.random.set_seed(0)


2025-07-01 15:46:09.269355: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-07-01 15:46:09.358251: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-01 15:46:09.361135: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-07-01 15:46:09.361150: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudar





Helper functions

In [2]:
def ap_fixed_bin_to_float(bin_str: str, X: int, Y: int) -> np.float32:
    """
    Converts a binary string in two's complement ap_fixed<X,Y> format to a float.
    
    Parameters:
        bin_str (str): Binary string of length X.
        X (int): Total number of bits.
        Y (int): Number of integer bits (including sign bit).
        
    Returns:
        np.float32: Floating point value.
    """
    if len(bin_str) != X:
        raise ValueError(f"Binary string length ({len(bin_str)}) must match X ({X})")
    
    # Interpret binary string as signed integer
    int_val = int(bin_str, 2)
    if bin_str[0] == '1':  # Negative number (two's complement)
        int_val -= (1 << X)
    
    # Scale back by number of fractional bits
    frac_bits = X - Y
    value = int_val / (2 ** frac_bits)
    return np.float32(value)

print(ap_fixed_bin_to_float('00110100', 8, 4))  # 3.25
print(ap_fixed_bin_to_float('11100000', 8, 4))  # -4

3.25
-2.0


# Load QKeras benchmarks

In [None]:
IN_ROWS = 100
IN_COLS = 160
OUT_ROWS = 48
OUT_COLS = 48
NUM_CROPS = 1
data_dir = f"tb_data_Mono8/{IN_ROWS}x{IN_COLS}_to_{OUT_ROWS}x{OUT_COLS}x{NUM_CROPS}"

CROP_X0 = [0, 13, 112]
# CROP_Y0 = [0, 1, 52]
CROP_Y0 = [1, 52]

qkeras_out = np.zeros((6, 5))
i = 0
for y0 in CROP_Y0:
    for x0 in CROP_X0:
        fpath = os.path.join(data_dir, f"Y1_{y0}/X1_{x0}/QKeras_pred_ap_fixed_22_11.txt")
        with open(fpath, "r") as f:
            for j, line in enumerate(f.readlines()):
                line_float = ap_fixed_bin_to_float(line.strip(), 22, 11)
                qkeras_out[i, j] = line_float
            i += 1

print(qkeras_out)

[[ 0.43359375  0.49853516 -0.55810547 -0.38134766 -0.20068359]
 [ 0.42626953  0.49755859 -0.55761719 -0.38427734 -0.19042969]
 [ 0.43359375  0.49853516 -0.55810547 -0.38134766 -0.20068359]
 [ 0.42724609  0.51220703 -0.51269531 -0.35351562 -0.29882812]
 [ 0.43115234  0.50732422 -0.52197266 -0.35253906 -0.25927734]
 [ 0.42724609  0.51220703 -0.51269531 -0.35351562 -0.29882812]]


# Load RTL CNN benchmarks

In [None]:
rtl_cnn_out = np.zeros((6,5))
i=0
for y0 in CROP_Y0:
    for x0 in CROP_X0:
        fpath = os.path.join(data_dir, f"Y1_{y0}/X1_{x0}/CNN_out_benchmark_ap_fixed_22_11.txt")
        with open(fpath, "r") as f:
            for j, line in enumerate(f.readlines()):
                line_float = ap_fixed_bin_to_float(line.strip(), 22, 11)
                rtl_cnn_out[i, j] = line_float
            i += 1

print(rtl_cnn_out)

[[ 0.43359375  0.49853516 -0.55810547 -0.38134766 -0.20068359]
 [ 0.4375      0.50488281 -0.55419922 -0.39160156 -0.20605469]
 [ 0.42919922  0.49902344 -0.55615234 -0.38183594 -0.20507812]
 [ 0.42578125  0.51757812 -0.51171875 -0.35400391 -0.3046875 ]
 [ 0.43261719  0.52734375 -0.50585938 -0.35742188 -0.34619141]
 [ 0.42578125  0.51757812 -0.51171875 -0.35400391 -0.3046875 ]]


In [23]:
full_pipeline_out = np.zeros((6,5))
i=0
for y0 in CROP_Y0:
    for x0 in CROP_X0:
        fpath = os.path.join(data_dir, f"Y1_{y0}/X1_{x0}/full_pipeline_out_ap_fixed_22_11.txt")
        with open(fpath, "r") as f:
            for j,line in enumerate(f.readlines()):
                line_float = ap_fixed_bin_to_float(line.strip(), 22, 11)
                full_pipeline_out[i, j] = line_float
            i += 1

print(full_pipeline_out)

[[ 0.43359375  0.49853516 -0.55810547 -0.38134766 -0.20068359]
 [ 0.42626953  0.49755859 -0.55761719 -0.38427734 -0.19042969]
 [ 0.43359375  0.49853516 -0.55810547 -0.38134766 -0.20068359]
 [ 0.42724609  0.51220703 -0.51269531 -0.35351562 -0.29882812]
 [ 0.43115234  0.50732422 -0.52197266 -0.35253906 -0.25927734]
 [ 0.42724609  0.51220703 -0.51269531 -0.35351562 -0.29882812]]


In [27]:
qkeras_out - full_pipeline_out

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

# Create, imshow 3D map of diff

In [6]:
# QKeras CNN - RTL CNN 
print(f"\nQKeras CNN - RTL CNN pipeline")
cnn_diff = {}
for y0 in CROP_Y0:
    print(f"\ny0={y0}")
    cnn_diff[f"y1_{y0}"] = {}
    for x0 in CROP_X0:
        cnn_diff[f"y1_{y0}"][f"x1_{x0}"] = qkeras_out[f"y1_{y0}"][f"x1_{x0}"] - rtl_cnn_out[f"y1_{y0}"][f"x1_{x0}"]
        print(f"  x0={x0} diff: {cnn_diff[f'y1_{y0}'][f'x1_{x0}']:.6f}")


QKeras CNN - RTL CNN pipeline

y0=1
  x0=0 diff: 0.000000
  x0=13 diff: 0.015625
  x0=112 diff: 0.004395

y0=52
  x0=0 diff: 0.005859
  x0=13 diff: 0.086914
  x0=112 diff: 0.005859


In [9]:
# RTL CNN - RTL full pipeline
rtl_diff = {}
for y0 in CROP_Y0:
    if y0==0:
        continue
    print(f"\ny0={y0}")
    rtl_diff[f"y1_{y0}"] = {}
    for x0 in CROP_X0:
        rtl_diff[f"y1_{y0}"][f"x1_{x0}"] = rtl_cnn_out[f"y1_{y0}"][f"x1_{x0}"] - full_pipeline_out[f"y1_{y0}"][f"x1_{x0}"]
        print(f"  x0={x0} diff: {rtl_diff[f'y1_{y0}'][f'x1_{x0}']:.6f}")
    
                


y0=1
  x0=0 diff: 0.000000
  x0=13 diff: -0.015625
  x0=112 diff: -0.004395

y0=52
  x0=0 diff: -0.005859
  x0=13 diff: -0.086914
  x0=112 diff: -0.005859


In [20]:
rtl_cnn_out_arr = np.zeros((9, 5))
i=0
for k,v in rtl_cnn_out.items():
    print(f"{k}")
    for k2, v2 in v.items():
        rtl_cnn_out_arr[i] = v[k2]
        i += 1

y1_1
y1_52


In [21]:
rtl_cnn_out_arr

array([[-0.20068359, -0.20068359, -0.20068359, -0.20068359, -0.20068359],
       [-0.20605469, -0.20605469, -0.20605469, -0.20605469, -0.20605469],
       [-0.20507812, -0.20507812, -0.20507812, -0.20507812, -0.20507812],
       [-0.3046875 , -0.3046875 , -0.3046875 , -0.3046875 , -0.3046875 ],
       [-0.34619141, -0.34619141, -0.34619141, -0.34619141, -0.34619141],
       [-0.3046875 , -0.3046875 , -0.3046875 , -0.3046875 , -0.3046875 ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ]])

In [23]:
rtl_cnn_out["y1_1"]["x1_112"]

-0.20507812