In [1]:
import os
import copy

from pathlib import Path
import json
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model, Sequential

import hls4ml
from hls4ml.converters.keras_to_hls import parse_default_keras_layer
from hls4ml.model.attributes import ConfigurableAttribute, TypeAttribute
from hls4ml.model.types import FixedPrecisionType, RoundingMode, SaturationMode
from hls4ml.model.attributes import Attribute

import h5py

import qkeras
from qkeras.estimate import print_qstats
from qkeras.utils import model_quantize
from qkeras.utils import quantized_model_dump
from qkeras import QActivation, QDense, QConv2DBatchnorm

# Prepend the Vivado bin directory to PATH (raises KeyError if XILINX_VIVADO
# is not set). The membership check keeps this cell idempotent: re-running it
# does not stack the same entry onto PATH again.
vivado_bin = os.environ['XILINX_VIVADO'] + '/bin'
if vivado_bin not in os.environ['PATH'].split(os.pathsep):
    os.environ['PATH'] = vivado_bin + os.pathsep + os.environ['PATH']

# Fix the NumPy and TensorFlow global RNG seeds.
np.random.seed(0)
tf.random.set_seed(0)

# hls4ml backend used by the conversion cell; the backend-dependent cells in
# this notebook handle "Vivado" and "Vitis".
# BACKEND = "Vivado"
BACKEND = "Vitis"


2025-06-05 19:27:04.783059: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-06-05 19:27:05.017164: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-05 19:27:05.025509: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-06-05 19:27:05.025547: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudar





Globals

# 1. Load Keras model

In [2]:
# Directory containing the saved Keras model file loaded in this cell.
KERAS_DIR = "keras_models"
# Top-level directory name for generated RTL projects (same value as the
# leading component of the hls4ml output paths used by the conversion cell).
RTL_DIR = "rtl_models"

def dice_loss(y_true, y_pred, delta=0.6):
    """Mean piecewise quadratic/linear loss between targets and predictions.

    Despite its name, the body does not compute a Dice coefficient. Each
    element-wise residual ``r = y_true - y_pred`` contributes ``0.5 * r**2``
    when ``|r| <= delta`` and ``delta * (|r| - 0.5 * delta)`` otherwise
    (the Huber form). The name is kept because ``'dice_loss'`` is the key
    registered in the custom-object scope used when loading the saved model.

    Args:
        y_true: Target tensor.
        y_pred: Prediction tensor; must be subtractable from ``y_true``.
        delta: Threshold on ``|r|`` that separates the two branches.

    Returns:
        The mean of the per-element losses (``tf.reduce_mean`` over all
        elements).
    """
    residual = y_true - y_pred
    magnitude = tf.abs(residual)

    quadratic_branch = 0.5 * tf.square(residual)
    linear_branch = delta * (magnitude - 0.5 * delta)

    # Quadratic inside the +/-delta band, linear outside it.
    per_element = tf.where(magnitude <= delta, quadratic_branch, linear_branch)
    return tf.reduce_mean(per_element)

# Name -> object mapping made resolvable to Keras while the saved model is
# being deserialized.
qkeras_custom_objects = {
    'dice_loss': dice_loss,
    'QConv2DBatchnorm': QConv2DBatchnorm,
    'QActivation': QActivation,
    'QDense': QDense,
}

fpath_model_keras = os.path.join(KERAS_DIR, "model.keras")

with tf.keras.utils.custom_object_scope(qkeras_custom_objects):
    model = tf.keras.models.load_model(fpath_model_keras)

# Print the layer-by-layer summary of the loaded model.
model.summary()

2025-06-05 19:27:08.297010: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-06-05 19:27:08.297390: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-06-05 19:27:08.297532: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2025-06-05 19:27:08.297661: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2025-06-05 19:27:08.297746: W tensorf

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 q_conv2d_batchnorm_5 (QConv  (None, 46, 46, 8)        113       
 2DBatchnorm)                                                    
                                                                 
 q_activation_8 (QActivation  (None, 46, 46, 8)        0         
 )                                                               
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 11, 11, 8)        0         
 2D)                                                             
                                                                 
 q_conv2d_batchnorm_6 (QConv  (None, 9, 9, 16)         1233      
 2DBatchnorm)                                                    
                                                                 
 q_activation_9 (QActivation  (None, 9, 9, 16)        

In [3]:
import qkeras as qk

def print_quantization_info(model):
    """Print the quantizer settings of every layer in ``model``.

    For each layer the name and class name are printed, followed by:
      * QKeras dense/conv layers: the kernel quantizer, the bias quantizer
        and the activation (reported as "Not Quantized" when it is neither
        a dict nor an object exposing ``get_config``);
      * ``QActivation`` layers: the activation quantizer;
      * any other layer: nothing further.
    A line of 50 dashes closes each layer's section.

    Args:
        model: Object exposing an iterable ``layers`` attribute whose items
            have a ``name`` attribute.

    Returns:
        None. Everything is written to stdout.
    """

    def report(quantizer, prefix=""):
        """Print class name and config of one quantizer (object or dict)."""
        # A falsy quantizer (e.g. None) means nothing is configured.
        if not quantizer:
            print(f"  No {prefix}Quantizer")
            return

        if isinstance(quantizer, dict):
            # Serialized form: {"class_name": ..., "config": {...}}
            class_name = quantizer.get("class_name", "UnknownQuantizer")
            settings = quantizer.get("config", {})
        else:
            # Live object: ask it for its config when it can provide one.
            class_name = quantizer.__class__.__name__
            if hasattr(quantizer, "get_config"):
                settings = quantizer.get_config()
            else:
                settings = {}

        print(f"  {prefix}Quantizer: {class_name}")
        print(f"  {prefix}Config: {settings}")

    for layer in model.layers:
        print(f"Layer Name: {layer.name}")
        print(f"Type: {layer.__class__.__name__}")

        if isinstance(layer, (qk.QDense, qk.QConv2D, qk.QConv1D,
                              qk.QConv2DTranspose, qk.QDepthwiseConv2D)):
            # Layers carrying kernel/bias quantizers.
            report(layer.kernel_quantizer, "Kernel ")
            report(layer.bias_quantizer, "Bias ")

            act = layer.activation
            if not act:
                print("  No Activation")
            elif isinstance(act, dict) or hasattr(act, "get_config"):
                report(act, "Activation ")
            else:
                print(f"  Activation: {act} (Not Quantized)")
        elif isinstance(layer, qk.QActivation):
            # Stand-alone quantized activation layer.
            report(layer.quantizer, "Activation ")

        print("-" * 50)

# Report the quantizer settings of each layer of the loaded model.
print_quantization_info(model)

Layer Name: q_conv2d_batchnorm_5
Type: QConv2DBatchnorm
  Kernel Quantizer: quantized_bits
  Kernel Config: {'bits': 8, 'integer': 0, 'symmetric': 0, 'alpha': 1, 'keep_negative': True, 'use_stochastic_rounding': False, 'qnoise_factor': 1.0}
  Bias Quantizer: quantized_bits
  Bias Config: {'bits': 8, 'integer': 0, 'symmetric': 0, 'alpha': 1, 'keep_negative': True, 'use_stochastic_rounding': False, 'qnoise_factor': 1.0}
  Activation: <function linear at 0x7fd4c19cc430> (Not Quantized)
--------------------------------------------------
Layer Name: q_activation_8
Type: QActivation
  Activation Quantizer: quantized_relu
  Activation Config: {'bits': 8, 'integer': 2, 'use_sigmoid': 0, 'negative_slope': 0.0, 'use_stochastic_rounding': False, 'relu_upper_bound': None, 'qnoise_factor': 1.0}
--------------------------------------------------
Layer Name: max_pooling2d_5
Type: MaxPooling2D
--------------------------------------------------
Layer Name: q_conv2d_batchnorm_6
Type: QConv2DBatchnorm
  

# 2. Create hls4ml model

For now, skip evaluation and benchmark generation; just convert the model.

In [20]:
# Generate the configuration from the Keras model
FP_TOTAL = 8        # total bits of the model-wide default fixed-point type
FP_INT = 2          # integer bits of the model-wide default fixed-point type
REUSE_FACTOR = 32   # model-wide reuse factor

config = hls4ml.utils.config_from_keras_model(model, backend=BACKEND)

config['Model']['Precision']['default'] = f"ap_fixed<{FP_TOTAL},{FP_INT}>"
config['Model']['ReuseFactor'] = REUSE_FACTOR
config['Model']['Strategy'] = 'Resource'

# Per-layer overrides. Only the input layer's result precision is overridden.
config["LayerName"] = {"q_conv2d_batchnorm_5_input": {"Precision": {"result": "ap_fixed<8,0>"}}}

# Overrides tried previously and currently disabled. To re-enable one, add an
# entry to config["LayerName"] in the same form as the input-layer entry:
#   q_conv2d_batchnorm_5: Precision ap_fixed<8,0>; Strategy Resource
#   q_conv2d_batchnorm_6: ReuseFactor 10 or 120; Strategy Resource
#   q_conv2d_batchnorm_7: ReuseFactor 480; Strategy Resource
#   q_dense_5:            ReuseFactor 100; Strategy Resource
#   q_dense_6:            Precision ap_fixed<22,11> (result ap_fixed<8,2>);
#                         ReuseFactor 16; Strategy Resource

# Pick the tool-version subdirectory for the selected backend and build the
# output path from RTL_DIR instead of repeating the "rtl_models" literal.
if BACKEND == "Vivado":
    tool_dir = "vivado_2019.1"
elif BACKEND == "Vitis":
    tool_dir = "vivado_2022.2"
else:
    raise NotImplementedError(
        f"Unsupported BACKEND {BACKEND!r}; expected 'Vivado' or 'Vitis'"
    )
output_dir = os.path.join(RTL_DIR, tool_dir, "yuhao_model_manual_precision")

# Attempt conversion on simplified model
hls_model = hls4ml.converters.convert_from_keras_model(
    model,
    hls_config=config,
    output_dir=output_dir,
    backend=BACKEND,
    part='xcku035-fbva676-2-e',
    io_type="io_stream",
)

# Compile the converted model; hls_model.predict() is called on it in a later cell.
hls_model.compile()

print("")
print(hls_model.config.config['OutputDir'])

Interpreting Sequential
Topology:
Layer name: q_conv2d_batchnorm_5_input, layer type: InputLayer, input shapes: [[None, 48, 48, 1]], output shape: [None, 48, 48, 1]
Layer name: q_conv2d_batchnorm_5, layer type: QConv2DBatchnorm, input shapes: [[None, 48, 48, 1]], output shape: [None, 46, 46, 8]
Layer name: q_activation_8, layer type: Activation, input shapes: [[None, 46, 46, 8]], output shape: [None, 46, 46, 8]
Layer name: max_pooling2d_5, layer type: MaxPooling2D, input shapes: [[None, 46, 46, 8]], output shape: [None, 11, 11, 8]
Layer name: q_conv2d_batchnorm_6, layer type: QConv2DBatchnorm, input shapes: [[None, 11, 11, 8]], output shape: [None, 9, 9, 16]
Layer name: q_activation_9, layer type: Activation, input shapes: [[None, 9, 9, 16]], output shape: [None, 9, 9, 16]
Layer name: max_pooling2d_6, layer type: MaxPooling2D, input shapes: [[None, 9, 9, 16]], output shape: [None, 4, 4, 16]
Layer name: q_conv2d_batchnorm_7, layer type: QConv2DBatchnorm, input shapes: [[None, 4, 4, 16]]

In [21]:
# Display the full configuration dictionary attached to the converted model.
hls_model.config.config

{'OutputDir': 'rtl_models/vivado_2022.2/yuhao_model_manual_precision',
 'ProjectName': 'myproject',
 'Backend': 'Vitis',
 'Version': '1.0.0',
 'Part': 'xcku035-fbva676-2-e',
 'ClockPeriod': 5,
 'ClockUncertainty': '27%',
 'IOType': 'io_stream',
 'HLSConfig': {'Model': {'Precision': {'default': 'ap_fixed<8,2>'},
   'ReuseFactor': 32,
   'Strategy': 'Resource',
   'BramFactor': 1000000000,
   'TraceOutput': False},
  'LayerName': {'q_conv2d_batchnorm_5_input': {'Precision': {'result': 'ap_fixed<8,0>'}}}},
 'WriterConfig': {'Namespace': None,
  'WriteWeightsTxt': True,
  'WriteTar': False},
 'KerasModel': <keras.engine.sequential.Sequential at 0x7fd4bb3bfc70>,
 'InputData': None,
 'OutputPredictions': None,
 'Stamp': 'B4aa5683'}

In [22]:
# Smoke-test the compiled model on a constant input (every element equals 7).
# NOTE(review): the conversion cell sets the input layer's result precision to
# ap_fixed<8,0>, so 7 is presumably outside the representable input range, and
# the converter reports the input shape as (None, 48, 48, 1) while this array
# is (1, 48, 48) — confirm the result is meaningful beyond "it runs".
hls_model.predict(np.full((1, 48, 48), 7).astype(float))

array([ 0.390625,  0.84375 , -1.078125, -1.171875, -0.734375])

# 3. Generate RTL model

In [23]:
# Run the HLS build with C simulation disabled and both C synthesis (synth)
# and Vivado synthesis (vsynth) enabled; the returned report dict is the cell output.
hls_model.build(csim=False, synth=True, vsynth=True)


****** Vitis HLS - High-Level Synthesis from C, C++ and OpenCL v2019.1 (64-bit)
  **** SW Build 3670227 on Oct 13 2022
  **** IP Build 2548770 on Fri May 24 18:01:18 MDT 2019
    ** Copyright 1986-2022 Xilinx, Inc. All Rights Reserved.

source /tools/Xilinx/Vitis_HLS/2022.2/scripts/vitis_hls/hls.tcl -notrace
INFO: [HLS 200-10] Running '/tools/Xilinx/Vitis_HLS/2022.2/bin/unwrapped/lnx64.o/vitis_hls'
INFO: [HLS 200-10] For user 'aelabd' on host 'DESKTOP-Q0UCNGC.' (Linux_x86_64 version 5.15.133.1-microsoft-standard-WSL2) on Fri Jun 06 10:16:17 CEST 2025
INFO: [HLS 200-10] On os Ubuntu 24.04 LTS
INFO: [HLS 200-10] In directory '/home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/rtl_models/vivado_2022.2/yuhao_model_manual_precision'
Sourcing Tcl script 'build_prj.tcl'
INFO: [HLS 200-1510] Running: open_project myproject_prj 
INFO: [HLS 200-10] Opening project '/home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/rtl_models/vivado_2022.2/yuhao_model_manual_precision/myproject_prj'.
INFO: [HLS 200-1510] Running

{'CSynthesisReport': {'TargetClockPeriod': '5.00',
  'EstimatedClockPeriod': '3.650',
  'BestLatency': '99054',
  'WorstLatency': '99090',
  'IntervalMin': '9218',
  'IntervalMax': '99074',
  'BRAM_18K': '102',
  'DSP': '0',
  'FF': '16906',
  'LUT': '35923',
  'URAM': '0',
  'AvailableBRAM_18K': '1080',
  'AvailableDSP': '1700',
  'AvailableFF': '406256',
  'AvailableLUT': '203128',
  'AvailableURAM': '0'},
 'VivadoSynthReport': {'LUT': '21418',
  'FF': '16107',
  'BRAM_18K': '26.5',
  'DSP48E': '0'}}

In [24]:
# Print the reports found under the project's output directory.
hls4ml.report.read_vivado_report(hls_model.config.config['OutputDir'])

Found 1 solution(s) in rtl_models/vivado_2022.2/yuhao_model_manual_precision/myproject_prj.
Reports for solution "solution1":

C simulation report not found.
SYNTHESIS REPORT:
== Vitis HLS Report for 'myproject'
* Date:           Fri Jun  6 10:18:43 2025

* Version:        2019.1 (Build 3670227 on Oct 13 2022)
* Project:        myproject_prj
* Solution:       solution1 (Vivado IP Flow Target)
* Product family: kintexu
* Target device:  xcku035-fbva676-2-e


== Performance Estimates
+ Timing: 
    * Summary: 
    +--------+---------+----------+------------+
    |  Clock |  Target | Estimated| Uncertainty|
    +--------+---------+----------+------------+
    |ap_clk  |  5.00 ns|  3.650 ns|     1.35 ns|
    +--------+---------+----------+------------+

+ Latency: 
    * Summary: 
    +---------+---------+----------+----------+------+-------+----------+
    |  Latency (cycles) |  Latency (absolute) |   Interval   | Pipeline |
    |   min   |   max   |    min   |    max   |  min |  max  |  

In [25]:
# Show the directory the generated project was written to.
hls_model.config.config['OutputDir']

'rtl_models/vivado_2022.2/yuhao_model_manual_precision'

# IMPORTANT NOTE:

(I suspect this is only necessary on Linux.)

You must go into the generated Verilog directory and find every place where a ".dat" ROM file is imported. For example, "CoaxlinkQuadCxp12_1cam/rtl_models/vivado_2019.1/yuhao_model/myproject_prj/solution1/syn/verilog/dense_wrapper_ap_fixed_8_0_5_3_0_ap_fixed_22_9_5_3_0_config15_s_w15_V.v" imports "./dense_wrapper_ap_fixed_8_0_5_3_0_ap_fixed_22_9_5_3_0_config15_s_w15_V_rom.dat". You must manually change all of these relative paths into ABSOLUTE paths; otherwise, Vivado does not know where to find the files.

### Command line commands to test in ModelSim (on the Windows side of this PC)

In [10]:
# Directory holding the files generated under solution1/syn/verilog.
verilog_dir = os.path.join(hls_model.config.config['OutputDir'], "myproject_prj", "solution1", "syn", "verilog")

# Despite the name, this is the text emitted *before* each file name: the
# `vlog` command plus the opening quote and directory of the copied sources.
if BACKEND=="Vivado":
    vlog_suffix = 'vlog "./rtl_models/vivado_2019.1/yuhao_model'
elif BACKEND=="Vitis":
    vlog_suffix = 'vlog "./rtl_models/vivado_2022.2/yuhao_model'
else:
    # Fail the same way the other backend-dependent cells do, instead of
    # hitting a NameError on vlog_suffix below.
    raise NotImplementedError(f"Unsupported BACKEND {BACKEND!r}; expected 'Vivado' or 'Vitis'")

# os.listdir returns entries in arbitrary order; sort so the printed command
# list is the same on every run.
for f in sorted(os.listdir(verilog_dir)):
    # ".dat" files are skipped; every other entry gets a vlog line.
    if f.endswith(".dat"):
        continue
    print(f'{vlog_suffix}/{f}"')

vlog "./rtl_models/vivado_2022.2/yuhao_model/myproject_shift_line_buffer_array_ap_fixed_8_2_5_3_0_1u_config2_s.v"
vlog "./rtl_models/vivado_2022.2/yuhao_model/myproject_mul_8s_7s_15_1_1.v"
vlog "./rtl_models/vivado_2022.2/yuhao_model/myproject_flow_control_loop_pipe_no_ap_cont.v"
vlog "./rtl_models/vivado_2022.2/yuhao_model/myproject_conv_2d_cl_array_array_ap_fixed_25_14_5_3_0_32u_config10_s.v"
vlog "./rtl_models/vivado_2022.2/yuhao_model/myproject_mux_727_8_1_1.v"
vlog "./rtl_models/vivado_2022.2/yuhao_model/myproject_shift_line_buffer_array_ap_ufixed_8_2_4_0_0_16u_config10_s.v"
vlog "./rtl_models/vivado_2022.2/yuhao_model/myproject_compute_output_buffer_2d_array_array_ap_fixed_8_0_5_3_0_8u_config2_s.v"
vlog "./rtl_models/vivado_2022.2/yuhao_model/myproject_mul_8s_5ns_13_1_0.v"
vlog "./rtl_models/vivado_2022.2/yuhao_model/myproject_pooling2d_cl_array_array_ap_ufixed_8_2_4_0_0_32u_config13_s.v"
vlog "./rtl_models/vivado_2022.2/yuhao_model/myproject_mul_8s_6ns_13_1_0.v"
vlog "./rtl_mode

In [11]:
# Build (and only print, not execute) a shell command that recursively copies
# the generated Verilog directory to the testbench tree.
copy_command = f"cp -r {verilog_dir}"

# NOTE(review): these are machine-specific absolute paths; consider moving
# them to a configurable constant.
if BACKEND == "Vitis":
    dest_dir = "/mnt/c/Users/abdel/OneDrive/Documents/RHEED/crop_verilog/testbench_crop_plus_gaussian/rtl_models/vivado_2022.2/yuhao_model"
elif BACKEND == "Vivado":
    dest_dir = "/mnt/c/Users/abdel/OneDrive/Documents/RHEED/crop_verilog/testbench_crop_plus_gaussian/rtl_models/vivado_2019.1/yuhao_model"
else:
    raise NotImplementedError

copy_command = f"{copy_command} {dest_dir}"
print(copy_command)

cp -r rtl_models/vivado_2022.2/yuhao_model_manual_precision/myproject_prj/solution1/syn/verilog /mnt/c/Users/abdel/OneDrive/Documents/RHEED/crop_verilog/testbench_crop_plus_gaussian/rtl_models/vivado_2022.2/yuhao_model
