In [1]:
import os
import copy
from pathlib import Path
import json
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model, Sequential

import hls4ml
from hls4ml.converters.keras_to_hls import parse_default_keras_layer
from hls4ml.model.attributes import ConfigurableAttribute, TypeAttribute
from hls4ml.model.types import FixedPrecisionType, RoundingMode, SaturationMode
from hls4ml.model.attributes import Attribute

import h5py

import qkeras
from qkeras.estimate import print_qstats
from qkeras.utils import model_quantize
from qkeras.utils import quantized_model_dump
from qkeras import QActivation, QDense, QConv2DBatchnorm

# Source the Vivado path
os.environ['PATH'] = os.environ['XILINX_VIVADO'] + '/bin:' + os.environ['PATH']

np.random.seed(0)
tf.random.set_seed(0)


2025-05-29 17:18:41.203704: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-05-29 17:18:41.335273: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-29 17:18:41.341192: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-05-29 17:18:41.341215: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudar





Globals

# 1. Load Keras model

In [2]:
KERAS_DIR = "keras_models"
RTL_DIR = "rtl_models"

def dice_loss(y_true, y_pred, delta=0.6):
    error = y_true - y_pred
    is_small = tf.abs(error) <= delta
    squared_loss = 0.5 * tf.square(error)
    linear_loss = delta * (tf.abs(error) - 0.5 * delta)
    return tf.reduce_mean(tf.where(is_small, squared_loss, linear_loss))

fpath_model_keras = os.path.join(KERAS_DIR, "model.keras")
with tf.keras.utils.custom_object_scope({'dice_loss': dice_loss,
                                         'QConv2DBatchnorm': QConv2DBatchnorm,
                                         'QActivation': QActivation,
                                         'QDense': QDense
                                         }):
        model = tf.keras.models.load_model(fpath_model_keras)

model.summary()

2025-05-29 17:18:45.738888: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-05-29 17:18:45.739118: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-05-29 17:18:45.739217: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2025-05-29 17:18:45.739284: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2025-05-29 17:18:45.739349: W tensorf

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 q_conv2d_batchnorm_5 (QConv  (None, 46, 46, 8)        113       
 2DBatchnorm)                                                    
                                                                 
 q_activation_8 (QActivation  (None, 46, 46, 8)        0         
 )                                                               
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 11, 11, 8)        0         
 2D)                                                             
                                                                 
 q_conv2d_batchnorm_6 (QConv  (None, 9, 9, 16)         1233      
 2DBatchnorm)                                                    
                                                                 
 q_activation_9 (QActivation  (None, 9, 9, 16)        

In [3]:
import qkeras as qk

def print_quantization_info(model):
    for layer in model.layers:
        print(f"Layer Name: {layer.name}")
        print(f"Type: {layer.__class__.__name__}")
        
        # Helper function to handle both quantizer objects and config dicts
        def process_quantizer(quantizer, prefix=""):
            if quantizer:
                if isinstance(quantizer, dict):
                    # Handle dictionary config
                    class_name = quantizer.get("class_name", "UnknownQuantizer")
                    config = quantizer.get("config", {})
                else:
                    # Handle object with potential get_config()
                    class_name = quantizer.__class__.__name__
                    config = quantizer.get_config() if hasattr(quantizer, "get_config") else {}
                
                print(f"  {prefix}Quantizer: {class_name}")
                print(f"  {prefix}Config: {config}")
            else:
                print(f"  No {prefix}Quantizer")

        # Check for QKeras layers with kernel/bias quantizers
        if isinstance(layer, (qk.QDense, qk.QConv2D, qk.QConv1D, 
                            qk.QConv2DTranspose, qk.QDepthwiseConv2D)):
            # Kernel quantizer
            process_quantizer(layer.kernel_quantizer, "Kernel ")
            
            # Bias quantizer
            process_quantizer(layer.bias_quantizer, "Bias ")
            
            # Activation quantizer
            activation = layer.activation
            if activation:
                if isinstance(activation, dict) or hasattr(activation, "get_config"):
                    process_quantizer(activation, "Activation ")
                else:
                    print(f"  Activation: {activation} (Not Quantized)")
            else:
                print("  No Activation")

        # Check for QActivation layers
        elif isinstance(layer, qk.QActivation):
            process_quantizer(layer.quantizer, "Activation ")
        
        print("-" * 50)

print_quantization_info(model)

Layer Name: q_conv2d_batchnorm_5
Type: QConv2DBatchnorm
  Kernel Quantizer: quantized_bits
  Kernel Config: {'bits': 8, 'integer': 0, 'symmetric': 0, 'alpha': 1, 'keep_negative': True, 'use_stochastic_rounding': False, 'qnoise_factor': 1.0}
  Bias Quantizer: quantized_bits
  Bias Config: {'bits': 8, 'integer': 0, 'symmetric': 0, 'alpha': 1, 'keep_negative': True, 'use_stochastic_rounding': False, 'qnoise_factor': 1.0}
  Activation: <function linear at 0x7ff5b1710a60> (Not Quantized)
--------------------------------------------------
Layer Name: q_activation_8
Type: QActivation
  Activation Quantizer: quantized_relu
  Activation Config: {'bits': 8, 'integer': 2, 'use_sigmoid': 0, 'negative_slope': 0.0, 'use_stochastic_rounding': False, 'relu_upper_bound': None, 'qnoise_factor': 1.0}
--------------------------------------------------
Layer Name: max_pooling2d_5
Type: MaxPooling2D
--------------------------------------------------
Layer Name: q_conv2d_batchnorm_6
Type: QConv2DBatchnorm
  

# 2. Create hls4ml model

For now, skip evaluation and creating benchmark results. JUST convert it. 

In [4]:
# Generate the configuration from the Keras model
FP_TOTAL = 8
FP_INT = 0
REUSE_FACTOR = 32

config = hls4ml.utils.config_from_keras_model(model, granularity='model', backend="Vivado")
config['Model']['Precision']['default'] = f"ap_fixed<{FP_TOTAL},{FP_INT}>"
config['Model']['ReuseFactor'] = REUSE_FACTOR
config['Model']['Strategy'] = 'Resource'

# Attempt conversion on simplified model
output_dir = f"rtl_models/model2"
hls_model = hls4ml.converters.convert_from_keras_model(
    model, 
    hls_config=config, 
    output_dir=output_dir, 
    # backend="Vitis",
    backend="Vivado",
    part='xcku035-fbva676-2-e', 
    io_type="io_stream"
)

# hls_model.compile()

print("")
print(hls_model.config.config['OutputDir'])

Interpreting Sequential
Topology:
Layer name: q_conv2d_batchnorm_5_input, layer type: InputLayer, input shapes: [[None, 48, 48, 1]], output shape: [None, 48, 48, 1]
Layer name: q_conv2d_batchnorm_5, layer type: QConv2DBatchnorm, input shapes: [[None, 48, 48, 1]], output shape: [None, 46, 46, 8]
Layer name: q_activation_8, layer type: Activation, input shapes: [[None, 46, 46, 8]], output shape: [None, 46, 46, 8]
Layer name: max_pooling2d_5, layer type: MaxPooling2D, input shapes: [[None, 46, 46, 8]], output shape: [None, 11, 11, 8]
Layer name: q_conv2d_batchnorm_6, layer type: QConv2DBatchnorm, input shapes: [[None, 11, 11, 8]], output shape: [None, 9, 9, 16]
Layer name: q_activation_9, layer type: Activation, input shapes: [[None, 9, 9, 16]], output shape: [None, 9, 9, 16]
Layer name: max_pooling2d_6, layer type: MaxPooling2D, input shapes: [[None, 9, 9, 16]], output shape: [None, 4, 4, 16]
Layer name: q_conv2d_batchnorm_7, layer type: QConv2DBatchnorm, input shapes: [[None, 4, 4, 16]]

In [5]:
hls_model.predict(np.full((1, 48, 48), 7).astype(float))

array([ 0.2890625 ,  0.4453125 ,  0.41796875,  0.43359375, -0.02734375])

In [6]:
hls_model.config.config

{'OutputDir': 'rtl_models/model',
 'ProjectName': 'myproject',
 'Backend': 'Vivado',
 'Version': '1.0.0',
 'Part': 'xcku035-fbva676-2-e',
 'ClockPeriod': 5,
 'ClockUncertainty': '12.5%',
 'IOType': 'io_stream',
 'HLSConfig': {'Model': {'Precision': {'default': 'ap_fixed<8,0>'},
   'ReuseFactor': 32,
   'Strategy': 'Resource',
   'BramFactor': 1000000000,
   'TraceOutput': False}},
 'WriterConfig': {'Namespace': None,
  'WriteWeightsTxt': True,
  'WriteTar': False},
 'KerasModel': <keras.engine.sequential.Sequential at 0x7f2604f77c40>,
 'InputData': None,
 'OutputPredictions': None,
 'Stamp': '184e6475'}

# Generate RTL model

In [7]:
hls_model.build(csim=False, synth=True, vsynth=True)


****** Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC v2019.1 (64-bit)
  **** SW Build 2552052 on Fri May 24 14:47:09 MDT 2019
  **** IP Build 2548770 on Fri May 24 18:01:18 MDT 2019
    ** Copyright 1986-2019 Xilinx, Inc. All Rights Reserved.

source /tools/Xilinx/Vivado/2019.1/scripts/vivado_hls/hls.tcl -notrace
INFO: [HLS 200-10] Running '/tools/Xilinx/Vivado/2019.1/bin/unwrapped/lnx64.o/vivado_hls'
INFO: [HLS 200-10] For user 'aelabd' on host 'DESKTOP-Q0UCNGC.' (Linux_x86_64 version 5.15.133.1-microsoft-standard-WSL2) on Thu May 29 16:46:53 CEST 2025
INFO: [HLS 200-10] On os Ubuntu 24.04 LTS
INFO: [HLS 200-10] In directory '/home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/rtl_models/model'
Sourcing Tcl script 'build_prj.tcl'
INFO: [HLS 200-10] Creating and opening project '/home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/rtl_models/model/myproject_prj'.
INFO: [HLS 200-10] Adding design file 'firmware/myproject.cpp' to the project
INFO: [HLS 200-10] Adding test bench file 'myproje

{'CSynthesisReport': {'TargetClockPeriod': '5.00',
  'EstimatedClockPeriod': '4.375',
  'BestLatency': '96751',
  'WorstLatency': '96786',
  'IntervalMin': '6914',
  'IntervalMax': '96770',
  'BRAM_18K': '119',
  'DSP': '0',
  'FF': '24060',
  'LUT': '87219',
  'URAM': '0',
  'AvailableBRAM_18K': '1080',
  'AvailableDSP': '1700',
  'AvailableFF': '406256',
  'AvailableLUT': '203128',
  'AvailableURAM': '0'},
 'VivadoSynthReport': {'LUT': '28951',
  'FF': '22059',
  'BRAM_18K': '30.5',
  'DSP48E': '0'}}

In [9]:
hls4ml.report.read_vivado_report(hls_model.config.config['OutputDir'])

Found 1 solution(s) in rtl_models/model/myproject_prj.
Reports for solution "solution1":

C simulation report not found.
SYNTHESIS REPORT:
== Vivado HLS Report for 'myproject'
* Date:           Thu May 29 16:48:52 2025

* Version:        2019.1 (Build 2552052 on Fri May 24 15:28:33 MDT 2019)
* Project:        myproject_prj
* Solution:       solution1
* Product family: kintexu
* Target device:  xcku035-fbva676-2-e


== Performance Estimates
+ Timing (ns): 
    * Summary: 
    +--------+-------+----------+------------+
    |  Clock | Target| Estimated| Uncertainty|
    +--------+-------+----------+------------+
    |ap_clk  |   5.00|     4.375|        0.62|
    +--------+-------+----------+------------+

+ Latency (clock cycles): 
    * Summary: 
    +-------+-------+------+-------+----------+
    |    Latency    |   Interval   | Pipeline |
    |  min  |  max  |  min |  max  |   Type   |
    +-------+-------+------+-------+----------+
    |  96751|  96786|  6914|  96770| dataflow |
    +

In [10]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 q_conv2d_batchnorm_5 (QConv  (None, 46, 46, 8)        113       
 2DBatchnorm)                                                    
                                                                 
 q_activation_8 (QActivation  (None, 46, 46, 8)        0         
 )                                                               
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 11, 11, 8)        0         
 2D)                                                             
                                                                 
 q_conv2d_batchnorm_6 (QConv  (None, 9, 9, 16)         1233      
 2DBatchnorm)                                                    
                                                                 
 q_activation_9 (QActivation  (None, 9, 9, 16)        

In [5]:
hls_model.config.config['OutputDir']

'rtl_models/model2'

In [9]:
for f in os.listdir(os.path.join(hls_model.config.config['OutputDir'].replace("2",""), "myproject_prj", "solution1", "syn", "verilog")):
    if not f.endswith(".dat"):
        print(f'vlog "./verilog/{f}"')


vlog "./verilog/shift_line_buffer_array_ap_fixed_8_0_5_3_0_1u_config2_s.v"
vlog "./verilog/start_for_relu_array_ap_fixed_16u_array_ap_fixed_8_0_5_3_0_16u_relu_config8_U0.v"
vlog "./verilog/fifo_w8_d81_A.v"
vlog "./verilog/start_for_relu_array_ap_fixed_32u_array_ap_fixed_8_0_5_3_0_32u_relu_config17_U0.v"
vlog "./verilog/dense_array_ap_fixed_32u_array_ap_fixed_8_0_5_3_0_5u_config18_s.v"
vlog "./verilog/relu_array_ap_fixed_32u_array_ap_fixed_8_0_5_3_0_32u_relu_config12_s.v"
vlog "./verilog/dense_resource_ap_fixed_8_0_5_3_0_ap_fixed_8_0_5_3_0_config10_mult_s_w10_V.v"
vlog "./verilog/dense_resource_ap_fixed_8_0_5_3_0_ap_fixed_8_0_5_3_0_config10_mult_s.v"
vlog "./verilog/pooling2d_cl_array_ap_fixed_16u_array_ap_fixed_8_0_5_3_0_16u_config9_s_line_bRg6.v"
vlog "./verilog/start_for_relu_array_ap_fixed_8u_array_ap_fixed_8_0_5_3_0_8u_relu_config4_U0.v"
vlog "./verilog/conv_2d_cl_array_ap_fixed_16u_array_ap_fixed_8_0_5_3_0_32u_config10_s.v"
vlog "./verilog/shift_line_buffer_array_ap_fixed_8_0_5_3_

In [10]:
for f in os.listdir(os.path.join(hls_model.config.config['OutputDir'].replace("2",""), "myproject_prj", "solution1", "syn", "verilog")):
    if f.endswith(".dat"):
        print(f)


dense_resource_ap_fixed_8_0_5_3_0_ap_fixed_8_0_5_3_0_config2_mult_s_w2_V_rom.dat
dense_wrapper_ap_fixed_8_0_5_3_0_ap_fixed_8_0_5_3_0_config15_s_w15_V_rom.dat
dense_resource_ap_fixed_8_0_5_3_0_ap_fixed_8_0_5_3_0_config2_mult_s_outidx_rom.dat
dense_wrapper_ap_fixed_8_0_5_3_0_ap_fixed_8_0_5_3_0_config18_s_w18_V_rom.dat
dense_resource_ap_fixed_8_0_5_3_0_ap_fixed_8_0_5_3_0_config10_mult_s_w10_V_rom.dat
dense_resource_ap_fixed_8_0_5_3_0_ap_fixed_8_0_5_3_0_config6_mult_s_w6_V_rom.dat
