In [1]:
import os
import copy
from pathlib import Path
import json
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model, Sequential

import hls4ml
from hls4ml.converters.keras_to_hls import parse_default_keras_layer
from hls4ml.model.attributes import ConfigurableAttribute, TypeAttribute
from hls4ml.model.types import FixedPrecisionType, RoundingMode, SaturationMode
from hls4ml.model.attributes import Attribute

import h5py

import qkeras
from qkeras.estimate import print_qstats
from qkeras.utils import model_quantize
from qkeras.utils import quantized_model_dump
from qkeras import QActivation, QDense, QConv2DBatchnorm

# Source the Vivado path
os.environ['PATH'] = os.environ['XILINX_VIVADO'] + '/bin:' + os.environ['PATH']

np.random.seed(0)
tf.random.set_seed(5)

# BACKEND = "Vivado"
BACKEND = "Vitis"


2025-06-04 14:30:32.135738: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-06-04 14:30:32.262292: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-04 14:30:32.268214: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-06-04 14:30:32.268247: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudar





Globals

# 1. Load Keras model

In [2]:
import tensorflow as tf
from qkeras import QConv2DBatchnorm, QActivation, QDense

def custom_weighted_mse_loss(I, J, n):
    W = tf.pow(I, n)
    squared_diffs = tf.pow(I - J, 2)
    weighted_squared_diffs = W * squared_diffs

    return tf.reduce_mean(weighted_squared_diffs)

def build_model(input_shape, total_bits, integer_bits):
    inputs = tf.keras.Input(shape=input_shape)
    x = tf.keras.layers.Flatten()(inputs)
    x = QDense(
        5,
        f"quantized_bits({total_bits}, {integer_bits}, alpha=1)"
    )(x)
    outputs = QActivation(f"quantized_relu({total_bits}, {integer_bits})")(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

In [3]:
FP_TOTAL = 8
FP_INT = 2
INPUT_SHAPE = (5, 5, 1)

# Build the model
model = build_model(INPUT_SHAPE, FP_TOTAL, FP_INT)

# Compile the model
model.compile(optimizer='adam', loss=custom_weighted_mse_loss, run_eagerly=True)

# Display the model summary
model.summary()

2025-06-04 14:30:37.137859: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-06-04 14:30:37.138197: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-06-04 14:30:37.138329: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2025-06-04 14:30:37.138426: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2025-06-04 14:30:37.138513: W tensorf

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 5, 5, 1)]         0         
                                                                 
 flatten (Flatten)           (None, 25)                0         
                                                                 
 q_dense (QDense)            (None, 5)                 130       
                                                                 
 q_activation (QActivation)  (None, 5)                 0         
                                                                 
Total params: 130
Trainable params: 130
Non-trainable params: 0
____________________________________________________________

In [4]:
import qkeras as qk

def print_quantization_info(model):
    for layer in model.layers:
        print(f"Layer Name: {layer.name}")
        print(f"Type: {layer.__class__.__name__}")
        
        # Helper function to handle both quantizer objects and config dicts
        def process_quantizer(quantizer, prefix=""):
            if quantizer:
                if isinstance(quantizer, dict):
                    # Handle dictionary config
                    class_name = quantizer.get("class_name", "UnknownQuantizer")
                    config = quantizer.get("config", {})
                else:
                    # Handle object with potential get_config()
                    class_name = quantizer.__class__.__name__
                    config = quantizer.get_config() if hasattr(quantizer, "get_config") else {}
                
                print(f"  {prefix}Quantizer: {class_name}")
                print(f"  {prefix}Config: {config}")
            else:
                print(f"  No {prefix}Quantizer")

        # Check for QKeras layers with kernel/bias quantizers
        if isinstance(layer, (qk.QDense, qk.QConv2D, qk.QConv1D, 
                            qk.QConv2DTranspose, qk.QDepthwiseConv2D)):
            # Kernel quantizer
            process_quantizer(layer.kernel_quantizer, "Kernel ")
            
            # Bias quantizer
            process_quantizer(layer.bias_quantizer, "Bias ")
            
            # Activation quantizer
            activation = layer.activation
            if activation:
                if isinstance(activation, dict) or hasattr(activation, "get_config"):
                    process_quantizer(activation, "Activation ")
                else:
                    print(f"  Activation: {activation} (Not Quantized)")
            else:
                print("  No Activation")

        # Check for QActivation layers
        elif isinstance(layer, qk.QActivation):
            process_quantizer(layer.quantizer, "Activation ")
        
        print("-" * 50)

print_quantization_info(model)

Layer Name: input_1
Type: InputLayer
--------------------------------------------------
Layer Name: flatten
Type: Flatten
--------------------------------------------------
Layer Name: q_dense
Type: QDense
  No Kernel Quantizer
  No Bias Quantizer
  Activation Quantizer: quantized_bits
  Activation Config: {'bits': 8, 'integer': 2, 'symmetric': 0, 'alpha': 1, 'keep_negative': True, 'use_stochastic_rounding': False, 'qnoise_factor': 1.0}
--------------------------------------------------
Layer Name: q_activation
Type: QActivation
  Activation Quantizer: quantized_relu
  Activation Config: {'bits': 8, 'integer': 2, 'use_sigmoid': 0, 'negative_slope': 0.0, 'use_stochastic_rounding': False, 'relu_upper_bound': None, 'qnoise_factor': 1.0}
--------------------------------------------------


# 2. Create hls4ml model

For now, skip evaluation and creating benchmark results. JUST convert it. 

In [6]:
# Generate the configuration from the Keras model
REUSE_FACTOR = 32

config = hls4ml.utils.config_from_keras_model(model, granularity='model', backend=BACKEND)
config['Model']['Precision']['default'] = f"ap_fixed<{FP_TOTAL},{FP_INT}>"
config['Model']['ReuseFactor'] = REUSE_FACTOR
config['Model']['Strategy'] = 'Resource'

# Attempt conversion on simplified model
output_dir = f"rtl_models/small_dummy_model"
hls_model = hls4ml.converters.convert_from_keras_model(
    model, 
    hls_config=config, 
    output_dir=output_dir, 
    backend=BACKEND,
    part='xcku035-fbva676-2-e', 
    io_type="io_stream"
)

hls_model.compile()

print("")
print(hls_model.config.config['OutputDir'])

Interpreting Model
Topology:
Layer name: input_1, layer type: InputLayer, input shapes: [[None, 5, 5, 1]], output shape: [None, 5, 5, 1]
Layer name: flatten, layer type: Reshape, input shapes: [[None, 5, 5, 1]], output shape: [None, 25]
Layer name: q_dense, layer type: QDense, input shapes: [[None, 25]], output shape: [None, 5]
Layer name: q_activation, layer type: Activation, input shapes: [[None, 5]], output shape: [None, 5]
Interpreting Model
Topology:
Layer name: input_1, layer type: InputLayer, input shapes: [[None, 5, 5, 1]], output shape: [None, 5, 5, 1]
Layer name: flatten, layer type: Reshape, input shapes: [[None, 5, 5, 1]], output shape: [None, 25]
Layer name: q_dense, layer type: QDense, input shapes: [[None, 25]], output shape: [None, 5]
Layer name: q_activation, layer type: Activation, input shapes: [[None, 5]], output shape: [None, 5]
Creating HLS model
Writing HLS project
Done

rtl_models/small_dummy_model


In [7]:
hls_model.predict(np.full((1, 5, 5), 7).astype(float))

array([0.      , 0.515625, 0.      , 0.      , 0.      ])

In [8]:
hls_model.config.config

{'OutputDir': 'rtl_models/small_dummy_model',
 'ProjectName': 'myproject',
 'Backend': 'Vitis',
 'Version': '1.0.0',
 'Part': 'xcku035-fbva676-2-e',
 'ClockPeriod': 5,
 'ClockUncertainty': '27%',
 'IOType': 'io_stream',
 'HLSConfig': {'Model': {'Precision': {'default': 'ap_fixed<8,2>'},
   'ReuseFactor': 32,
   'Strategy': 'Resource',
   'BramFactor': 1000000000,
   'TraceOutput': False}},
 'WriterConfig': {'Namespace': None,
  'WriteWeightsTxt': True,
  'WriteTar': False},
 'KerasModel': <keras.engine.functional.Functional at 0x7f3a9420cca0>,
 'InputData': None,
 'OutputPredictions': None,
 'Stamp': 'B7d14cBF'}

# Generate RTL model

In [9]:
hls_model.build(csim=False, synth=True, vsynth=True, cosim=False)


****** Vitis HLS - High-Level Synthesis from C, C++ and OpenCL v2022.2 (64-bit)
  **** SW Build 3670227 on Oct 13 2022
  **** IP Build 3669848 on Fri Oct 14 08:30:02 MDT 2022
    ** Copyright 1986-2022 Xilinx, Inc. All Rights Reserved.

source /tools/Xilinx/Vitis_HLS/2022.2/scripts/vitis_hls/hls.tcl -notrace
INFO: [HLS 200-10] Running '/tools/Xilinx/Vitis_HLS/2022.2/bin/unwrapped/lnx64.o/vitis_hls'
INFO: [HLS 200-10] For user 'aelabd' on host 'DESKTOP-Q0UCNGC.' (Linux_x86_64 version 5.15.133.1-microsoft-standard-WSL2) on Wed Jun 04 14:31:17 CEST 2025
INFO: [HLS 200-10] On os Ubuntu 24.04 LTS
INFO: [HLS 200-10] In directory '/home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/rtl_models/small_dummy_model'
Sourcing Tcl script 'build_prj.tcl'
INFO: [HLS 200-1510] Running: open_project myproject_prj 
INFO: [HLS 200-10] Opening project '/home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/rtl_models/small_dummy_model/myproject_prj'.
INFO: [HLS 200-1510] Running: set_top myproject 
INFO: [HLS 200-1510] Running:

{'CSynthesisReport': {'TargetClockPeriod': '5.00',
  'EstimatedClockPeriod': '3.384',
  'BestLatency': '100',
  'WorstLatency': '773',
  'IntervalMin': '101',
  'IntervalMax': '774',
  'BRAM_18K': '0',
  'DSP': '0',
  'FF': '849',
  'LUT': '1048',
  'URAM': '0',
  'AvailableBRAM_18K': '1080',
  'AvailableDSP': '1700',
  'AvailableFF': '406256',
  'AvailableLUT': '203128',
  'AvailableURAM': '0'},
 'VivadoSynthReport': {'LUT': '712',
  'FF': '852',
  'BRAM_18K': '0.5',
  'DSP48E': '0'}}

In [10]:
hls4ml.report.read_vivado_report(hls_model.config.config['OutputDir'])

Found 1 solution(s) in rtl_models/small_dummy_model/myproject_prj.
Reports for solution "solution1":

C simulation report not found.
SYNTHESIS REPORT:
== Vitis HLS Report for 'myproject'
* Date:           Wed Jun  4 14:31:38 2025

* Version:        2022.2 (Build 3670227 on Oct 13 2022)
* Project:        myproject_prj
* Solution:       solution1 (Vivado IP Flow Target)
* Product family: kintexu
* Target device:  xcku035-fbva676-2-e


== Performance Estimates
+ Timing: 
    * Summary: 
    +--------+---------+----------+------------+
    |  Clock |  Target | Estimated| Uncertainty|
    +--------+---------+----------+------------+
    |ap_clk  |  5.00 ns|  3.384 ns|     1.35 ns|
    +--------+---------+----------+------------+

+ Latency: 
    * Summary: 
    +---------+---------+----------+----------+-----+-----+----------+
    |  Latency (cycles) |  Latency (absolute) |  Interval | Pipeline |
    |   min   |   max   |    min   |    max   | min | max |   Type   |
    +---------+---------

### Command line commands to test in ModelSim (on the Windows side of this PC)

In [19]:
verilog_dir = os.path.join(hls_model.config.config['OutputDir'], "myproject_prj", "solution1", "syn", "verilog")

if BACKEND=="Vivado":
    vlog_suffix = 'vlog "./rtl_models/vivado_2019.1/small_dummy'
elif BACKEND=="Vitis":
    vlog_suffix = 'vlog "./rtl_models/vivado_2022.2/small_dummy'

for f in os.listdir(verilog_dir):
    if f.endswith(".dat"): continue
    print(f'{vlog_suffix}/{f}"')

vlog "./rtl_models/vivado_2022.2/small_dummy/myproject_start_for_relu_array_ap_fixed_5u_array_ap_fixed_8_2_5_3_0_5u_relu_config5_U0.v"
vlog "./rtl_models/vivado_2022.2/small_dummy/myproject_mul_8s_7s_14_1_1.v"
vlog "./rtl_models/vivado_2022.2/small_dummy/myproject_flow_control_loop_pipe_sequential_init.v"
vlog "./rtl_models/vivado_2022.2/small_dummy/myproject_dense_array_ap_fixed_1u_array_ap_fixed_8_2_5_3_0_5u_config3_s.v"
vlog "./rtl_models/vivado_2022.2/small_dummy/myproject_dense_array_ap_fixed_1u_array_ap_fixed_8_2_5_3_0_5u_config3_s_w3_68_ROM_AUTO_1R.v"
vlog "./rtl_models/vivado_2022.2/small_dummy/myproject_dense_array_array_ap_fixed_8_2_5_3_0_5u_config3_Pipeline_DataPrepare.v"
vlog "./rtl_models/vivado_2022.2/small_dummy/myproject_mux_255_8_1_1.v"
vlog "./rtl_models/vivado_2022.2/small_dummy/myproject.v"
vlog "./rtl_models/vivado_2022.2/small_dummy/myproject_regslice_both.v"
vlog "./rtl_models/vivado_2022.2/small_dummy/myproject_fifo_w40_d1_S.v"
vlog "./rtl_models/vivado_2022.2/s

In [16]:
copy_command = "cp -r"

copy_command += f" {verilog_dir}"

if BACKEND=="Vivado":
    dest_dir = "/mnt/c/Users/abdel/OneDrive/Documents/RHEED/crop_verilog/testbench_crop_plus_gaussian/rtl_models/vivado_2019.1/small_dummy"
elif BACKEND=="Vitis":
    dest_dir = "/mnt/c/Users/abdel/OneDrive/Documents/RHEED/crop_verilog/testbench_crop_plus_gaussian/rtl_models/vivado_2022.2/small_dummy"
else: raise NotImplementedError

copy_command += f" {dest_dir}"
print(copy_command)

cp -r rtl_models/small_dummy_model/myproject_prj/solution1/syn/verilog /mnt/c/Users/abdel/OneDrive/Documents/RHEED/crop_verilog/testbench_crop_plus_gaussian/rtl_models/vivado_2022.2/small_dummy
