In [1]:
import os
import copy
from pathlib import Path
import json
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model, Sequential

import hls4ml
from hls4ml.converters.keras_to_hls import parse_default_keras_layer
from hls4ml.model.attributes import ConfigurableAttribute, TypeAttribute
from hls4ml.model.types import FixedPrecisionType, RoundingMode, SaturationMode
from hls4ml.model.attributes import Attribute

import h5py

import qkeras
from qkeras.estimate import print_qstats
from qkeras.utils import model_quantize
from qkeras.utils import quantized_model_dump
from qkeras import QActivation, QDense, QConv2DBatchnorm

# Source the Vivado path
os.environ['PATH'] = os.environ['XILINX_VIVADO'] + '/bin:' + os.environ['PATH']

np.random.seed(0)
tf.random.set_seed(0)

# BACKEND = "Vivado"
BACKEND = "Vitis"


2025-06-30 12:39:45.631998: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-06-30 12:39:45.786710: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-30 12:39:45.792065: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-06-30 12:39:45.792094: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudar





Globals

# 1. Load Keras model

In [3]:
KERAS_DIR = "/home/aelabd/RHEED/keras_models"

def dice_loss(y_true, y_pred, delta=0.6):
    error = y_true - y_pred
    is_small = tf.abs(error) <= delta
    squared_loss = 0.5 * tf.square(error)
    linear_loss = delta * (tf.abs(error) - 0.5 * delta)
    return tf.reduce_mean(tf.where(is_small, squared_loss, linear_loss))

fpath_model_keras = os.path.join(KERAS_DIR, "model.keras")
with tf.keras.utils.custom_object_scope({'dice_loss': dice_loss,
                                         'QConv2DBatchnorm': QConv2DBatchnorm,
                                         'QActivation': QActivation,
                                         'QDense': QDense
                                         }):
        model = tf.keras.models.load_model(fpath_model_keras)

model.summary()

from qkeras.autoqkeras.utils import print_qmodel_summary
print_qmodel_summary(model)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 q_conv2d_batchnorm_5 (QConv  (None, 46, 46, 8)        113       
 2DBatchnorm)                                                    
                                                                 
 q_activation_8 (QActivation  (None, 46, 46, 8)        0         
 )                                                               
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 11, 11, 8)        0         
 2D)                                                             
                                                                 
 q_conv2d_batchnorm_6 (QConv  (None, 9, 9, 16)         1233      
 2DBatchnorm)                                                    
                                                                 
 q_activation_9 (QActivation  (None, 9, 9, 16)        

In [22]:
model.get_layer("q_conv2d_batchnorm_5").__dict__

{'_self_setattr_tracking': True,
 '_obj_reference_counts_dict': ObjectIdentityDictionary({<_ObjectIdentityWrapper wrapping True>: 3, <_ObjectIdentityWrapper wrapping <keras.utils.generic_utils.Config object at 0x7feb90023400>>: 1, <_ObjectIdentityWrapper wrapping DictWrapper({'class_name': 'quantized_bits', 'config': DictWrapper({'bits': 8, 'integer': 0, 'symmetric': 0, 'alpha': 1, 'keep_negative': True, 'use_stochastic_rounding': False, 'qnoise_factor': 1.0})})>: 1, <_ObjectIdentityWrapper wrapping DictWrapper({'class_name': 'quantized_bits', 'config': DictWrapper({'bits': 8, 'integer': 0, 'symmetric': 0, 'alpha': 1, 'keep_negative': True, 'use_stochastic_rounding': False, 'qnoise_factor': 1.0})})>: 1, <_ObjectIdentityWrapper wrapping <qkeras.quantizers.quantized_bits object at 0x7feb900212d0>>: 1, <_ObjectIdentityWrapper wrapping <qkeras.quantizers.quantized_bits object at 0x7feb900224a0>>: 1, <_ObjectIdentityWrapper wrapping ListWrapper([<qkeras.quantizers.quantized_bits object at 0

# 2. Create hls4ml model

For now, skip evaluation and creating benchmark results. JUST convert it. 

In [None]:
config = hls4ml.utils.config_from_keras_model(model, 
                                              granularity='name', 
                                              backend='Vitis',
                                              default_precision='fixed<32,16>', 
                                              default_reuse_factor=32,
                                             )

config['Model']['Strategy'] = 'Resource'
config["LayerName"] = {"q_conv2d_batchnorm_5_input": {"Precision": {"result": "ap_ufixed<8,0>"}}}
# TODO: Inspect output precision


Interpreting Sequential
Topology:
Layer name: q_conv2d_batchnorm_5_input, layer type: InputLayer, input shapes: [[None, 48, 48, 1]], output shape: [None, 48, 48, 1]
Layer name: q_conv2d_batchnorm_5, layer type: QConv2DBatchnorm, input shapes: [[None, 48, 48, 1]], output shape: [None, 46, 46, 8]
Layer name: q_activation_8, layer type: Activation, input shapes: [[None, 46, 46, 8]], output shape: [None, 46, 46, 8]
Layer name: max_pooling2d_5, layer type: MaxPooling2D, input shapes: [[None, 46, 46, 8]], output shape: [None, 11, 11, 8]
Layer name: q_conv2d_batchnorm_6, layer type: QConv2DBatchnorm, input shapes: [[None, 11, 11, 8]], output shape: [None, 9, 9, 16]
Layer name: q_activation_9, layer type: Activation, input shapes: [[None, 9, 9, 16]], output shape: [None, 9, 9, 16]
Layer name: max_pooling2d_6, layer type: MaxPooling2D, input shapes: [[None, 9, 9, 16]], output shape: [None, 4, 4, 16]
Layer name: q_conv2d_batchnorm_7, layer type: QConv2DBatchnorm, input shapes: [[None, 4, 4, 16]]

In [5]:
# Attempt conversion on simplified model
if BACKEND=="Vivado":
    output_dir = f"/home/aelabd/RHEED/04_ref_design/rtl_models/vivado_2019.1/yuhao_model"
elif BACKEND=="Vitis":
    output_dir = f"/home/aelabd/RHEED/04_ref_design/rtl_models/vivado_2022.2/yuhao_model"
else: raise NotImplementedError
hls_model = hls4ml.converters.convert_from_keras_model(
    model, 
    hls_config=config, 
    output_dir=output_dir, 
    backend=BACKEND,
    part='xcku035-fbva676-2-e', 
    io_type="io_stream"
)

hls_model.compile() # hls_model.write for Windows 

print("")
print(hls_model.config.config['OutputDir'])

Interpreting Sequential
Topology:
Layer name: q_conv2d_batchnorm_5_input, layer type: InputLayer, input shapes: [[None, 48, 48, 1]], output shape: [None, 48, 48, 1]
Layer name: q_conv2d_batchnorm_5, layer type: QConv2DBatchnorm, input shapes: [[None, 48, 48, 1]], output shape: [None, 46, 46, 8]
Layer name: q_activation_8, layer type: Activation, input shapes: [[None, 46, 46, 8]], output shape: [None, 46, 46, 8]
Layer name: max_pooling2d_5, layer type: MaxPooling2D, input shapes: [[None, 46, 46, 8]], output shape: [None, 11, 11, 8]
Layer name: q_conv2d_batchnorm_6, layer type: QConv2DBatchnorm, input shapes: [[None, 11, 11, 8]], output shape: [None, 9, 9, 16]
Layer name: q_activation_9, layer type: Activation, input shapes: [[None, 9, 9, 16]], output shape: [None, 9, 9, 16]
Layer name: max_pooling2d_6, layer type: MaxPooling2D, input shapes: [[None, 9, 9, 16]], output shape: [None, 4, 4, 16]
Layer name: q_conv2d_batchnorm_7, layer type: QConv2DBatchnorm, input shapes: [[None, 4, 4, 16]]

In [6]:
x_test = np.random.randint(0, 256, size=(5, 48, 48, 1), dtype=np.uint8) / 256. 
y_test = model.predict(x_test)
y_hls = hls_model.predict(np.ascontiguousarray(x_test))

print(y_test[0].flatten())
print(y_hls[0].flatten())

[ 0.4345703   0.50683594 -0.5961914  -0.40478516 -0.12548828]
[ 0.43218994  0.5052948  -0.59460449 -0.40483093 -0.1235199 ]


In [7]:
hls_model.config.config

{'OutputDir': '/home/aelabd/RHEED/04_ref_design/rtl_models/vivado_2022.2/yuhao_model',
 'ProjectName': 'myproject',
 'Backend': 'Vitis',
 'Version': '1.0.0',
 'Part': 'xcku035-fbva676-2-e',
 'ClockPeriod': 5,
 'ClockUncertainty': '27%',
 'IOType': 'io_stream',
 'HLSConfig': {'Model': {'Precision': {'default': 'fixed<32,16>'},
   'ReuseFactor': 32,
   'Strategy': 'Resource',
   'BramFactor': 1000000000,
   'TraceOutput': False},
  'LayerName': {'q_conv2d_batchnorm_5_input': {'Precision': {'result': 'ap_ufixed<8,0>'}}}},
 'WriterConfig': {'Namespace': None,
  'WriteWeightsTxt': True,
  'WriteTar': False},
 'KerasModel': <keras.engine.sequential.Sequential at 0x7fec171bc8b0>,
 'InputData': None,
 'OutputPredictions': None,
 'Stamp': 'b8e0E54c'}

# Generate RTL model

In [11]:
hls_model.build(csim=False, synth=True, vsynth=True, export=True)


****** Vitis HLS - High-Level Synthesis from C, C++ and OpenCL v2022.2 (64-bit)
  **** SW Build 3670227 on Oct 13 2022
  **** IP Build 3669848 on Fri Oct 14 08:30:02 MDT 2022
    ** Copyright 1986-2022 Xilinx, Inc. All Rights Reserved.

source /tools/Xilinx/Vitis_HLS/2022.2/scripts/vitis_hls/hls.tcl -notrace
INFO: [HLS 200-10] Running '/tools/Xilinx/Vitis_HLS/2022.2/bin/unwrapped/lnx64.o/vitis_hls'
INFO: [HLS 200-10] For user 'aelabd' on host 'DESKTOP-Q0UCNGC.' (Linux_x86_64 version 5.15.133.1-microsoft-standard-WSL2) on Mon Jun 30 13:29:27 PDT 2025
INFO: [HLS 200-10] On os Ubuntu 24.04 LTS
INFO: [HLS 200-10] In directory '/home/aelabd/RHEED/04_ref_design/rtl_models/vivado_2022.2/yuhao_model'
Sourcing Tcl script 'build_prj.tcl'
INFO: [HLS 200-1510] Running: open_project myproject_prj 
INFO: [HLS 200-10] Opening project '/home/aelabd/RHEED/04_ref_design/rtl_models/vivado_2022.2/yuhao_model/myproject_prj'.
INFO: [HLS 200-1510] Running: set_top myproject 
INFO: [HLS 200-1510] Running: ad

{'CSynthesisReport': {'TargetClockPeriod': '5.00',
  'EstimatedClockPeriod': '4.493',
  'BestLatency': '99056',
  'WorstLatency': '99093',
  'IntervalMin': '9218',
  'IntervalMax': '99074',
  'BRAM_18K': '350',
  'DSP': '195',
  'FF': '89237',
  'LUT': '53224',
  'URAM': '0',
  'AvailableBRAM_18K': '1080',
  'AvailableDSP': '1700',
  'AvailableFF': '406256',
  'AvailableLUT': '203128',
  'AvailableURAM': '0'},
 'VivadoSynthReport': {'LUT': '40432',
  'FF': '64653',
  'BRAM_18K': '75.5',
  'DSP48E': '390'}}

In [12]:
hls4ml.report.read_vivado_report(hls_model.config.config['OutputDir'])

Found 1 solution(s) in /home/aelabd/RHEED/04_ref_design/rtl_models/vivado_2022.2/yuhao_model/myproject_prj.
Reports for solution "solution1":

C SIMULATION RESULT:
INFO: [SIM 2] *************** CSIM start ***************
INFO: [SIM 4] CSIM will launch GCC as the compiler.
   Compiling ../../../../myproject_test.cpp in debug mode
   Compiling ../../../../firmware/myproject.cpp in debug mode
   Generating csim.exe
/tools/Xilinx/Vivado/2022.2/tps/lnx64/binutils-2.37/bin/ld: cannot find crt1.o: No such file or directory
/tools/Xilinx/Vivado/2022.2/tps/lnx64/binutils-2.37/bin/ld: cannot find crti.o: No such file or directory
/tools/Xilinx/Vivado/2022.2/tps/lnx64/binutils-2.37/bin/ld: cannot find -lpthread
/tools/Xilinx/Vivado/2022.2/tps/lnx64/binutils-2.37/bin/ld: cannot find -lm
collect2: error: ld returned 1 exit status
make: *** [Makefile.rules:323: csim.exe] Error 1
ERR: [SIM 100] 'csim_design' failed: compilation error(s).
INFO: [SIM 3] *************** CSIM finish ***************

SYNT

In [13]:
hls_model.config.config['OutputDir']

'/home/aelabd/RHEED/04_ref_design/rtl_models/vivado_2022.2/yuhao_model'

# IMPORTANT NOTE:

(I suspect this is only on Linux)

You must go into the verilog directory and find where the ".dat" ROM imports are, e.g. "CoaxlinkQuadCxp12_1cam/rtl_models/vivado_2019.1/yuhao_model/myproject_prj/solution1/syn/verilog/dense_wrapper_ap_fixed_8_0_5_3_0_ap_fixed_22_9_5_3_0_config15_s_w15_V.v" has an import "./dense_wrapper_ap_fixed_8_0_5_3_0_ap_fixed_22_9_5_3_0_config15_s_w15_V_rom.dat". You must go and manually change all of these relative paths into ABSOLUTE paths. Otherwise, vivado doesn't know where to find them. 

In [5]:
import os
src_old = "/home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/rtl_models/vivado_2023.2/yuhao_model_good1/myproject_prj/solution1/syn/verilog"
for f in os.listdir(src_old):
    print(f"remove_files {os.path.join(src_old, f)}")

remove_files /home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/rtl_models/vivado_2023.2/yuhao_model_good1/myproject_prj/solution1/syn/verilog/myproject_fifo_w64_d16_S.v
remove_files /home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/rtl_models/vivado_2023.2/yuhao_model_good1/myproject_prj/solution1/syn/verilog/myproject_flow_control_loop_pipe_no_ap_cont.v
remove_files /home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/rtl_models/vivado_2023.2/yuhao_model_good1/myproject_prj/solution1/syn/verilog/myproject_dense_resource_rf_gt_nin_rem0_ap_ufixed_ap_fixed_21_10_5_3_0_config15_s_outidbjl.v
remove_files /home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/rtl_models/vivado_2023.2/yuhao_model_good1/myproject_prj/solution1/syn/verilog/myproject_start_for_relu_array_ap_fixed_10u_array_ap_ufixed_8_2_4_0_0_10u_relu_config12bsm.v
remove_files /home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/rtl_models/vivado_2023.2/yuhao_model_good1/myproject_prj/solution1/syn/verilog/m

In [6]:
# src_new = hls_model.config.config['OutputDir'] + "/myproject_prj/solution1/syn/verilog"
src_new = "/home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/rtl_models/vivado_2023.2/yuhao_model_good1/myproject_prj/solution1/syn/verilog"
for f in os.listdir(src_new):
    if f.endswith(".dat") or ("dense" not in f): continue
    v_file = os.path.join(src_new, f)
    v_file_temp = v_file.replace(".v", "_TEMP.v")
    with open(v_file, 'r') as og_file:
        with open(v_file_temp, 'w') as new_file:
            for line in og_file:
                if ('.dat"') in line:
                    print(f"old_line: {line}")
                    line = line.replace(r'$readmemh("./', f'$readmemh("{src_new}/')
                    print(f"new_line: {line}")
                new_file.write(line)
    
        os.remove(v_file)
        os.rename(v_file_temp, v_file)
        print(f"Updated file: {v_file}\n\n\n")

old_line:     $readmemh("./myproject_dense_resource_rf_gt_nin_rem0_ap_ufixed_ap_fixed_21_10_5_3_0_config15_s_outidbjl.dat", rom0);

new_line:     $readmemh("/home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/rtl_models/vivado_2023.2/yuhao_model_good1/myproject_prj/solution1/syn/verilog/myproject_dense_resource_rf_gt_nin_rem0_ap_ufixed_ap_fixed_21_10_5_3_0_config15_s_outidbjl.dat", rom0);

Updated file: /home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/rtl_models/vivado_2023.2/yuhao_model_good1/myproject_prj/solution1/syn/verilog/myproject_dense_resource_rf_gt_nin_rem0_ap_ufixed_ap_fixed_21_10_5_3_0_config15_s_outidbjl.v



Updated file: /home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/rtl_models/vivado_2023.2/yuhao_model_good1/myproject_prj/solution1/syn/verilog/myproject_dense_resource_rf_gt_nin_rem0_ap_ufixed_ap_fixed_22_11_5_3_0_config21_s.v



Updated file: /home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/04_ref_design/rtl_models/vivado_2023.2/yuhao_model_good1/myproject