## Load in RHEED training data

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import h5py
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import tensorflow as tf
from qkeras import *
import hls4ml

2025-05-22 19:45:52.563533: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-05-22 19:45:52.665079: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-22 19:45:52.670384: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-05-22 19:45:52.670411: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudar





### Useful functions

In [2]:
def print_dict(d, indent=0, align=20):
    """Pretty-print a (possibly nested) dictionary, one key per line.

    Nested dictionaries are printed recursively, each level indented by two
    more spaces. Leaf values are printed as ``key:<padding>value`` so that the
    values line up in a common column.

    Args:
        d: Dictionary to print. Keys may be of any type; they are rendered
            with ``str``.
        indent: Current nesting depth (number of two-space indents).
        align: Column budget used to pad between the key and its value.

    Returns:
        None. The formatted text is written to stdout.
    """
    for key, value in d.items():
        # Render the key once so printing and width computation agree, and so
        # non-string keys (e.g. ints) do not break len().
        label = str(key)
        print('  ' * indent + label, end='')
        if isinstance(value, dict):
            print()
            print_dict(value, indent + 1, align)
        else:
            # A non-positive width simply yields no padding.
            padding = ' ' * (align - len(label) - 2 * indent)
            print(':' + padding + str(value))

In [3]:
def custom_weighted_mse_loss(I, J, n=1):
    """Mean squared error between ``I`` and ``J``, weighted element-wise by ``I**n``.

    Computes ``mean(I**n * (I - J)**2)`` over all elements.

    Args:
        I: Reference tensor; also the source of the weights. When the function
            is handed to ``model.compile(loss=...)``, Keras passes ``y_true``
            in this position.
        J: Tensor compared against ``I`` (``y_pred`` when used as a Keras loss).
        n: Exponent applied to ``I`` to form the weights. It has a default so
            the function can be called with only two arguments, which is how
            Keras invokes a loss callable.
            TODO(review): the default of 1 is a placeholder; confirm the
            exponent that was used when the model was trained.

    Returns:
        Scalar tensor holding the weighted mean squared error.
    """
    weights = tf.pow(I, n)
    squared_error = tf.pow(I - J, 2)
    return tf.reduce_mean(weights * squared_error)

## Load the Model

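I ran into issues loading the saved QKeras model directly, so as a workaround the architecture is rebuilt in code below and the trained weights can then be loaded into it separately with `model.load_weights`.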

In [4]:
# Model Architecture QAT
# Bit split of the quantized values: integer part and fractional part.
integer_bits, fraction_bits = 2, 6
# Quantizer flags stored as 0/1 integers.
# NOTE(review): fraction_bits, symmetric and keep_negative are not referenced
# by any other cell visible in this notebook -- confirm whether they are still needed.
symmetric, keep_negative = 0, 1

In [10]:
import tensorflow as tf
from qkeras import QConv2DBatchnorm, QActivation, QDense

def build_model(input_shape, total_bits, integer_bits):
    """Assemble the quantized CNN as a Keras functional model.

    Layout: three stages of QConv2DBatchnorm -> quantized ReLU -> MaxPool2D
    (6, 16 and 4 filters with pool sizes 4, 2 and 2), then Flatten, a
    52-unit QDense with quantized ReLU, and a 5-unit QDense output layer.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        total_bits: First argument given to every ``quantized_bits`` /
            ``quantized_relu`` quantizer.
        integer_bits: Second argument given to every quantizer.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    # Every kernel/bias shares one quantizer spec; every activation shares another.
    weight_quantizer = f"quantized_bits({total_bits}, {integer_bits}, alpha=1)"
    relu_quantizer = f"quantized_relu({total_bits}, {integer_bits})"

    def conv_stage(tensor, n_filters, pool):
        # 3x3 valid convolution with folded batch norm, quantized ReLU, max pooling.
        tensor = QConv2DBatchnorm(
            filters=n_filters, kernel_size=3, strides=1, padding='valid',
            kernel_quantizer=weight_quantizer,
            bias_quantizer=weight_quantizer,
            kernel_initializer='lecun_uniform',
            kernel_regularizer=tf.keras.regularizers.l1(0.0001),
            use_bias=True,
        )(tensor)
        tensor = QActivation(relu_quantizer)(tensor)
        return tf.keras.layers.MaxPool2D(pool_size=pool, strides=pool)(tensor)

    inputs = tf.keras.Input(shape=input_shape)

    # Convolutional feature extractor: (filters, pool size) for each stage.
    features = inputs
    for n_filters, pool in ((6, 4), (16, 2), (4, 2)):
        features = conv_stage(features, n_filters, pool)

    # Fully connected head.
    flat = tf.keras.layers.Flatten()(features)
    hidden = QDense(
        units=52,
        kernel_quantizer=weight_quantizer,
        bias_quantizer=weight_quantizer,
    )(flat)
    hidden = QActivation(relu_quantizer)(hidden)
    outputs = QDense(
        units=5,
        kernel_quantizer=weight_quantizer,
        bias_quantizer=weight_quantizer,
    )(hidden)

    return tf.keras.Model(inputs=inputs, outputs=outputs)

In [11]:
# Input geometry and quantizer widths handed to build_model
input_shape = (48, 48, 1)
total_bits = 8
integer_bits = 2

# Instantiate the network
model = build_model(
    input_shape=input_shape,
    total_bits=total_bits,
    integer_bits=integer_bits,
)

# Attach optimizer and loss; run_eagerly=True disables graph compilation of the train/predict steps
model.compile(
    loss=custom_weighted_mse_loss,
    optimizer='adam',
    run_eagerly=True,
)

# Show the layer-by-layer summary
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 48, 48, 1)]       0         
                                                                 
 q_conv2d_batchnorm_5 (QConv  (None, 46, 46, 6)        85        
 2DBatchnorm)                                                    
                                                                 
 q_activation_7 (QActivation  (None, 46, 46, 6)        0         
 )                                                               
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 11, 11, 6)        0         
 2D)                                                             
                                                                 
 q_conv2d_batchnorm_6 (QConv  (None, 9, 9, 16)         945       
 2DBatchnorm)                                              

In [12]:
# Random batch of one sample, shaped from the same input_shape the model was built with
# (avoids a second hard-coded copy of the input dimensions).
dummy_input = tf.random.normal((1, *input_shape))
model.predict(dummy_input)  # Call the model once to initialize the weights

# NOTE: while the line below stays commented out, no trained weights are loaded and the
# model keeps its initializer-drawn weights; everything downstream then describes a dummy model.
# model.load_weights('/home/mattwilk/8bit6fractional/ml4fg/Gaussian_Model_QAT_2I_6F_weights.h5')  # Load Weights



array([[-0.04443359, -3.1835938 , -0.6152344 ,  0.6333008 , -1.074707  ]],
      dtype=float32)

## HLS4ML Conversion

In [18]:
# Conversion settings
REUSE_FACTOR = 32

io_type = 'io_stream'
backend = 'Vitis'
part = 'xcku035-fbva676-2-e'
OUT_DIR = 'rtl_models/model_dummy'

# Derive an hls4ml configuration from the Keras model, with one entry per layer name
config = hls4ml.utils.config_from_keras_model(
    model,
    default_precision='ap_fixed<8,2>',
    granularity='name',
)

# Model-wide defaults
config['Model']['ReuseFactor'] = REUSE_FACTOR
config['Model']['Strategy'] = 'Resource'
# NOTE(review): with granularity='name', check in the printed config below that no
# per-layer ReuseFactor entry overrides the model-wide value -- confirm against the
# installed hls4ml version.

# Optional per-layer overrides, keyed by the layer names listed in the printed topology:
#   config["LayerName"][<layer name>]["ReuseFactor"] = <int>
#   config["LayerName"][<layer name>]["Strategy"] = "Resource"
# Values from an earlier experiment (layer names there do not match this model):
#   q_conv2d_batchnorm=50, q_conv2d_batchnorm_1=150, q_dense=288, q_dense_1=98, q_dense_2=26

print_dict(config)

Interpreting Model
Topology:
Layer name: input_3, layer type: InputLayer, input shapes: [[None, 48, 48, 1]], output shape: [None, 48, 48, 1]
Layer name: q_conv2d_batchnorm_5, layer type: QConv2DBatchnorm, input shapes: [[None, 48, 48, 1]], output shape: [None, 46, 46, 6]
Layer name: q_activation_7, layer type: Activation, input shapes: [[None, 46, 46, 6]], output shape: [None, 46, 46, 6]
Layer name: max_pooling2d_5, layer type: MaxPooling2D, input shapes: [[None, 46, 46, 6]], output shape: [None, 11, 11, 6]
Layer name: q_conv2d_batchnorm_6, layer type: QConv2DBatchnorm, input shapes: [[None, 11, 11, 6]], output shape: [None, 9, 9, 16]
Layer name: q_activation_8, layer type: Activation, input shapes: [[None, 9, 9, 16]], output shape: [None, 9, 9, 16]
Layer name: max_pooling2d_6, layer type: MaxPooling2D, input shapes: [[None, 9, 9, 16]], output shape: [None, 4, 4, 16]
Layer name: q_conv2d_batchnorm_7, layer type: QConv2DBatchnorm, input shapes: [[None, 4, 4, 16]], output shape: [None, 2

In [19]:
# Convert the Keras model into an hls4ml project written under OUT_DIR
hls_model = hls4ml.converters.convert_from_keras_model(
    model,
    hls_config=config,
    io_type=io_type,
    backend=backend,
    output_dir=OUT_DIR,
    part=part,
)

# Compile the generated project so hls_model.predict can be called from Python
hls_model.compile()

Interpreting Model
Topology:
Layer name: input_3, layer type: InputLayer, input shapes: [[None, 48, 48, 1]], output shape: [None, 48, 48, 1]
Layer name: q_conv2d_batchnorm_5, layer type: QConv2DBatchnorm, input shapes: [[None, 48, 48, 1]], output shape: [None, 46, 46, 6]
Layer name: q_activation_7, layer type: Activation, input shapes: [[None, 46, 46, 6]], output shape: [None, 46, 46, 6]
Layer name: max_pooling2d_5, layer type: MaxPooling2D, input shapes: [[None, 46, 46, 6]], output shape: [None, 11, 11, 6]
Layer name: q_conv2d_batchnorm_6, layer type: QConv2DBatchnorm, input shapes: [[None, 11, 11, 6]], output shape: [None, 9, 9, 16]
Layer name: q_activation_8, layer type: Activation, input shapes: [[None, 9, 9, 16]], output shape: [None, 9, 9, 16]
Layer name: max_pooling2d_6, layer type: MaxPooling2D, input shapes: [[None, 9, 9, 16]], output shape: [None, 4, 4, 16]
Layer name: q_conv2d_batchnorm_7, layer type: QConv2DBatchnorm, input shapes: [[None, 4, 4, 16]], output shape: [None, 2

In [20]:
# Smoke test: run the compiled HLS model on a constant-valued float input.
# NOTE(review): 7 lies outside the range of ap_fixed<8,2> (roughly [-2, 2)); if the input
# layer uses that default precision the value cannot be represented -- confirm this is the
# intended test input. Comparing against model.predict on an in-range input would be a
# stronger check.
hls_model.predict(np.full((1, 48, 48), 7, dtype=float))

array([0., 0., 0., 0., 0.])

In [22]:
# Run the HLS tool flow on the generated project: C simulation is skipped (csim=False),
# C synthesis (synth=True) and Vivado synthesis (vsynth=True) are requested.
# The returned dictionary holds the 'CSynthesisReport' and 'VivadoSynthReport' entries.
hls_model.build(csim=False, synth=True, vsynth=True)


****** Vitis HLS - High-Level Synthesis from C, C++ and OpenCL v2022.2 (64-bit)
  **** SW Build 3670227 on Oct 13 2022
  **** IP Build 3669848 on Fri Oct 14 08:30:02 MDT 2022
    ** Copyright 1986-2022 Xilinx, Inc. All Rights Reserved.

source /tools/Xilinx/Vitis_HLS/2022.2/scripts/vitis_hls/hls.tcl -notrace
INFO: [HLS 200-10] Running '/tools/Xilinx/Vitis_HLS/2022.2/bin/unwrapped/lnx64.o/vitis_hls'
INFO: [HLS 200-10] For user 'aelabd' on host 'DESKTOP-Q0UCNGC.' (Linux_x86_64 version 5.15.133.1-microsoft-standard-WSL2) on Thu May 22 19:51:18 CEST 2025
INFO: [HLS 200-10] On os Ubuntu 24.04 LTS
INFO: [HLS 200-10] In directory '/home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/rtl_models/model_dummy'
Sourcing Tcl script 'build_prj.tcl'
INFO: [HLS 200-1510] Running: open_project myproject_prj 
INFO: [HLS 200-10] Opening project '/home/aelabd/RHEED/CoaxlinkQuadCxp12_1cam/rtl_models/model_dummy/myproject_prj'.
INFO: [HLS 200-1510] Running: set_top myproject 
INFO: [HLS 200-1510] Running: add_files f

{'CSynthesisReport': {'TargetClockPeriod': '5.00',
  'EstimatedClockPeriod': '3.617',
  'BestLatency': '78333',
  'WorstLatency': '78365',
  'IntervalMin': '9218',
  'IntervalMax': '78338',
  'BRAM_18K': '91',
  'DSP': '0',
  'FF': '15388',
  'LUT': '31826',
  'URAM': '0',
  'AvailableBRAM_18K': '1080',
  'AvailableDSP': '1700',
  'AvailableFF': '406256',
  'AvailableLUT': '203128',
  'AvailableURAM': '0'},
 'VivadoSynthReport': {'LUT': '14160',
  'FF': '13500',
  'BRAM_18K': '23.5',
  'DSP48E': '0'}}

In [None]:
# Read the synthesis report back from the project directory; the path comes from the
# 'OutputDir' entry of the hls4ml model configuration.
hls4ml.report.read_vivado_report(hls_model.config.config['OutputDir'])