In [None]:
# Sanity check that the Vivado CLI is reachable from the notebook's shell.
# `vivado` must already be on PATH for this to work, e.g. by sourcing the Xilinx
# settings script in the shell that launched Jupyter:
# source /data/Xilinx_no_Vitis/Vivado/2020.1/settings64.sh
!vivado -version

In [None]:
import os 
import pickle
import hashlib
import json

import hls4ml 
import numpy as np
import matplotlib.pyplot as plt 
from sklearn.metrics import accuracy_score
import keras
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, BatchNormalization, Dropout
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from tensorflow.keras.losses import CategoricalCrossentropy, BinaryCrossentropy 

from tensorflow_model_optimization.python.core.sparsity.keras import prune, pruning_callbacks, pruning_schedule
from tensorflow_model_optimization.sparsity.keras import strip_pruning
import tensorflow_model_optimization as tfmot

from qkeras.qlayers import QDense, QActivation
from qkeras import QBatchNormalization
from qkeras.quantizers import quantized_bits, quantized_relu
from qkeras.utils import _add_supported_quantized_objects
from tensorflow.keras.models import load_model
from qkeras.utils import _add_supported_quantized_objects

# Put the Vivado binaries on PATH so the tools can be launched from this kernel.
# XILINX_VIVADO is presumably exported by the Xilinx settings64.sh script -- confirm
# for your installation.
vivado_root = os.environ.get('XILINX_VIVADO')
if vivado_root is None:
    raise RuntimeError(
        "XILINX_VIVADO is not set; source the Vivado settings64.sh script before starting Jupyter."
    )
vivado_bin = vivado_root + '/bin'
# Prepend only once so that re-running this cell does not keep growing PATH.
if vivado_bin not in os.environ.get('PATH', '').split(os.pathsep):
    os.environ['PATH'] = vivado_bin + os.pathsep + os.environ.get('PATH', '')

# Fix the global random seed so runs are repeatable.
keras.utils.set_random_seed(32)

## Setup data

In [None]:
def save_array_to_dat(data, top_nrows, bottom_nrows, filename):
    """Write the first and last rows of ``data`` to a whitespace-separated text file.

    Each row becomes one line with its values joined by single spaces. A 1-D
    array is written as one value per line.

    Args:
        data: NumPy array (1-D or 2-D); only ``ndim``, slicing and iteration are used.
        top_nrows: Number of leading rows to write.
        bottom_nrows: Number of trailing rows to write; ``0`` writes none.
        filename: Destination path. The file is overwritten.

    Note:
        Top and bottom selections are taken independently, so rows are written
        twice when ``top_nrows + bottom_nrows`` exceeds the number of rows.
    """
    print('Save top {} rows and bottom {} rows in file {}'.format(top_nrows, bottom_nrows, filename))

    # Treat every element of a 1-D array as a single-value row.
    rows = [[x] for x in data] if data.ndim == 1 else data

    # ``rows[-0:]`` is the *whole* sequence, so "no bottom rows" needs an explicit guard.
    bottom_rows = rows[-bottom_nrows:] if bottom_nrows else rows[:0]

    with open(filename, 'w') as file:
        for row in rows[:top_nrows]:
            file.write(' '.join(map(str, row)) + '\n')
        for row in bottom_rows:
            file.write(' '.join(map(str, row)) + '\n')


In [None]:
# Readout window: first index and length (presumably in time steps -- TODO confirm units).
start_location, window_size = 100, 400
end_window = start_location + window_size # 500

# Train and test splits are currently read from the same directory.
train_data_dir = "../data/malab_05282024/npz/"
test_data_dir = "../data/malab_05282024/npz/"

In [None]:
"""Loadning training split"""
x_train_path = os.path.join(train_data_dir, '0528_X_train_0_770.npy')
y_train_path = os.path.join(train_data_dir, '0528_y_train_0_770.npy')

assert os.path.exists(x_train_path), f"ERROR: File {x_train_path} does not exist."
assert os.path.exists(y_train_path), f"ERROR: File {y_train_path} does not exist."

X_train_val = np.load(x_train_path)
y_train_val = np.load(y_train_path)

# Ensure the same dataset is loaded (MD5 over the raw array buffers).
assert hashlib.md5(X_train_val).hexdigest() == 'b61226c86b7dee0201a9158455e08ffb',  "Checksum failed. Wrong file was loaded or file may be corrupted."
assert hashlib.md5(y_train_val).hexdigest() == 'c59ce37dc7c73d2d546e7ea180fa8d31',  "Checksum failed. Wrong file was loaded or file may be corrupted."

# Get readout window. Indices are doubled, so each window position spans two
# columns (presumably interleaved I/Q values -- TODO confirm).
X_train_val = X_train_val[:,start_location*2:end_window*2]
assert len(X_train_val[0]) == (end_window-start_location)*2, f"ERROR: X_train_val sample size {len(X_train_val[0])} does not match (start window, end window) ({start_location},{end_window}) size."


print("Train Data Set:")
print(f"  X Path : {x_train_path}")
print(f"  y Path : {y_train_path}")
print(f"  Size : {len(X_train_val):,}")
print(f"  Shape : {X_train_val[0].shape}")

In [None]:
"""Loading testing split"""
x_test_path = os.path.join(test_data_dir, '0528_X_test_0_770.npy')
y_test_path = os.path.join(test_data_dir, '0528_y_test_0_770.npy')

assert os.path.exists(x_test_path), f"ERROR: File {x_test_path} does not exist."
assert os.path.exists(y_test_path), f"ERROR: File {y_test_path} does not exist."

X_test = np.load(x_test_path)
y_test = np.load(y_test_path)

# Guard against a different or corrupted copy of the dataset (MD5 over the array buffers).
x_test_md5 = hashlib.md5(X_test).hexdigest()
assert x_test_md5 == 'b7d85f42522a0a57e877422bc5947cde', "Checksum failed. Wrong file was loaded or file may be corrupted."
y_test_md5 = hashlib.md5(y_test).hexdigest()
assert y_test_md5 == '8c9cce1821372380371ade5f0ccfd4a2', "Checksum failed. Wrong file was loaded or file may be corrupted."

# Keep only the readout window; indices are doubled, so each window position
# spans two columns (presumably interleaved I/Q values -- TODO confirm).
readout_columns = slice(start_location*2, end_window*2)
X_test = X_test[:, readout_columns]
expected_width = (end_window-start_location)*2
assert len(X_test[0]) == expected_width, f"ERROR: X_test sample size {len(X_test[0])} does not match (start window, end window) ({start_location},{end_window}) size."


print("Test Data Set:")
print(f"  X Path : {x_test_path}")
print(f"  y Path : {y_test_path}")
print(f"  Size : {len(X_test):,}" )
print(f"  Sample Shape : {X_test[0].shape}")

## Build our model 
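QKeras is "Quantized Keras", a library for deep heterogeneous quantization of ML models. We use `QDense` layers instead of `Dense`. QKeras layers are also prunable, so `get_model` can optionally wrap the model for magnitude pruning (`is_pruned=True`); the models built in this notebook use `is_pruned=False`.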

In [None]:
# Number of units in the hidden fc1 layer.
hidden_neurons = 4
# Input width: two values per position of the readout window.
input_shape = int(2 * (end_window - start_location))

In [None]:
def get_model(input_shape, hidden=8, is_pruned=True, activation='sigmoid'):
    """Build the quantized two-layer classifier.

    Architecture: ``QDense(hidden, relu)`` named ``fc1`` ->
    ``BatchNormalization`` named ``batchnorm1`` -> ``QDense(1, activation)``
    named ``fc2``. Every kernel and bias uses ``quantized_bits(3, 0, alpha=1)``.

    Args:
        input_shape: Number of input features per sample.
        hidden: Number of units in the hidden ``fc1`` layer.
        is_pruned: When truthy, wrap the model with ``prune_low_magnitude`` using a
            polynomial-decay schedule (sparsity 0.10 -> 0.50 between steps 100 and 500).
        activation: Activation of the output layer ``fc2``. Pass ``None`` to get
            the layer's output without an activation applied.

    Returns:
        The uncompiled model, pruning-wrapped when ``is_pruned`` is truthy.
    """
    def weight_quantizer():
        # A fresh quantizer object per use, exactly as when written out inline.
        return quantized_bits(3, 0, alpha=1)

    # Use the tf.keras Sequential imported at the top of the notebook: QKeras and
    # tensorflow_model_optimization both operate on tf.keras models.
    model = Sequential()
    model.add(QDense(
        hidden,
        activation='relu',
        name='fc1',
        input_shape=(input_shape,),
        kernel_quantizer=weight_quantizer(), bias_quantizer=weight_quantizer()
    ))
    model.add(BatchNormalization(name='batchnorm1'))
    # Quantized alternative to the batch normalization above, kept for reference:
    # model.add(QBatchNormalization(
    #         name='batchnorm1',
    #         gamma_quantizer=quantized_bits(6, 0, 1),
    #         mean_quantizer=quantized_bits(6, 6, 1),
    #         variance_quantizer=quantized_bits(6, 6, 1),
    #         beta_quantizer=quantized_bits(6, 0, 1),
    # ))
    model.add(
        QDense(1, name='fc2', activation=activation, kernel_quantizer=weight_quantizer(), bias_quantizer=weight_quantizer())
    )

    if is_pruned:
        # Wrap all prunable layers so low-magnitude weights are zeroed during training.
        pruning_params = {'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.10, final_sparsity=0.50, begin_step=100, end_step=500)}
        model = prune.prune_low_magnitude(model, **pruning_params)
    return model


model = get_model(input_shape=input_shape, hidden=hidden_neurons, is_pruned=False)
# summary() prints the table itself and returns None, so wrapping it in print()
# would only append a stray "None" line to the output.
model.summary()
print('Input shape:', input_shape)
print('Number of hidden neurons:', hidden_neurons)

## Training 

In [None]:
init_learning_rate = 1e-3
validation_split = 0.05  # 45,000 sample size 
batch_size = 256
epochs = 100
early_stopping_patience = 20
checkpoint_dir = f'../checkpoints/scan_window_location_and_size_h{hidden_neurons}'
checkpoint_filename = 'qkeras_model_best.h5'

if not os.path.isdir(checkpoint_dir):
    print(f'Checkpoint directory {checkpoint_dir} does not exist.')
    print('Creating directory...')
    # makedirs also creates missing parents (e.g. ../checkpoints on a fresh clone),
    # where a plain mkdir would raise FileNotFoundError.
    os.makedirs(checkpoint_dir, exist_ok=True)

In [None]:
#########################
# 0. init callbacks
#########################
# One sub-directory per (start location, window size) configuration.
ckp_dir = os.path.join(checkpoint_dir, f'sl{start_location}_ws{window_size}')
# makedirs creates any missing parent directories and is a no-op when the
# directory already exists, so this cell is safe to re-run.
os.makedirs(ckp_dir, exist_ok=True)
print('Saving to', ckp_dir)

ckp_filename = os.path.join(ckp_dir, checkpoint_filename)
callbacks = [
    # Keep only the weights of the epoch with the lowest validation loss.
    ModelCheckpoint(
        ckp_filename,
        monitor="val_loss",
        verbose=0,
        save_best_only=True,
        save_weights_only=True,
        save_freq="epoch",
    ),
    # Stop once val_loss has not improved for `early_stopping_patience` epochs.
    # The best weights are NOT restored into the in-memory model; they are only
    # available through the checkpoint file written above.
    EarlyStopping(
        monitor='val_loss',
        patience=early_stopping_patience,
        restore_best_weights=False,
    ),
]


In [None]:
# Training is disabled by default; set this flag to True to retrain and overwrite
# the checkpoint in `ckp_dir`.
RUN_TRAINING = False

if RUN_TRAINING:
    #########################
    # 1. declare model 
    #########################
    opt = Adam(learning_rate=init_learning_rate)
    model = get_model(input_shape=input_shape, hidden=hidden_neurons, is_pruned=False, activation='sigmoid')
    model.compile(
        optimizer=opt, 
        loss=BinaryCrossentropy(from_logits=False), 
        metrics=['accuracy']
    )

    #########################
    # 2. train 
    #########################
    history = model.fit(
        X_train_val, 
        y_train_val, 
        batch_size=batch_size,
        epochs=epochs, 
        validation_split=validation_split, 
        shuffle=True, 
        callbacks=callbacks,
    )

    # Save the history dictionary
    with open(os.path.join(ckp_dir, 'qkeras_training_history.pkl'), 'wb') as f:
        pickle.dump(history.history, f)

    #########################
    # 3. compute fidelity 
    #########################
    # NOTE: `model` holds the last-epoch weights here (EarlyStopping is configured
    # with restore_best_weights=False); the best weights are in the checkpoint file.
    y_pred = model.predict(X_test)
    # Same decision rule as the checkpoint evaluation: class 1 when p >= 0.5.
    test_acc = accuracy_score(y_test, np.where(y_pred < 0.5, 0, 1).reshape(-1))

    print('\n===================================')
    print(f'Start location = {start_location}, Window size = {window_size}')
    print('    Accuracy', test_acc)
    print('    Fidelity', test_acc*2-1)


## Check performance

In [None]:
# Metrics gathered during evaluation; dumped to notes.json further down.
notes = dict()
# Full path of the weights checkpoint evaluated in the following cells.
ckp_filename = os.path.join(ckp_dir, checkpoint_filename)
ckp_filename

In [None]:
# Evaluate the checkpoint twice: once with the sigmoid output (probabilities) and
# once with the output activation removed (raw pre-sigmoid values).

# 1) With sigmoid: class 1 when the predicted probability is >= 0.5.
checkpoint_model = get_model(input_shape=input_shape, hidden=hidden_neurons, is_pruned=False, activation='sigmoid')
checkpoint_model.load_weights(ckp_filename)

y_keras = checkpoint_model.predict(X_test)
test_acc = accuracy_score(y_test, np.where(y_keras < 0.5, 0, 1).reshape(-1))

print(f"Keras  Accuracy (with sigmoid): {test_acc}")
print(f"Keras  Fidelity (with sigmoid): {test_acc*2-1}")
notes["Keras  Accuracy (with sigmoid)"] = test_acc
notes["Keras  Fidelity (with sigmoid)"] = test_acc*2-1

# 2) Without sigmoid: the output is the pre-activation value x. Because
#    sigmoid(x) >= 0.5 exactly when x >= 0, the equivalent decision threshold
#    is 0 (not 0.5), matching the rule applied to the HLS output later on.
checkpoint_model = get_model(input_shape=input_shape, hidden=hidden_neurons, is_pruned=False, activation=None)
checkpoint_model.load_weights(ckp_filename)

y_pred = checkpoint_model.predict(X_test)
test_acc = accuracy_score(y_test, np.where(y_pred < 0, 0, 1).reshape(-1))

print(f"Keras  Accuracy (w/o sigmoid): {test_acc}")
print(f"Keras  Fidelity (w/o sigmoid): {test_acc*2-1}")
notes["Keras  Accuracy (w/o sigmoid)"] = test_acc
notes["Keras  Fidelity (w/o sigmoid)"] = test_acc*2-1

## Check sparsity 

In [None]:
num_layers = len(checkpoint_model.layers)
print(f'Number of layers: {num_layers}')


# Histogram the first weight tensor of every layer and report its fraction of exact zeros.
for idx, model_layer in enumerate(checkpoint_model.layers):
    w = model_layer.weights[0].numpy()
    counts, edges = np.histogram(w, bins=100)
    layer_sparsity = (w == 0).sum() / w.size

    # plot weight distribution (log-scaled counts)
    plt.figure(figsize=(7, 7))
    plt.bar(edges[:-1], counts, width=edges[1] - edges[0])
    plt.semilogy()
    plt.title(f'Layer {model_layer.name}, {layer_sparsity:.2f} Sparsity')
    plt.savefig(os.path.join(ckp_dir, f'model-dist-idx{idx}.png'))
    print('% of zeros = {}'.format(layer_sparsity))


# Drop any pruning wrappers so hls4ml can parse the model.
checkpoint_model = strip_pruning(checkpoint_model)
# Round-trip through a weights-only file: the stripped model cannot be passed on
# directly for Keras tracing, so rebuild a plain model and reload the weights.
checkpoint_model.save_weights(ckp_filename)
checkpoint_model = get_model(input_shape=input_shape, hidden=hidden_neurons, is_pruned=False, activation=None)
checkpoint_model.load_weights(ckp_filename)

## HLS4ML

In [None]:
import sys 
sys.path.append("../utils")
from config import print_dict

from tensorflow.keras.models import load_model
from qkeras.utils import _add_supported_quantized_objects

In [None]:
# Create HLS configuration as a single nested literal: model-wide defaults plus
# per-layer overrides. Every traced layer has 'Trace': True.
hls_config = {
    'Model': {
        'Precision': 'ap_fixed<16,6>',  # Default precision
        'ReuseFactor': 1,  # parallelized
        'Strategy': 'Resource',
    },
    'LayerName': {
        # Fc1
        'fc1': {
            'Precision': {'result': 'ap_fixed<19,18>'},
            'Trace': True,
            'accum_t': 'ap_fixed<19,18>',
        },
        # Fc1 activation
        'fc1_relu': {
            'Precision': {'result': 'ap_fixed<19,18>'},
            'Trace': True,
        },
        # Batchnormalization
        'batchnorm1': {
            'Precision': {
                'scale': 'ap_fixed<18,2>',
                'bias': 'ap_fixed<18,2>',
                'result': 'ap_fixed<10,4>',
            },
            'Trace': True,
            'accum_t': 'ap_fixed<10,4>',
        },
        # Fc2
        'fc2': {
            'Precision': {'result': 'ap_fixed<10,5>'},
            'Trace': True,
            'accum_t': 'ap_fixed<10,5>',
        },
        # Fc2 activation
        'fc2_linear': {
            'Precision': {'result': 'ap_fixed<10,5>'},
            'Trace': True,
        },
        # Input - ZCU216 uses 14-bit ADCS
        'fc1_input': {
            'Precision': 'ap_fixed<14,14>',
            'Trace': False,
        },
    },
}

print_dict(hls_config)

### Build HLS model 

In [None]:
# Target device and tool flow
backend = 'Vivado' 
board = 'zcu216'
xilinx_part = 'xczu49dr-ffvf1760-2-e'

# Interface and timing
io_type = 'io_parallel'
interface = 'axi_stream'
#driver = 'c'
# NOTE(review): 3.225 ns corresponds to ~310.1 MHz; 307.2 MHz would be ~3.255 ns.
# Confirm which of the two values is intended.
clock_period = 3.225  # 3.225ns (307.2 MHz)

# Output locations
output_dir = f'../hls4ml_projects/sl-{start_location}_ws-{window_size}_hn-{hidden_neurons}_Vivado'
hls_fig = os.path.join(output_dir, 'model.png')

In [None]:
# Gather the conversion settings in one place, then convert the Keras model.
conversion_kwargs = dict(
    model=checkpoint_model,
    hls_config=hls_config,
    output_dir=output_dir,
    part=xilinx_part,
    io_type=io_type,
    clock_period=clock_period,
    backend=backend,
    board=board,
    interface=interface,
    #driver=driver,
    project_name='NN',
)
hls_model = hls4ml.converters.convert_from_keras_model(**conversion_kwargs)

print(f"Creating hls4ml project directory {output_dir}")
hls_model.compile()  # Must compile for C Sim. 

# Visualize model (kept as the last expression so the notebook displays the result)
hls4ml.utils.plot_model(hls_model, show_shapes=True, show_precision=True, to_file=hls_fig)

## Check HLS performance

In [None]:
# Run the compiled HLS model on the full test set.
y_hls = hls_model.predict(np.ascontiguousarray(X_test.astype(np.float32))) 

# y_keras comes from the sigmoid model (threshold 0.5). The HLS model was converted
# from the model without output activation, so its decision threshold is 0.
keras_acc = accuracy_score(y_test, np.where(y_keras < 0.5, 0, 1).reshape(-1))
hls_acc = accuracy_score(y_test, np.where(y_hls < 0, 0, 1).reshape(-1))

print(f'Keras Acc (w/ sigmoid): {keras_acc*100:.5}%')
print(f'Keras Fidelity (w/ sigmoid): {(keras_acc*2-1) * 100:.5}%')
print(f'HLS Acc: {hls_acc*100:.5}%')
print(f'HLS Fidelity: {(hls_acc*2-1) * 100:.5}%')

# Persist all collected metrics next to the generated HLS project.
notes["HLS Acc"] = hls_acc
notes["HLS Fidelity"] = hls_acc*2-1
with open(os.path.join(output_dir, 'notes.json'), 'w') as file:
    json.dump(notes, file)

### Create testbench files

In [None]:
top_nrows = 10
bottom_nrows = 10

# Head and tail of the test set used as testbench stimulus.
X_tb_top = X_test[:top_nrows]
X_tb_bottom = X_test[-bottom_nrows:]

# Reference outputs from the Keras model.
y_pred_top = checkpoint_model.predict(X_tb_top)
y_pred_bottom = checkpoint_model.predict(X_tb_bottom)
ykeras_pred = np.vstack((y_pred_top, y_pred_bottom))

# Outputs of the HLS model for the same samples (float32, C-contiguous input).
y_hls_top = hls_model.predict(np.ascontiguousarray(X_tb_top.astype(np.float32)))
y_hls_bottom = hls_model.predict(np.ascontiguousarray(X_tb_bottom.astype(np.float32)))
yhls_pred = np.vstack((y_hls_top, y_hls_bottom))

In [None]:
# Write input features plus Keras and HLS predictions into the project's tb_data folder.
testbench_files = (
    (X_test, 'tb_data/tb_input_features.dat'),
    (ykeras_pred, 'tb_data/ykeras_pred.dat'),
    (yhls_pred, 'tb_data/yhls_pred.dat'),
)
for tb_array, tb_relpath in testbench_files:
    save_array_to_dat(
        data=tb_array,
        top_nrows=top_nrows,
        bottom_nrows=bottom_nrows,
        filename=os.path.join(output_dir, tb_relpath),
    )

### Collect traces and compare

In [None]:
# Per-layer outputs of the HLS model (for layers configured with 'Trace': True)...
hls_trace_input = np.ascontiguousarray(X_test.astype(np.float32))
_, hls_trace = hls_model.trace(hls_trace_input)
# ...and the corresponding per-layer outputs of the Keras model.
keras_trace = hls4ml.model.profiling.get_ymodel_keras(checkpoint_model, X_test) 

for trace_label, trace in (('HLS', hls_trace), ('Keras', keras_trace)):
    print(f'{trace_label} Keys: {trace.keys()}')

In [None]:
# Compare every traced hls4ml layer output with the Keras output of the same name:
# print the mean absolute difference and draw an hls-vs-keras scatter plot.
for layer in hls_trace.keys():
    if layer not in keras_trace:
        # No Keras counterpart under this name: report and move on instead of
        # aborting the whole comparison with a KeyError.
        print(f'Layer(s): {layer} has no matching Keras trace; skipping')
        continue

    keras_layer, hls_layer = keras_trace[layer], hls_trace[layer]
    try:
        diff = np.average(np.abs(keras_layer - hls_layer))
        print(f'Layer(s): {layer}', '\t\t', diff)

        keras_min, keras_max = keras_layer.min(), keras_layer.max()
        hls_min, hls_max = hls_layer.min(), hls_layer.max()
        # Joint value range of both traces, used for the reference lines.
        min_x = min(keras_min, hls_min)
        max_x = max(keras_max, hls_max)

        print(f'hls/keras min: {hls_min}/{keras_min}')
        print(f'hls/keras max: {hls_max}/{keras_max}')

        plt.figure(figsize=(7, 5))
        plt.scatter(hls_layer.flatten(), keras_layer.flatten())

        # y = x diagonal (perfect agreement) and horizontal bounds of the joint range.
        plt.plot([min_x, max_x], [min_x, max_x], c='red')
        plt.axhline(min_x, c='red')
        plt.axhline(max_x, c='red')

        # Label with the layer actually being plotted rather than a positional
        # counter, which could drift out of sync with the loop.
        plt.title(f'(hls) {layer} -- (keras) {layer}')
        plt.xlabel(f'hls4ml - [{hls_min:.3f},  {hls_max:.3f}]')
        plt.ylabel(f'keras - [{keras_min:.3f},  {keras_max:.3f}]')
        plt.yscale('linear')
    except Exception as e:
        # Best effort: keep comparing the remaining layers, but say which one failed.
        print(f'Layer(s): {layer} comparison failed: {e}')


## Synthesize 

In [None]:
# Flags passed to hls_model.build(): only `synth` and `vsynth` are enabled.
build_steps = dict(
    csim=False,
    synth=True,
    cosim=False,
    export=False,
    vsynth=True,
)
hls_model.build(**build_steps)

## Check the report

In [None]:
# Read the Vivado report(s) from the generated project directory.
hls4ml.report.read_vivado_report(output_dir)