# Vivado Flow

We're going to train a fully connected neural network with QKeras on the jet tagging dataset and run it baremetal on Zynq-class boards (ZCU106, Ultra96, Pynq-Z1, MiniZed).

## Setup

Let's import the libraries, call the magic functions, and set up the environment variables.

In [None]:
import tensorflow as tf

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l1

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

from qkeras.qlayers import QDense, QActivation
from qkeras.quantizers import quantized_bits, quantized_relu
from qkeras.utils import _add_supported_quantized_objects

import numpy as np

import hls4ml

from callbacks import all_callbacks

%matplotlib inline

import os
# Put the Vivado 2019.1 binaries at the front of PATH so that tools looked up
# from this notebook are searched for there first.
os.environ['PATH'] = ''.join(['/extra/tools/Xilinx/Vivado/2019.1/bin:', os.environ['PATH']])

def is_tool(name):
    """Return True if an executable called `name` can be found on the PATH.

    Uses `shutil.which` instead of `distutils.spawn.find_executable`:
    distutils is deprecated (PEP 632) and is no longer shipped with
    Python 3.12+, where the old import fails.
    """
    from shutil import which
    return which(name) is not None

# Report whether the Vivado HLS executable is reachable through PATH.
separator = '-----------------------------------'
print(separator)
if is_tool('vivado_hls'):
    print('Xilinx Vivado HLS is in the PATH')
else:
    print('Xilinx Vivado HLS is NOT in the PATH')
print(separator)

## Load the dataset

This is a lot like the previous notebooks, so we will go through quickly.

First, we fetch the dataset from OpenML, do the normalization and make a train and test split.

We save the test dataset to files so that we can use them later.

In [None]:
# Fetch the jet tagging dataset from OpenML and separate features from labels.
data = fetch_openml('hls4ml_lhc_jets_hlf')
X = data['data']
y = data['target']

# Encode the class labels as integers, then as one-hot vectors (5 classes).
le = LabelEncoder()
y = to_categorical(le.fit_transform(y), 5)
classes = le.classes_

# Hold out 20% of the samples for testing. The scaler is fitted on the
# training part only and then applied unchanged to the test part.
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_val = scaler.fit_transform(X_train_val)
X_test = scaler.transform(X_test)

# Keep the test set and the class names on disk so they can be used later.
os.makedirs('npy', exist_ok=True)
for npy_path, npy_array in (('npy/y_test.npy', y_test), ('npy/X_test.npy', X_test)):
    np.save(npy_path, npy_array)
np.save('npy/classes.npy', classes, allow_pickle=True)

## Model training
Our favourite 3 hidden-layer model. 6 bit quantizers everywhere.

In [None]:
def _quantized_dense(units, name, **extra):
    """Build a QDense layer with the quantizer/initializer/regularizer settings shared by all dense layers.

    `extra` is forwarded to QDense unchanged (used for `input_shape` on the first layer).
    """
    return QDense(units, name=name,
                  kernel_quantizer=quantized_bits(6, 0, alpha=1),
                  bias_quantizer=quantized_bits(6, 0, alpha=1),
                  kernel_initializer='lecun_uniform',
                  kernel_regularizer=l1(0.0001),
                  **extra)


model = Sequential()

# Three hidden blocks of 8 units, each a quantized dense layer followed by a
# 6-bit quantized ReLU. Only the first layer declares the 16-feature input.
hidden_blocks = (
    ('fc1', 'relu1', {'input_shape': (16,)}),
    ('fc2', 'relu2', {}),
    ('fc3', 'relu3', {}),
)
for dense_name, relu_name, dense_extra in hidden_blocks:
    model.add(_quantized_dense(8, dense_name, **dense_extra))
    model.add(QActivation(activation=quantized_relu(6), name=relu_name))

# Output block: 5 units (one per class) followed by a softmax.
model.add(_quantized_dense(5, 'output'))
model.add(Activation(activation='softmax', name='softmax'))

## Prune
Because why not?

In [None]:
from tensorflow_model_optimization.python.core.sparsity.keras import prune, pruning_callbacks, pruning_schedule
from tensorflow_model_optimization.sparsity.keras import strip_pruning

# Constant 0.75 sparsity schedule with begin_step=2000 and frequency=100,
# passed as keyword arguments to the pruning wrapper.
pruning_params = dict(
    pruning_schedule=pruning_schedule.ConstantSparsity(0.75, begin_step=2000, frequency=100),
)
# Replace the model with its prunable counterpart.
model = prune.prune_low_magnitude(model, **pruning_params)

## Train

In [None]:
# Set to False to reload a previously trained model from disk instead of
# retraining. Using `not os.path.exists('model/KERAS_check_best_model.h5')`
# here would retrain only when no saved model exists.
train = True
if train:
    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated in tf.keras and rejected by recent Keras releases.
    adam = Adam(learning_rate=0.0001)
    model.compile(optimizer=adam, loss=['categorical_crossentropy'], metrics=['accuracy'])
    # Callback bundle from the local `callbacks` module, writing into 'model/'.
    callbacks = all_callbacks(stop_patience = 1000,
                              lr_factor = 0.5,
                              lr_patience = 10,
                              lr_epsilon = 0.000001,
                              lr_cooldown = 2,
                              lr_minimum = 0.0000001,
                              outputDir = 'model')
    # The pruning wrappers need this callback to advance their step counter
    # during `fit`.
    callbacks.callbacks.append(pruning_callbacks.UpdatePruningStep())
    model.fit(X_train_val, y_train_val, batch_size=1024,
              epochs=30, validation_split=0.25, shuffle=True,
              callbacks = callbacks.callbacks)
    # Save the model again but with the pruning 'stripped' to use the regular layer types
    model = strip_pruning(model)
    model.save('model/KERAS_check_best_model.h5')
else:
    from tensorflow.keras.models import load_model
    from qkeras.utils import _add_supported_quantized_objects
    # Register the QKeras layer/quantizer classes so that `load_model` can
    # deserialize them.
    co = {}
    _add_supported_quantized_objects(co)
    model = load_model('model/KERAS_check_best_model.h5', custom_objects=co)

## Check accuracy

Do not expect high accuracy, because the network has only a small number of neurons. We could do better than this, but as long as the model fits both the Pynq-Z1 and the MiniZed, that is fine for our purposes.

In [None]:
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
%matplotlib inline
import plotting

# Run the trained model on the test set and keep the predictions on disk.
y_keras = model.predict(X_test)
np.save('npy/y_qkeras.npy', y_keras)

# Accuracy is computed on class indices (argmax over the per-class outputs).
true_labels = np.argmax(y_test, axis=1)
keras_labels = np.argmax(y_keras, axis=1)
print("Accuracy: {}".format(accuracy_score(true_labels, keras_labels)))

# Per-class ROC curves, drawn by the local `plotting` module.
plt.figure(figsize=(9, 9))
_ = plotting.plotMultiClassRoc(y_test, y_keras, classes)

import plotting # Import local package plotting.py
from sklearn.metrics import confusion_matrix
# Confusion matrix of true vs. predicted class indices on the test set.
test_truth = np.argmax(y_test, axis=1)
test_pred = np.argmax(y_keras, axis=1)
cm = confusion_matrix(y_true=test_truth, y_pred=test_pred)

plt.figure(figsize=(9, 9))
_ = plotting.plot_confusion_matrix(cm, le.classes_)

## Make an hls4ml configuration
Notice we're using `Strategy: Resource` and `ReuseFactor: 64` for every layer. The Programmable Logic (the FPGA part) of the Pynq-Z1 SoC is small compared to VU9P-class parts.

We also use some settings which are good for QKeras.

Notice the `fpga_part:'xc7z020clg400-1'`.

In [None]:
import plotting

# Rounding/saturation settings of the hls4ml optimizer pass, applied to
# 'Activation' layers.
rounding_pass = hls4ml.model.optimizer.OutputRoundingSaturationMode
rounding_pass.layers = ['Activation']
rounding_pass.rounding_mode = 'AP_RND'
rounding_pass.saturation_mode = 'AP_SAT'

# Start from the per-layer configuration derived from the Keras model, then
# replace the model-wide section with our own settings.
hls_config = hls4ml.utils.config_from_keras_model(model, granularity='name')
hls_config['Model'] = {
    'ReuseFactor': 64,
    'Strategy': 'Resource',
    'Precision': 'ap_fixed<16,6>',
}

# Same reuse factor for every dense layer.
for dense_name in ('fc1', 'fc2', 'fc3', 'output'):
    hls_config['LayerName'][dense_name]['ReuseFactor'] = 64

# Types of the softmax lookup tables.
softmax_config = hls_config['LayerName']['softmax']
softmax_config['exp_table_t'] = 'ap_fixed<18,8>'
softmax_config['inv_table_t'] = 'ap_fixed<18,4>'

print("-----------------------------------")
plotting.print_dict(hls_config)
print("-----------------------------------")

## Convert and Compile

In [None]:
# Supported boards: name -> (hls4ml output directory, FPGA part number).
BOARDS = {
    'zcu106': ('hls/zcu106_prj', 'xczu7ev-ffvc1156-2-e'),
    'ultra96': ('hls/ultra96_prj', 'xczu3eg-sbva484-1-e'),
    'pynqz1': ('hls/pynqz1_prj', 'xc7z020clg400-1'),
    'minized': ('hls/minized_prj', 'xc7z007sclg225-1'),
}

# Select the target board here (Pynq-Z1 by default).
output_dir, fpga_part = BOARDS['pynqz1']

# Backend configuration for the chosen part, completed with the model,
# the HLS configuration and the project location.
backend_config = hls4ml.converters.create_backend_config(fpga_part=fpga_part)
backend_config['KerasModel'] = model
backend_config['HLSConfig'] = hls_config
backend_config['OutputDir'] = output_dir
backend_config['Backend'] = 'Pynq'
backend_config['Interface'] = 'm_axi'  # 's_axilite' or 'm_axi'

# Uncomment to inspect the full backend configuration.
#print("-----------------------------------")
#plotting.print_dict(backend_config)
#print("-----------------------------------")

# Convert the Keras model to an hls4ml model and compile it.
hls_model = hls4ml.converters.keras_to_hls(backend_config)

_ = hls_model.compile()

## Manually Edit the Project

Similarly to the Pynq flow we can generate a project to support the Vivado flow. For the moment we manually edit the hls4ml project (from the `main` branch).

0. Move into the `hls/BOARD_prj` directory, where `BOARD` can be `ultra96`, `pynqz1`, `minized`, etc.
   ```
   cd hls/BOARD_prj
   ```

1. Manually edit the file `build_prj.tcl`
   - Set the target clock period to 10 ns

**===== Do not go beyond here if you have not manually edited the hls4ml project! =====**

## Prediction and Comparison

(At this point, the C++ code that you have previously edited gets compiled and executed.)


In [None]:
# Run the compiled hls4ml model on the test set (passed as a contiguous array).
y_hls = hls_model.predict(np.ascontiguousarray(X_test))

# Compare both models against the same true class indices.
expected_classes = np.argmax(y_test, axis=1)
print('-----------------------------------')
for report_line, scores in (("Keras  Accuracy: {}", y_keras), ("hls4ml Accuracy: {}", y_hls)):
    print(report_line.format(accuracy_score(expected_classes, np.argmax(scores, axis=1))))
print('-----------------------------------')

In [None]:
from matplotlib.legend import Legend
from matplotlib.lines import Line2D

# Enable logarithmic scale on TPR and FPR axes
logscale_tpr = False # Y axis
logscale_fpr = False # X axis

# Both sets of ROC curves share one figure and the same axis-scale options.
fig, ax = plt.subplots(figsize=(9, 9))
roc_options = dict(logscale_tpr=logscale_tpr, logscale_fpr=logscale_fpr)
_ = plotting.plotMultiClassRoc(y_test, y_keras, classes, **roc_options)
plt.gca().set_prop_cycle(None) # reset the colors
_ = plotting.plotMultiClassRoc(y_test, y_hls, classes, linestyle='--', **roc_options)

# Second legend explaining the line styles: solid for keras, dashed for hls4ml.
lines = [Line2D([0], [0], ls=line_style) for line_style in ('-', '--')]
leg = Legend(ax, lines, labels=['keras', 'hls4ml'],
             loc='lower right', frameon=False)
_ = ax.add_artist(leg)

## Synthesis

In [None]:
# Build the project: C simulation is skipped, synthesis and export are enabled.
build_steps = {'csim': False, 'synth': True, 'export': True}
hls_model.build(**build_steps)

# Show the Vivado reports found in the project directory.
hls4ml.report.read_vivado_report(output_dir)

## Results

See the resource usage for different boards.


```
+-----------------+---------+-------+--------+-------+-----+
|                        PYNQ-Z1                           |
+-----------------+---------+-------+--------+-------+-----+
|Total            |        8|      7|    6089|  13557|    0|
+-----------------+---------+-------+--------+-------+-----+
|Available        |      280|    220|  106400|  53200|    0|
+-----------------+---------+-------+--------+-------+-----+
|Utilization (%)  |        2|      3|       5|     25|    0|
+-----------------+---------+-------+--------+-------+-----+
    +-----+-----+-----+-----+---------+
    |  Latency  |  Interval | Pipeline|
    | min | max | min | max |   Type  |
    +-----+-----+-----+-----+---------+
    |  305|  309|  305|  309|   none  |
    +-----+-----+-----+-----+---------+


+-----------------+---------+-------+-------+-------+-----+
|                        MiniZed                          |
+-----------------+---------+-------+-------+-------+-----+
|Total            |        8|      7|   6089|  13557|    0|
+-----------------+---------+-------+-------+-------+-----+
|Available        |      100|     66|  28800|  14400|    0|
+-----------------+---------+-------+-------+-------+-----+
|Utilization (%)  |        8|     10|     21|     94|    0|
+-----------------+---------+-------+-------+-------+-----+
    +-----+-----+-----+-----+---------+
    |  Latency  |  Interval | Pipeline|
    | min | max | min | max |   Type  |
    +-----+-----+-----+-----+---------+
    |  305|  309|  305|  309|   none  |
    +-----+-----+-----+-----+---------+
    
``` 
    
 


## Generate .dat Files

The .dat files are used
- during the following `csim` step
- to generate the header files for SDK

In [None]:
def _write_dat(path, rows):
    """Write a 2-D array to `path` as text.

    Each row becomes one line; every value is followed by a single space
    (including the last one), which is the format the original loops produced.
    """
    # `with` closes the file even if a write fails part-way through.
    with open(path, 'w') as dat_file:
        for row in rows:
            dat_file.write(''.join('{} '.format(value) for value in row))
            dat_file.write('\n')


# Make sure the test-bench data directory exists before writing into it.
os.makedirs(output_dir + '/tb_data', exist_ok=True)

# Input features of the test set.
_write_dat(output_dir + '/tb_data/tb_input_features.dat', X_test)
# NOTE: the "predictions" file holds the one-hot true labels (y_test).
_write_dat(output_dir + '/tb_data/tb_output_predictions.dat', y_test)

## Run Vivado HLS csim

Move into the `hls/BOARD_prj` directory, where `BOARD` can be `ultra96`, `pynqz1`, `minized`, etc., and run Vivado HLS.
```
cd hls/BOARD_prj
vivado_hls -p myproject_prj
```

Run C-sim.

## Integrate IP in a Vivado Project

Move to the directory `sys` for the next steps.