In [None]:
!apt-get  -qq  install  -y  graphviz  &&  pip  install  pydot
!pip  install  -U  matplotlib
!pip  install  git+https://github.com/fastmachinelearning/hls4ml.git@main#egg=hls4ml[profiling]
!pip  install  qkeras==0.9.0

# Advanced Configuration

## Load the dataset and model (if you are restarting from this point)

In [None]:
from tensorflow.keras.utils import to_categorical
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline
import plotting

# import os
# os.environ['PATH'] = '/opt/Xilinx/Vivado/2019.2/bin:' + os.environ['PATH']
# for this tutorial we won't actually be running Vivado, so I have commented these lines out
#     but if you want to look into actually running on an FPGA then simply uncomment these lines

from tensorflow.keras.models import load_model

# Reload the train/validation and test arrays from the .npy files in the working directory
# (presumably written by an earlier part of the tutorial — see the section heading).
# NOTE: allow_pickle=True lets np.load unpickle arbitrary Python objects; only use it
# on files you trust.
X_train_val = np.load("X_train_val.npy")
X_test = np.load("X_test.npy")
X_test = np.ascontiguousarray(X_test)  # guarantee a C-contiguous buffer for later predict calls
y_train_val = np.load("y_train_val.npy")
y_test = np.load("y_test.npy", allow_pickle=True)
classes = np.load("classes.npy", allow_pickle=True)

# Restore the saved Keras model and keep its test-set predictions as the reference output.
model = load_model("model_1/KERAS_check_best_model.h5")
y_keras = model.predict(X_test)

## Make a new hls4ml config & model
This time, we'll create a config with finer granularity. When we print the config dictionary, you'll notice that an entry is created for each named Layer of the model. See for the first layer, for example:
```
LayerName:
    fc1:
        Precision:
            weight: ap_fixed<10,4>
            bias:   ap_fixed<10,4>
            result: ap_fixed<10,4>
        ReuseFactor: 1
```
We will also modify the default_precision to be smaller than we know is good just to demonstrate the effect.

In [None]:
import hls4ml

# Generate an hls4ml configuration with one entry per named layer, using a
# deliberately narrow default precision for this demonstration.
config = hls4ml.utils.config_from_keras_model(
    model,
    granularity="name",
    default_precision="ap_fixed<10,4>",
)

# Show the nested configuration framed by two separator rules.
separator = "-----------------------------------"
print(separator)
plotting.print_dict(config)
print(separator)

## Profiling
As you can see, hls4ml will allow us to choose the precision of _everything_ in our Neural Network. This is a powerful way to tune the performance, but it's also complicated. Luckily there are tools in `hls4ml.model.profiling` that will help choose the right precision for a given model.

The first thing we will do is to numerically profile the model. This method plots the distribution of the weights (and biases) as a box and whisker plot. The grey boxes show the values which can be represented with the data types used in the `hls_model`. Generally, you need the box to overlap completely with the whisker 'to the right' (large values) otherwise you'll get saturation & wrap-around issues from exceeding the top of the fixed-point range. It can be okay for the box not to overlap completely 'to the left' (small values), but finding how small you can go is a matter of trial-and-error.

Providing data, in this case just using the first 1000 examples for speed, will show the same distributions captured at the output of each layer.

In [None]:
%matplotlib inline
# Set the Trace flag on every per-layer entry of the configuration.
for layer_cfg in config["LayerName"].values():
    layer_cfg["Trace"] = True

# Convert the Keras model into an hls4ml model using the per-layer configuration.
hls_model = hls4ml.converters.convert_from_keras_model(
    model,
    hls_config=config,
    output_dir="model_1/hls4ml_prj_2",
    part="xcu250-figd2104-2L-e",
)

# Profile weights and, because data is supplied, layer outputs on the first 1000 test samples.
profiling_samples = X_test[:1000]
hls4ml.model.profiling.numerical(model=model, hls_model=hls_model, X=profiling_samples)

Recall that the default precision of `ap_fixed<16,6>` fully covers the upper range of the outputs from each layer. This is fully consistent with what we saw earlier from the ROC curve, where the fixed-point model was capable of reproducing the floating-point result. However, we know that reducing the integer or fractional precision will begin to result in degraded performance, and here we have deliberately reduced the default precision to `ap_fixed<10,4>`, so compare the grey boxes against the upper end of each distribution.

In [None]:
from matplotlib.legend import Legend
from matplotlib.lines import Line2D

# Compile the hls4ml model and run it on the full test set.
hls_model.compile()
y_hls = hls_model.predict(X_test)

# Accuracy of each model against the true class index (argmax over the last axis of y_test).
true_labels = np.argmax(y_test, axis=1)
keras_accuracy = accuracy_score(true_labels, np.argmax(y_keras, axis=1))
hls_accuracy = accuracy_score(true_labels, np.argmax(y_hls, axis=1))
print("Keras  Accuracy: {}".format(keras_accuracy))
print("hls4ml Accuracy: {}".format(hls_accuracy))

# Overlay both sets of ROC curves on one figure: solid for Keras, dashed for hls4ml.
fig, ax = plt.subplots(figsize=(9, 9))
_ = plotting.makeRoc(y_test, y_keras, classes)
plt.gca().set_prop_cycle(None)  # restart the color cycle so matching classes share a color
_ = plotting.makeRoc(y_test, y_hls, classes, linestyle="--")

# Second legend that explains the line styles rather than the per-class colors.
lines = [Line2D([0], [0], ls=style) for style in ("-", "--")]
leg = Legend(ax, lines, labels=["keras", "hls4ml"], loc="lower right", frameon=False)
ax.add_artist(leg)

Not good at all! Let's see if we can figure out how to create a model that will work at these lower precisions.

The first thing we can try is adding some regularizers. This will penalize the model for using large weights, which can help to reduce the number of bits that are necessary.

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l1
from callbacks import all_callbacks

In [None]:
# Fully connected classifier: 16 inputs -> 64 -> 32 -> 32 -> 5 outputs with softmax.
# Every Dense kernel gets its own L1 penalty (0.0001) to discourage large weights.
model = Sequential(
    [
        Dense(
            64,
            input_shape=(16,),
            name="fc1",
            kernel_initializer="lecun_uniform",
            kernel_regularizer=l1(0.0001),
        ),
        Activation(activation="relu", name="relu1"),
        Dense(
            32,
            name="fc2",
            kernel_initializer="lecun_uniform",
            kernel_regularizer=l1(0.0001),
        ),
        Activation(activation="relu", name="relu2"),
        Dense(
            32,
            name="fc3",
            kernel_initializer="lecun_uniform",
            kernel_regularizer=l1(0.0001),
        ),
        Activation(activation="relu", name="relu3"),
        Dense(
            5,
            name="output",
            kernel_initializer="lecun_uniform",
            kernel_regularizer=l1(0.0001),
        ),
        Activation(activation="softmax", name="softmax"),
    ]
)

In [None]:
# Set to False to skip training and reload the best checkpoint saved by a previous run.
train = True
if train:
    # Use `learning_rate`: the old `lr` keyword is a deprecated alias that newer
    # Keras releases reject.
    adam = Adam(learning_rate=0.0001)
    model.compile(
        optimizer=adam, loss=["categorical_crossentropy"], metrics=["accuracy"]
    )
    # Callback bundle from the local `callbacks` module; its outputs go under model_2/.
    callbacks = all_callbacks(
        stop_patience=1000,
        lr_factor=0.5,
        lr_patience=10,
        lr_epsilon=0.000001,
        lr_cooldown=2,
        lr_minimum=0.0000001,
        outputDir="model_2",
    )
    # Hold out 25% of the train/validation data for validation during the fit.
    model.fit(
        X_train_val,
        y_train_val,
        batch_size=1024,
        epochs=30,
        validation_split=0.25,
        shuffle=True,
        callbacks=callbacks.callbacks,
    )
else:
    from tensorflow.keras.models import load_model

    model = load_model("model_2/KERAS_check_best_model.h5")

Again we will set the default precision to be `ap_fixed<10,4>`.

In [None]:
# Set the Trace flag on every per-layer entry of the (re-used) configuration.
for layer_cfg in config["LayerName"].values():
    layer_cfg["Trace"] = True

# Convert the regularized Keras model with that configuration into a new project directory.
hls_model = hls4ml.converters.convert_from_keras_model(
    model,
    hls_config=config,
    output_dir="model_2/hls4ml_prj_1",
    part="xcu250-figd2104-2L-e",
)

# Profile weights and layer outputs on the first 1000 test samples.
profiling_samples = X_test[:1000]
hls4ml.model.profiling.numerical(model=model, hls_model=hls_model, X=profiling_samples)

You can see the difference in the weight profile plots between this model and the previous one quite clearly. Whereas before the smallest weight in the first layer was approximately $10^{-14}$, now it's almost $10^{-24}$! However, it hasn't markedly improved the upper bound of the layers post-activation, so we will need to try something else.

## Trace
Another thing we can try is to use different precisions in different layers. In this case, it seems that the third layer is the one with the largest output, so perhaps we could increase only that precision and leave the others as is?

In [None]:
# Widen a single precision entry and leave every other entry in `config` untouched.
# NOTE(review): the accompanying text singles out the third layer's output, while this
# changes the *weight* precision of "fc1" — confirm which layer/field was intended.
fc1_precision = config["LayerName"]["fc1"]["Precision"]
fc1_precision["weight"] = "ap_fixed<12,6>"

# Re-convert the model with the updated configuration into its own project directory.
hls_model = hls4ml.converters.convert_from_keras_model(
    model,
    hls_config=config,
    output_dir="model_2/hls4ml_prj_2",
    part="xcu250-figd2104-2L-e",
)

Now let's check how this model performs. We are also going to enable a functionality that will extract the intermediate network values from each layer, for both the hls4ml model and the Keras model.

In [None]:
# Compile the converted hls4ml model before running it.
hls_model.compile()

# Capture the intermediate per-layer values on the same 1000 test samples from both
# the hls4ml model and the Keras model so they can be compared layer by layer.
hls4ml_pred, hls4ml_trace = hls_model.trace(X_test[:1000])
keras_trace = hls4ml.model.profiling.get_ymodel_keras(model, X_test[:1000])

# Full test-set predictions for the accuracy / ROC comparison.
# `model` has been rebuilt and retrained since y_keras was first computed, so refresh
# the Keras reference here; otherwise the comparison would pit this hls4ml model
# against the predictions of the previous Keras model.
y_keras = model.predict(X_test)
y_hls = hls_model.predict(X_test)

## Inspect
Now we can print out, make plots, or do any other more detailed analysis on the output of each layer to understand the performance we see. Let's print the output of that third layer, for the first sample, for both the Keras and hls4ml models, and also make a plot of the mean difference per sample.

In [None]:
# Pull out the traced 'fc3' values of the first sample from each model once,
# then print both of them followed by their element-wise difference.
fc3_keras_first = keras_trace["fc3"][0]
fc3_hls_first = hls4ml_trace["fc3"][0]

print("Keras layer 'fc3', first sample:")
print(fc3_keras_first)
print("hls4ml layer 'fc3', first sample:")
print(fc3_hls_first)
print("layer fc3 diff, first sample:")
print(fc3_hls_first - fc3_keras_first)

In [None]:
# Mean over the last axis of the hls4ml-minus-Keras difference in the traced 'fc3' values.
fc3_mean_diff = np.mean(hls4ml_trace["fc3"] - keras_trace["fc3"], axis=-1)

# Normalized histogram with 50 equal-width bins spanning [-1, 1].
diff_bins = np.linspace(-1.0, 1.0, 51)
plt.hist(fc3_mean_diff, bins=diff_bins, density=True)
plt.xlabel("mean difference (hls4ml - keras)")

plt.show()

## Compare 
It's not looking great. Let's check the accuracy and ROC curve.

In [None]:
from matplotlib.legend import Legend
from matplotlib.lines import Line2D

# Accuracy of each set of predictions against the true class index (argmax of y_test).
true_labels = np.argmax(y_test, axis=1)
keras_accuracy = accuracy_score(true_labels, np.argmax(y_keras, axis=1))
hls_accuracy = accuracy_score(true_labels, np.argmax(y_hls, axis=1))
print("Keras  Accuracy: {}".format(keras_accuracy))
print("hls4ml Accuracy: {}".format(hls_accuracy))

# Overlay both sets of ROC curves on one figure: solid for Keras, dashed for hls4ml.
fig, ax = plt.subplots(figsize=(9, 9))
_ = plotting.makeRoc(y_test, y_keras, classes)
plt.gca().set_prop_cycle(None)  # restart the color cycle so matching classes share a color
_ = plotting.makeRoc(y_test, y_hls, classes, linestyle="--")

# Second legend that explains the line styles rather than the per-class colors.
lines = [Line2D([0], [0], ls=style) for style in ("-", "--")]
leg = Legend(ax, lines, labels=["keras", "hls4ml"], loc="lower right", frameon=False)
ax.add_artist(leg)

### Improving
Better, but still not great, especially depending on which class we look at. In principle we could try this for other layers, but eventually we may find we are just back to a larger model. Let's look at some other methods for reducing the size of the network.