# Part 4: Quantization

In [None]:
import tensorflow.keras as keras
from tensorflow.keras.utils import to_categorical
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline
seed = 0
np.random.seed(seed)
import tensorflow as tf

tf.random.set_seed(seed)
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH']

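## Create the dataset
Instead of fetching the jet tagging dataset from OpenML, the cells below fill the input arrays with random placeholder values.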

In [None]:
# Side length of the square (size x size) training inputs created in the next cell.
size = 64

In [None]:
# Pre-saved arrays can be loaded here instead of generating random data:
#X_train_val = np.load('X_train_val.npy')
#X_test = np.load('X_test.npy')
#y_train_val = np.load('y_train_val.npy')
#y_test = np.load('y_test.npy')
#classes = np.load('classes.npy', allow_pickle=True)

n_samples = 1000

# Training inputs: uniform random values, n_samples arrays of size x size x 3.
X_train_val = np.random.rand(n_samples, size, size, 3).astype("float64")
# Training target: channel 0 of each input (not the full input), as float32.
y_train_val = X_train_val[..., 0].copy().astype("float32")

# NOTE(review): the test inputs are half the spatial size and have a single
# channel, unlike the 3-channel training inputs above -- confirm this matches
# the input shape expected by the model that `predict` is called on later.
X_test = np.random.rand(n_samples, size // 2, size // 2, 1).astype("float64")
# Test target: channel 0 of each test input, as float32.
y_test = X_test[..., 0].copy().astype("float32")

# Report shapes and dtypes, then preview the first five inputs and targets.
for summary in (
    (X_train_val.shape, y_train_val.shape),
    (X_train_val.dtype, y_train_val.dtype),
):
    print(*summary)
for preview in (X_train_val, y_train_val):
    print(preview[0:5])

## Construct a model
This time we're going to use QKeras layers.
QKeras is "Quantized Keras" for deep heterogeneous quantization of ML models.

https://github.com/google/qkeras

It is maintained by Google, and we recently added support for QKeras models to hls4ml.

In [None]:
import tensorflow.keras.models as models
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l1
from callbacks import all_callbacks
import tensorflow.keras.layers as layers
from tensorflow.keras.layers import Dense, Activation, Conv2D, MaxPooling2D, UpSampling2D
from qkeras.qlayers import QDense, QActivation
from qkeras import QConv2D, QConv2DBatchnorm
from qkeras.quantizers import quantized_bits, quantized_relu

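We're using the `QDense` layer instead of `Dense`, and `QActivation` instead of `Activation`. We're also specifying `kernel_quantizer = quantized_bits(6,0,0)`, which uses 6 bits (of which 0 are integer bits) for the weights. We use the same quantization for the biases, and `quantized_relu(6)` for 6-bit ReLU activations. Note that the `build_depth_q_autoencoder` function in the next cell is written with regular (unquantized) Keras layers, and the model actually used is loaded from `depth_model64_f.keras`.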

In [None]:
def build_depth_q_autoencoder(input_shape=(32, 32, 3), filters=(32, 64, 128)):
    """Build a small convolutional network as a Keras ``Sequential`` model.

    Despite the name, the network is assembled from regular Keras layers (no
    QKeras quantized layers) and contains no up-sampling/decoder part: it is a
    stack of down-sampling stages followed by a single-filter output head.

    Args:
        input_shape: Shape of one input sample passed to ``layers.Input``.
        filters: Number of convolution filters for each stage, in order. Every
            stage is two (3x3 'same' convolution -> ReLU -> batch
            normalization) units followed by a 2x2 max pooling. The default
            reproduces the original hard-coded 32/64/128 layout.

    Returns:
        The uncompiled ``Sequential`` model, ending in a 3x3 convolution with
        one filter and a sigmoid activation.
    """
    model = Sequential()
    model.add(layers.Input(shape=input_shape))

    for n_filters in filters:
        # Two identical conv units per stage, then halve the spatial size.
        for _ in range(2):
            model.add(layers.Conv2D(n_filters, kernel_size=(3, 3), padding='same'))
            model.add(Activation(activation='relu'))
            model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D((2, 2)))

    # Output head. No `padding` is given here, so the Keras default applies
    # (unlike the 'same'-padded convolutions above).
    model.add(layers.Conv2D(1, kernel_size=(3, 3)))
    model.add(Activation(activation='sigmoid'))

    return model

#model = build_depth_q_autoencoder()
#model.summary()

from keras.saving import load_model
from qkeras.utils import _add_supported_quantized_objects

# Build the custom_objects mapping handed to load_model; the qkeras helper is
# given the empty dict and presumably registers the QKeras layers/quantizers in it.
co = {}
_add_supported_quantized_objects(co)
# NOTE(security): safe_mode=False turns off Keras' safe deserialization, so
# loading can execute code embedded in the file. Only load model files you trust.
model = load_model("depth_model64_f.keras", custom_objects=co, safe_mode=False)

# Print the layer-by-layer overview of the loaded model.
model.summary()

## Train the model
The model is compiled with the Adam optimizer (learning rate 3e-4), mean squared error as the loss, and mean absolute error as the metric.
The callbacks are configured to decay the learning rate, but they are currently not passed to `fit`; the trained model is saved as 'estimate_depth_example/KERAS_check_best_model.h5'.
The model isn't very complex, so this should take just a few minutes even on the CPU.
With `train = False` (the setting below), training is skipped and the model loaded above from `depth_model64_f.keras` is used unchanged.

In [None]:
# Set to True to (re)train the model loaded above; with False the whole cell is a no-op.
train = False
if train:
    # Earlier experiments, kept for reference:
    #adam = Adam(lr=0.0001)
    #model.compile(optimizer=adam, loss=['categorical_crossentropy'], metrics=['accuracy'])
    #model.compile(optimizer=adam, loss=['binary_crossentropy'], metrics=['accuracy'])
    model.compile(
        optimizer=keras.optimizers.Adam(3e-4),
        loss="mse",
        metrics=["mae"],
    )
    # NOTE(review): these callbacks are built but not passed to fit() below
    # (the `callbacks=` argument is commented out), and outputDir points at
    # 'estimate_sign_example' while the model is saved under
    # 'estimate_depth_example' -- confirm which directory is intended.
    callbacks = all_callbacks(
        stop_patience=1000,
        lr_factor=0.5,
        lr_patience=10,
        lr_epsilon=0.000001,
        lr_cooldown=2,
        lr_minimum=0.0000001,
        outputDir='estimate_sign_example',
    )
    #callbacks.callbacks.append(pruning_callbacks.UpdatePruningStep())
    model.fit(
        X_train_val,
        y_train_val,
        # batch_size=8,
        epochs=1,
        validation_split=0.25,
        # shuffle=True,
        # callbacks=callbacks.callbacks,
    )
    # Save the model again but with the pruning 'stripped' to use the regular layer types
    # model = strip_pruning(model)
    # Nothing above creates the target directory, so make sure it exists
    # before saving; otherwise the save fails on a fresh checkout.
    os.makedirs('estimate_depth_example', exist_ok=True)
    model.save('estimate_depth_example/KERAS_check_best_model.h5')
# Disabled alternative branch (reload the saved model instead of training):
# else:
#     from tensorflow.keras.models import load_model
#     from qkeras.utils import _add_supported_quantized_objects
#
#     co = {}
#     _add_supported_quantized_objects(co)
#     model = load_model('estimate_depth_example/KERAS_check_best_model.h5', custom_objects=co)

In [None]:
# Software (Keras) predictions for the test inputs.
y_sw = model.predict(X_test)

# Preview the first ten inputs, targets and predictions, in that order.
for batch in (X_test, y_test, y_sw):
    print(batch[0:10])

## NB
Note as well that the Vitis HLS resource estimates tend to _overestimate_ LUTs, while generally estimating the DSPs correctly. Running the subsequent stages of FPGA compilation reveals the more realistic resource usage. You can run the next step, 'logic synthesis', with `hls_model.build(synth=True, vsynth=True)`, but we skipped it in this tutorial in the interest of time.

In [None]:
import hls4ml
import plotting

# Start from the model-level configuration hls4ml derives from the Keras
# model, then override the strategy and the default fixed-point precision.
config = hls4ml.utils.config_from_keras_model(model, granularity='model')
model_config = config['Model']
model_config['Strategy'] = 'Resource'
model_config['Precision']['default'] = 'ap_fixed<4,2>'

# Show the final configuration between two separator rules.
rule = "-----------------------------------"
print(rule)
plotting.print_dict(config)
print(rule)

# Convert the Keras model into an hls4ml project for the PYNQ-Z2 board using
# the VivadoAccelerator backend with streaming I/O, then compile it.
conversion_options = dict(
    hls_config=config,
    output_dir='estimate_depth_example/hls4ml_prj_pynq',
    backend='VivadoAccelerator',
    board='pynq-z2',
    io_type='io_stream',
)
hls_model = hls4ml.converters.convert_from_keras_model(model, **conversion_options)
hls_model.compile()

In [None]:
# Print the initial configuration that the 'VivadoAccelerator' backend creates by default.
plotting.print_dict(hls4ml.backends.get_backend('VivadoAccelerator').create_initial_config())

In [None]:
# Draw the converted model with shapes and precisions shown; to_file=None is passed, so no file name is given for the plot.
hls4ml.utils.plot_model(hls_model, show_shapes=True, show_precision=True, to_file=None)

In [None]:
# Run the hardware build with C simulation disabled and with export and bitfile generation requested.
# NOTE(review): this is presumably the long-running step of the notebook -- confirm before re-running.
hls_model.build(csim=False, export=True, bitfile=True)

In [None]:
# Print lines 30-45 of the placed-design utilization report written by the build above.
!sed -n '30,45p' estimate_depth_example/hls4ml_prj_pynq/myproject_vivado_accelerator/project_1.runs/impl_1/design_1_wrapper_utilization_placed.rpt

In [None]:
# Collect the deployment files in a `package` directory inside the hls4ml project.
!mkdir -p estimate_depth_example/hls4ml_prj_pynq/package
# Copy the bitstream and the hardware handoff file under a common base name (hls4ml_nn.*).
!cp estimate_depth_example/hls4ml_prj_pynq/myproject_vivado_accelerator/project_1.runs/impl_1/design_1_wrapper.bit estimate_depth_example/hls4ml_prj_pynq/package/hls4ml_nn.bit
!cp estimate_depth_example/hls4ml_prj_pynq/myproject_vivado_accelerator/project_1.srcs/sources_1/bd/design_1/hw_handoff/design_1.hwh estimate_depth_example/hls4ml_prj_pynq/package/hls4ml_nn.hwh
# Copy the Python driver found in the project directory.
!cp estimate_depth_example/hls4ml_prj_pynq/axi_stream_driver.py estimate_depth_example/hls4ml_prj_pynq/package/
# NOTE(review): saving/copying the test arrays is disabled below, so the package
# contains no X_test/y_test -- confirm the deployment notebook does not need them.
#np.save('estimate_depth_example/hls4ml_prj_pynq/package/X_test.npy', X_test)
#np.save('estimate_depth_example/hls4ml_prj_pynq/package/y_test.npy', y_test)
#!cp X_test.npy y_test.npy estimate_sign_example/hls4ml_prj_pynq/package
# Include the deployment notebook in the package.
!cp part7b_deployment.ipynb estimate_depth_example/hls4ml_prj_pynq/package

# Bundle the contents of the package directory into a gzip-compressed tarball.
!tar -czvf estimate_depth_example/hls4ml_prj_pynq/package.tar.gz -C estimate_depth_example/hls4ml_prj_pynq/package/ .

In [None]:
# Condensed version of the conversion and build steps shown earlier in the notebook.
import hls4ml

# NOTE(review): `mymodel` is not defined anywhere in the visible part of this
# notebook, so this line raises NameError unless it is defined elsewhere --
# confirm, or replace it with the model that should be converted.
model = mymodel()

# Model-level hls4ml configuration with the 'Resource' strategy. Unlike the
# earlier conversion cell, no default precision override is applied here.
config = hls4ml.utils.config_from_keras_model(model, granularity='model')
config['Model']['Strategy'] = 'Resource'

# NOTE(review): output_dir is the same directory used by the earlier
# conversion cell, so both conversions target the same project directory -- confirm this is intended.
hls_model = hls4ml.converters.convert_from_keras_model(
    model, 
    hls_config=config,
    output_dir='estimate_depth_example/hls4ml_prj_pynq',
    backend='VivadoAccelerator',
    board='pynq-z2',
    io_type='io_stream'
)
hls_model.compile()

# Run the hardware build with C simulation disabled and with export and bitfile generation requested.
hls_model.build(csim=False, export=True, bitfile=True)