FPGA ML inference with oneAPI backend

In [None]:
!which icpx

/opt/intel/oneapi/compiler/2024.2/bin/icpx


In [None]:
import os; os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

import tensorflow as tf; tf.get_logger().setLevel('INFO')

import hls4ml
import matplotlib.pyplot as plt

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, Conv1D, Conv2D, Flatten, MaxPool1D, MaxPool2D, Activation, BatchNormalization, Dropout
from tensorflow.keras.losses import MSE
from tensorflow.keras.optimizers import Adam
import numpy as np

Example of a Conv1D layer followed by a GRU (DMA hostpipe)



In [30]:
# Conv1D -> ReLU -> GRU stack on (32, 3) inputs, compiled with MSE loss and Adam.
model = Sequential(
    [
        Conv1D(16, kernel_size=3, padding='same', input_shape=(32, 3)),
        Activation(activation='relu', name='relu1'),
        GRU(16),
    ]
)
model.compile(loss='mse', optimizer=Adam())

In [None]:
# Convert the Keras model into an hls4ml model for the oneAPI backend (Agilex7 part).
# The project itself is written to the "hostpipe" directory later, by hls_model.compile().
hls_model = hls4ml.converters.convert_from_keras_model(
    model=model, backend="oneAPI", part="Agilex7", output_dir="hostpipe"
)

Interpreting Sequential
Topology:
Layer name: conv1d_4_input, layer type: InputLayer, input shapes: [[None, 32, 3]], output shape: [None, 32, 3]
Layer name: conv1d_4, layer type: Conv1D, input shapes: [[None, 32, 3]], output shape: [None, 32, 16]
Layer name: relu1, layer type: Activation, input shapes: [[None, 32, 16]], output shape: [None, 32, 16]
Layer name: gru_4, layer type: GRU, input shapes: [[None, 32, 16]], output shape: [None, 16]
Creating HLS model


In [33]:
# Write the generated HLS project to disk and invoke the oneAPI backend on it.
# NOTE(review): presumably this must succeed before hls_model.predict() can be used — confirm.
hls_model.compile()

Writing HLS project
Done


In [34]:
# Batch of three identical all-ones samples, each shaped (32, 3) like the model input,
# so every row of the prediction is expected to be the same.
X_test = np.ones(shape=(3, 32, 3))
hls_model.predict(X_test)

array([[ 0.34667969, -0.32226562,  0.49316406, -0.2265625 , -0.48046875,
        -0.55273438, -0.04882812, -0.16699219,  0.21972656, -0.31347656,
         0.25390625, -0.15820312,  0.17578125,  0.2890625 ,  0.30957031,
         0.06054688],
       [ 0.34667969, -0.32226562,  0.49316406, -0.2265625 , -0.48046875,
        -0.55273438, -0.04882812, -0.16699219,  0.21972656, -0.31347656,
         0.25390625, -0.15820312,  0.17578125,  0.2890625 ,  0.30957031,
         0.06054688],
       [ 0.34667969, -0.32226562,  0.49316406, -0.2265625 , -0.48046875,
        -0.55273438, -0.04882812, -0.16699219,  0.21972656, -0.31347656,
         0.25390625, -0.15820312,  0.17578125,  0.2890625 ,  0.30957031,
         0.06054688]])

Dense layers with streaming I/O (`io_stream`)

In [None]:
def get_dense():
    """Build a two-layer dense Keras model and convert it to a streaming oneAPI hls4ml model.

    The Keras model takes 8 inputs and stacks Dense(4, name='fc1') and
    Dense(2, name='fc2') with no explicit activation. Its summary is printed
    as a side effect.

    Returns:
        tuple: ``(keras_model, hls_config, hls_model)``. ``hls_config`` is the
        hls4ml configuration generated with ``granularity='name'`` and default
        precision ``'ac_fixed<16, 6>'``; ``hls_model`` is the conversion result
        using ``io_type="io_stream"`` and ``output_dir="dense_streaming"``.
    """
    keras_model = Sequential(
        [
            Dense(4, input_shape=(8,), name='fc1'),
            Dense(2, name='fc2'),
        ]
    )
    keras_model.compile()
    keras_model.summary()

    hls_config = hls4ml.utils.config_from_keras_model(
        keras_model,
        granularity='name',
        default_precision='ac_fixed<16, 6>',
    )
    converted = hls4ml.converters.convert_from_keras_model(
        model=keras_model,
        hls_config=hls_config,
        io_type="io_stream",
        backend="oneAPI",
        part="Agilex7",
        output_dir="dense_streaming",
    )
    return keras_model, hls_config, converted
# Build both models, compile the hls4ml project, then run a single all-ones sample
# whose length (8) matches the input_shape=(8,) declared on 'fc1' in get_dense().
mlp_cpu, config, mlp_hls = get_dense()
mlp_hls.compile()
mlp_hls.predict(np.ones(8))

MLP streaming

In [None]:
def get_mlp():
    """Build a small ReLU MLP in Keras and convert it to a streaming oneAPI hls4ml model.

    The Keras model takes 8 inputs and stacks Dense(4, 'fc1') -> ReLU ('relu1')
    -> Dense(2, 'fc2') -> ReLU ('relu2'). Its summary is printed as a side effect.

    Returns:
        tuple: ``(keras_model, hls_config, hls_model)``. ``hls_config`` is the
        hls4ml configuration generated with ``granularity='name'`` and default
        precision ``'ac_fixed<16, 6>'``; ``hls_model`` is the conversion result
        using ``io_type="io_stream"`` and ``output_dir="mlp_streaming"``.
    """
    keras_model = Sequential(
        [
            Dense(4, input_shape=(8,), name='fc1'),
            Activation(activation='relu', name='relu1'),
            Dense(2, name='fc2'),
            Activation(activation='relu', name='relu2'),
        ]
    )
    keras_model.compile()
    keras_model.summary()

    hls_config = hls4ml.utils.config_from_keras_model(
        keras_model,
        granularity='name',
        default_precision='ac_fixed<16, 6>',
    )
    converted = hls4ml.converters.convert_from_keras_model(
        model=keras_model,
        hls_config=hls_config,
        io_type="io_stream",
        backend="oneAPI",
        part="Agilex7",
        output_dir="mlp_streaming",
    )
    return keras_model, hls_config, converted
# Build the Keras MLP and its hls4ml conversion; this cell does not compile or run the
# hls4ml model. Rebinds mlp_cpu / config / mlp_hls.
mlp_cpu, config, mlp_hls = get_mlp()

In [None]:
def get_larger_mlp():
    """Build a 16-input, 5-class MLP in Keras and convert it to a oneAPI hls4ml model.

    Architecture: Dense(64, 'fc1') -> ReLU -> Dense(32, 'fc2') -> ReLU ->
    Dense(32, 'fc3') -> ReLU -> Dense(5, 'output') -> Softmax, with every Dense
    layer using the 'lecun_uniform' kernel initializer. The model is compiled
    with MSE loss and Adam, and its summary is printed as a side effect.

    Returns:
        tuple: ``(keras_model, hls_config, hls_model)``. ``hls_config`` is the
        hls4ml configuration generated with ``granularity='name'`` and default
        precision ``'ac_fixed<16, 6>'``; ``hls_model`` is the conversion result
        written to ``output_dir="model_mlp_out"``. No ``io_type`` is passed, so
        the converter's default applies.
    """
    init = 'lecun_uniform'
    layers = [
        Dense(64, input_shape=(16,), name='fc1', kernel_initializer=init),
        Activation(activation='relu', name='relu1'),
        Dense(32, name='fc2', kernel_initializer=init),
        Activation(activation='relu', name='relu2'),
        Dense(32, name='fc3', kernel_initializer=init),
        Activation(activation='relu', name='relu3'),
        Dense(5, name='output', kernel_initializer=init),
        Activation(activation='softmax', name='softmax'),
    ]
    keras_model = Sequential(layers)
    keras_model.compile(loss='mse', optimizer=Adam())
    keras_model.summary()

    hls_config = hls4ml.utils.config_from_keras_model(
        keras_model,
        granularity='name',
        default_precision='ac_fixed<16, 6>',
    )
    converted = hls4ml.converters.convert_from_keras_model(
        model=keras_model,
        hls_config=hls_config,
        backend="oneAPI",
        part="Agilex7",
        output_dir="model_mlp_out",
    )

    return keras_model, hls_config, converted

# Build the 16-input MLP, compile its hls4ml project, and run one all-ones sample.
mlp_cpu, config, mlp_hls = get_larger_mlp()
mlp_hls.compile()
# Derive the sample shape from the Keras model so it always matches the network input
# (16 features here); an 8-element vector does not fit this model.
mlp_hls.predict(np.ones(mlp_cpu.input_shape[1:]))

CNN - MNIST

In [None]:
def get_cnn():
    """Build a single-layer Conv2D Keras model and convert it to a streaming oneAPI hls4ml model.

    The Keras model is one Conv2D layer with 5 filters and a 4x4 kernel on
    (5, 5, 3) inputs.

    Returns:
        tuple: ``(keras_model, hls_config, hls_model)``. ``hls_config`` is the
        hls4ml configuration generated with ``granularity='name'`` and default
        precision ``'ac_fixed<16, 6>'``; ``hls_model`` is the conversion result
        using ``io_type="io_stream"`` and ``output_dir="model_cnn_out"``.
    """
    keras_model = Sequential([Conv2D(5, (4, 4), input_shape=(5, 5, 3))])
    keras_model.compile()

    hls_config = hls4ml.utils.config_from_keras_model(
        keras_model,
        granularity='name',
        default_precision='ac_fixed<16, 6>',
    )
    converted = hls4ml.converters.convert_from_keras_model(
        model=keras_model,
        hls_config=hls_config,
        io_type="io_stream",
        backend="oneAPI",
        part="Agilex7",
        output_dir="model_cnn_out",
    )

    return keras_model, hls_config, converted

# Build the small (5, 5, 3) Conv2D model and its hls4ml conversion.
# NOTE(review): a later cell redefines get_cnn() and rebinds cnn_cpu / cnn_hls.
cnn_cpu, config, cnn_hls = get_cnn()

In [None]:
def get_cnn():
    """Build a two-stage CNN for 28x28x1 images and convert it to a oneAPI hls4ml model.

    Architecture: Conv2D(16, 3x3, relu, same) -> MaxPool2D(2x2) ->
    Conv2D(32, 3x3, relu, same) -> MaxPool2D(2x2) -> Flatten ->
    Dense(10, softmax). The model is compiled with the 'adam' optimizer,
    categorical cross-entropy loss and an accuracy metric; its summary is
    printed as a side effect.

    NOTE(review): this reuses the name ``get_cnn`` and the
    ``output_dir="model_cnn_out"`` of the smaller Conv2D example defined
    earlier in the notebook, so it shadows that definition once executed.

    Returns:
        tuple: ``(keras_model, hls_config, hls_model)``. ``hls_config`` is the
        hls4ml configuration generated with ``granularity='name'`` and default
        precision ``'ac_fixed<16, 6>'``. No ``io_type`` is passed, so the
        converter's default applies.
    """
    layers = [
        Conv2D(16, (3, 3), activation='relu', padding='same', input_shape=(28, 28, 1)),
        MaxPool2D(pool_size=(2, 2)),
        Conv2D(32, (3, 3), activation='relu', padding='same'),
        MaxPool2D(pool_size=(2, 2)),
        Flatten(),
        Dense(10, activation='softmax'),
    ]
    keras_model = Sequential(layers)
    keras_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    keras_model.summary()

    hls_config = hls4ml.utils.config_from_keras_model(
        keras_model,
        granularity='name',
        default_precision='ac_fixed<16, 6>',
    )
    converted = hls4ml.converters.convert_from_keras_model(
        model=keras_model,
        hls_config=hls_config,
        backend="oneAPI",
        part="Agilex7",
        output_dir="model_cnn_out",
    )

    return keras_model, hls_config, converted

# Build the 28x28x1 CNN and its hls4ml conversion; this rebinds cnn_cpu / config / cnn_hls,
# replacing the objects created from the smaller Conv2D example.
cnn_cpu, config, cnn_hls = get_cnn()

In [None]:
# Compile the hls4ml project for whichever CNN was converted last (cnn_hls).
cnn_hls.compile()

In [None]:
# Feed one all-ones image shaped like the Keras model's own input. cnn_cpu comes from the
# most recently executed get_cnn() (28x28x1 as the notebook is laid out), so a hard-coded
# (1, 5, 5, 3) batch would be rejected by Keras with a shape error.
cnn_cpu.predict(np.ones((1, *cnn_cpu.input_shape[1:])))

## Library and Layer Support

### ML framework support:

(Q)Keras

PyTorch

(Q)ONNX (in development)

### Neural network architectures:

Fully connected NN (multilayer perceptron, MLP)

Convolutional NN

Recurrent NN (LSTM, GRU)

Graph NN (GarNet)

### Layers:

- Core Layers

InputLayer, Dropout, Flatten, Dense, TernaryDense, BinaryDense, Transpose, Resize

- Convolution

Conv1D, Conv2D

- Pooling

MaxPooling1D, MaxPooling2D, AveragePooling1D, AveragePooling2D

- Normalization

BatchNormalization

- Activation

LeakyReLU, ThresholdedReLU, Sigmoid, ELU, PReLU, TanH, Binary TanH, Softmax, Softsign, SELU Activation