In [1]:
import keras
import numpy as np
from HGQ.layers import HDense, HConv2D, PMaxPooling2D, PFlatten, PReshape, HQuantize
from HGQ import ResetMinMax, FreeBOPs
from HGQ import trace_minmax, to_proxy_model

In [2]:
# Fetch the MNIST train/test splits, then cast the image arrays to float32 and
# divide by 255 (labels are left untouched).
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train, x_test = (images.astype('float32') / 255 for images in (x_train, x_test))

In [3]:
# Single source of truth for the `beta` value passed to every H* layer below,
# so it can be tuned in one place instead of four.
# NOTE(review): per the HGQ docs `beta` is understood to weight the resource
# (BOPs) regularization term — confirm before tuning.
BETA = 3e-5

# Small HGQ CNN for 28x28 inputs: input quantizer, two pool+conv stages, a
# final pool, then a 10-way dense head (no activation is passed to it).
model = keras.models.Sequential([
    HQuantize(beta=BETA),
    PReshape((28, 28, 1)),  # append a trailing channel axis of size 1
    PMaxPooling2D((2, 2)),
    # parallel_factor=144 equals the 12x12 output positions of this layer
    # (see the model summary); presumably a full-unroll hint — TODO confirm.
    HConv2D(1, (3, 3), activation='relu', beta=BETA, parallel_factor=144),
    PMaxPooling2D((2, 2)),
    # parallel_factor=16 likewise equals the 4x4 output positions of this layer.
    HConv2D(1, (3, 3), activation='relu', beta=BETA, parallel_factor=16),
    PMaxPooling2D((2, 2)),
    PFlatten(),
    HDense(10, beta=BETA)
])

In [4]:
# Training objects. No activation is passed to the final HDense layer, so the
# loss is configured to consume raw logits.
opt = keras.optimizers.Adam(learning_rate=0.001)
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# HGQ-provided callbacks.
# NOTE(review): presumably these reset the tracked activation ranges and
# report BOPs during training — confirm against the HGQ docs.
callbacks = [ResetMinMax(), FreeBOPs()]
model.compile(loss=loss, optimizer=opt, metrics=['accuracy'])

# Deliberately tiny run: a single epoch over the first 100 training samples.
model.fit(
    x_train[:100],
    y_train[:100],
    batch_size=32,
    epochs=1,
    callbacks=callbacks,
)
model.summary()

2024-06-27 15:06:03.457498: I external/local_xla/xla/service/service.cc:168] XLA service 0x177343d00 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2024-06-27 15:06:03.457520: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
I0000 00:00:1719493563.465704       1 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
2024-06-27 15:06:03.465881: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.
2024-06-27 15:06:05.925208: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-06-27 15:06:06.118721: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.




2024-06-27 15:06:06.284652: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.
2024-06-27 15:06:06.284716: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.
2024-06-27 15:06:06.285390: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 h_quantize (HQuantize)      (None, 28, 28)            786       
                                                                 
 p_reshape (PReshape)        (None, 28, 28, 1)         0         
                                                                 
 p_max_pooling2d (PMaxPooli  (None, 14, 14, 1)         0         
 ng2D)                                                           
                                                                 
 h_conv2d (HConv2D)          (None, 12, 12, 1)         165       
                                                                 
 p_max_pooling2d_1 (PMaxPoo  (None, 6, 6, 1)           0         
 ling2D)                                                         
                                                                 
 h_conv2d_1 (HConv2D)        (None, 4, 4, 1)           3

In [5]:
# Pass the full training set through the trained model via HGQ's trace_minmax.
# The cell output lists one number per H* layer.
# NOTE(review): presumably this records activation min/max for calibration and
# the printed numbers are per-layer BOPs estimates — confirm against HGQ docs.
trace_minmax(model, x_train, cover_factor=1.0)
# Build the proxy model from the traced HGQ model; the summary printed below
# shows plain Keras layers interleaved with FixedPointQuantizer layers.
proxy = to_proxy_model(model, aggressive=True)
proxy.summary()

h_quantize: 0.0
h_conv2d: 5920.0
h_conv2d_1: 894.0
h_dense: 219.0
Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 28)]          0         
                                                                 
 h_quantize (FixedPointQuan  (None, 28, 28)            2352      
 tizer)                                                          
                                                                 
 p_reshape (Reshape)         (None, 28, 28, 1)         0         
                                                                 
 p_max_pooling2d (MaxPoolin  (None, 14, 14, 1)         0         
 g2D)                                                            
                                                                 
 h_conv2d (Conv2D)           (None, 12, 12, 1)         10        
                                                           

In [6]:
from qonnx.converters.keras import from_keras
import onnx
# Convert the proxy Keras model with QONNX's from_keras, which returns a pair;
# only onnx_model is used in the visible cells (external_storage is not).
# NOTE(review): the second argument is presumably the exported model's name;
# "test_qkeras_conversion" looks carried over from a QKeras example — confirm.
onnx_model, external_storage = from_keras(proxy, "test_qkeras_conversion", opset=9)
# Write the converted model to disk; later cells read it back from this path.
onnx.save(onnx_model, '/tmp/hgq.onnx')

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 28)]          0         
                                                                 
 h_quantize (FixedPointQuan  (None, 28, 28)            2352      
 tizer)                                                          
                                                                 
 p_reshape (Reshape)         (None, 28, 28, 1)         0         
                                                                 
 p_max_pooling2d (MaxPoolin  (None, 14, 14, 1)         0         
 g2D)                                                            
                                                                 
 h_conv2d (Conv2D)           (None, 12, 12, 1)         10        
                                                                 
 h_conv2d_quantizer (FixedP  (None, 12, 12, 1)         432 

2024-06-27 15:06:14.062161: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-06-27 15:06:14.171857: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session


In [7]:
import netron
import os
from IPython.display import IFrame

def showInNetron(model_filename: str, localhost_url: str = None, port: int = None):
    """Serve an ONNX model through Netron and embed the viewer in the notebook.

    :param model_filename: Path of the ONNX model file to display.
    :type model_filename: str

    :param localhost_url: Host name or IP placed in the IFrame URL. When not
     given, the ``LOCALHOST_URL`` environment variable is used, falling back
     to ``localhost``.
    :type localhost_url: str, optional

    :param port: Port shared by the Netron server and the IFrame URL. When not
     given, the ``NETRON_PORT`` environment variable is used, falling back to
     8088 (also when that variable is not a valid integer).
    :type port: int, optional

    :return: The IFrame pointing at the Netron viewer.
    :rtype: IPython.lib.display.IFrame
    """
    # Resolve the port: explicit argument first, then environment, then 8088.
    if not port:
        try:
            port = int(os.getenv("NETRON_PORT", default="8088"))
        except ValueError:
            port = 8088

    # Resolve the host used in the browser-facing URL.
    if not localhost_url:
        localhost_url = os.getenv("LOCALHOST_URL", default="localhost")

    # The server binds to every interface; only the IFrame URL uses localhost_url.
    netron.start(model_filename, address=("0.0.0.0", port), browse=False)

    viewer_src = f"http://{localhost_url}:{port}/"
    return IFrame(src=viewer_src, width="100%", height=400)

In [8]:
# Start Netron on the exported model; the returned IFrame is this cell's output.
showInNetron('/tmp/hgq.onnx')

Serving '/tmp/hgq.onnx' at http://0.0.0.0:8088


In [11]:
from qonnx.util.exec_qonnx import exec_qonnx
# Use one shared slice so the QONNX run and the proxy model see the same
# first 100 test images.
x_eval = x_test[:100]
np.save("/tmp/x_test.npy", x_eval)
# Execute the exported ONNX file on the saved inputs, then run the proxy model.
qonnx_out = exec_qonnx('/tmp/hgq.onnx', "/tmp/x_test.npy")
hgq_out = proxy.predict(x_eval)
# Cell result: True only if every output element matches within np.isclose's
# default tolerances.
np.isclose(qonnx_out, hgq_out).all()

Batch [100/100]: running: 100%|██████████| 100/100 [00:01<00:00, 73.61it/s]




True