# Quantizing and Compiling Models

Related fields:
- tensor_name_to_range
- origin

Print the model's input/output summary.

In [1]:
from furiosa.models import vision
from furiosa.quantizer import quantize
from furiosa.runtime.sync import create_runner

import onnx

# Fetch ResNet50 from the model collection; it carries both the original model
# (`origin`) and the calibration ranges (`tensor_name_to_range`) used below.
model = vision.ResNet50()
calibration_ranges = model.tensor_name_to_range
first_calibration_entry = next(iter(calibration_ranges.items()))
print("First field of calibration ranges:", first_calibration_entry)

# Parse the original model with onnx, then quantize it with the calibration ranges.
f32_onnx_model = onnx.load_from_string(model.origin)
quantized_onnx = quantize(f32_onnx_model, calibration_ranges)

# Creating a runner from the quantized model and printing its input/output summary.
with create_runner(quantized_onnx) as runner:
    runner.model.print_summary()

libfuriosa_hal.so --- v0.11.0, built @ 43c901f
libfuriosa_hal.so --- v0.11.0, built @ 43c901f


First field of calibration ranges: ('input_tensor:0', (-123.5584560111165, 150.34208860248327))
[2m2023-08-28T01:28:05.078547Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m FuriosaRT (v0.10.0, rev: ac1a04a8d, built at: 2023-08-08T12:15:46Z) bootstrapping ...
[2m2023-08-28T01:28:05.084541Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m Found furiosa-compiler (v0.10.0, rev: f8f05c8, built at: 2023-08-08T11:58:09Z)
[2m2023-08-28T01:28:05.084554Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m Found libhal (type: warboy, v0.11.0, rev: 43c901f built at: 2023-08-08T12:07:35Z)
[2m2023-08-28T01:28:05.084557Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m [Runtime-0] detected 1 NPU device(s):
[2m2023-08-28T01:28:05.094238Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m - [0] npu:2:0-1 (warboy-b0-2pe, 128dpes, firmware: 1.7

Run inferences with scaling

In [2]:
import numpy as np
from time import perf_counter


# Quantize the float32 model with the calibration ranges shipped with the model.
# This repeats the call from the first cell, so this cell only relies on
# `f32_onnx_model` and `model` being defined.
quantized_onnx = quantize(f32_onnx_model, model.tensor_name_to_range)

# Number of timed inference calls; the reported average is derived from it.
num_runs = 1000

with create_runner(quantized_onnx) as runner:
    input_tensor_desc = runner.model.inputs()
    runner.model.print_summary()
    # One random array per model input, cast to the dtype its descriptor reports.
    fake_input = [
        np.asarray(np.random.randint(256, size=desc.shape), dtype=desc.dtype.numpy)
        for desc in input_tensor_desc
    ]
    starting_time = perf_counter()
    for _ in range(num_runs):
        runner.run(fake_input)
    elapsed_seconds = perf_counter() - starting_time
    # perf_counter() measures seconds. Convert to milliseconds per run explicitly
    # instead of relying on the run count happening to equal 1000.
    print("Average inference time:", elapsed_seconds * 1000 / num_runs, "ms")

[2m2023-08-28T01:28:13.806497Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m FuriosaRT (v0.10.0, rev: ac1a04a8d, built at: 2023-08-08T12:15:46Z) bootstrapping ...
[2m2023-08-28T01:28:13.811804Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m Found furiosa-compiler (v0.10.0, rev: f8f05c8, built at: 2023-08-08T11:58:09Z)
[2m2023-08-28T01:28:13.811809Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m Found libhal (type: warboy, v0.11.0, rev: 43c901f built at: 2023-08-08T12:07:35Z)
[2m2023-08-28T01:28:13.811811Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m [Runtime-1] detected 1 NPU device(s):
[2m2023-08-28T01:28:13.823402Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m - [0] npu:2:0-1 (warboy-b0-2pe, 128dpes, firmware: 1.7.0, f7b0f28)
[2m2023-08-28T01:28:13.823534Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::eve

Run inferences without input scaling (and quantization).


See [performance tuning guide](https://furiosa-ai.github.io/docs/latest/ko/software/performance.html#quantize) for more details.


Please note that the input data type has been changed to `uint8`.

In [3]:
from copy import deepcopy
from furiosa.quantizer import ModelEditor, get_pure_input_names, TensorType


# Edit a copy so the original float32 model stays untouched for the other cells.
model_wo_input_quantize = deepcopy(f32_onnx_model)
editor = ModelEditor(model_wo_input_quantize)
# Switch every pure input of the model to uint8 before quantizing.
for input_name in get_pure_input_names(model_wo_input_quantize):
    editor.convert_input_type(input_name, TensorType.UINT8)
quantized_onnx_wo_input_quantize = quantize(model_wo_input_quantize, model.tensor_name_to_range)

# Number of timed inference calls; the reported average is derived from it.
num_runs = 1000

with create_runner(quantized_onnx_wo_input_quantize) as runner:
    input_tensor_desc = runner.model.inputs()
    runner.model.print_summary()
    # One random array per model input, generated directly in the dtype its
    # descriptor reports.
    fake_input = [
        np.random.randint(256, size=desc.shape, dtype=desc.dtype.numpy)
        for desc in input_tensor_desc
    ]
    starting_time = perf_counter()
    for _ in range(num_runs):
        runner.run(fake_input)
    elapsed_seconds = perf_counter() - starting_time
    # perf_counter() measures seconds. Convert to milliseconds per run explicitly
    # instead of relying on the run count happening to equal 1000.
    print("Average inference time:", elapsed_seconds * 1000 / num_runs, "ms")

[2m2023-08-28T01:28:27.838330Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m FuriosaRT (v0.10.0, rev: ac1a04a8d, built at: 2023-08-08T12:15:46Z) bootstrapping ...
[2m2023-08-28T01:28:27.844387Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m Found furiosa-compiler (v0.10.0, rev: f8f05c8, built at: 2023-08-08T11:58:09Z)
[2m2023-08-28T01:28:27.844399Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m Found libhal (type: warboy, v0.11.0, rev: 43c901f built at: 2023-08-08T12:07:35Z)
[2m2023-08-28T01:28:27.844403Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m [Runtime-2] detected 1 NPU device(s):
[2m2023-08-28T01:28:27.854235Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m - [0] npu:2:0-1 (warboy-b0-2pe, 128dpes, firmware: 1.7.0, f7b0f28)
[2m2023-08-28T01:28:27.854453Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::eve

You can also compile the model for, and run it on, a single PE only.

In [4]:
from furiosa.runtime.sync import Runtime


# Number of timed inference calls; the reported average is derived from it.
num_runs = 1000

# Open a runtime with an explicit device specification, then create the runner
# from it. `quantized_onnx_wo_input_quantize` comes from the previous cell.
with Runtime(device="warboy(1)*1") as runtime:
    with runtime.create_runner(quantized_onnx_wo_input_quantize) as runner:
        input_tensor_desc = runner.model.inputs()
        runner.model.print_summary()
        # One random array per model input, generated directly in the dtype its
        # descriptor reports.
        fake_input = [
            np.random.randint(256, size=desc.shape, dtype=desc.dtype.numpy)
            for desc in input_tensor_desc
        ]
        starting_time = perf_counter()
        for _ in range(num_runs):
            runner.run(fake_input)
        elapsed_seconds = perf_counter() - starting_time
        # perf_counter() measures seconds. Convert to milliseconds per run
        # explicitly instead of relying on the run count happening to equal 1000.
        print("Average inference time:", elapsed_seconds * 1000 / num_runs, "ms")

[2m2023-08-28T01:28:39.079503Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m FuriosaRT (v0.10.0, rev: ac1a04a8d, built at: 2023-08-08T12:15:46Z) bootstrapping ...
[2m2023-08-28T01:28:39.084505Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m Found furiosa-compiler (v0.10.0, rev: f8f05c8, built at: 2023-08-08T11:58:09Z)
[2m2023-08-28T01:28:39.084523Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m Found libhal (type: warboy, v0.11.0, rev: 43c901f built at: 2023-08-08T12:07:35Z)
[2m2023-08-28T01:28:39.084529Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m [Runtime-3] detected 1 NPU device(s):
[2m2023-08-28T01:28:39.094433Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driven::coord[0m[2m:[0m - [0] npu:2:0 (warboy-b0, 64dpes, firmware: 1.7.0, f7b0f28)
[2m2023-08-28T01:28:39.094599Z[0m [32m INFO[0m [2mfuriosa_rt_core::driver::event_driv