In [1]:
import os
import sys
import logging
import argparse

import numpy as np
import tensorrt as trt
from cuda import cudart
import common
from image_batcher import ImageBatcher


In [2]:
logging.basicConfig(level=logging.INFO)
logging.getLogger("EngineBuilder").setLevel(logging.INFO)
log = logging.getLogger("EngineBuilder")

In [3]:
class EngineCalibrator(trt.IInt8EntropyCalibrator2):
    """
    Implements the INT8 Entropy Calibrator 2.
    """

    def __init__(self, cache_file):
        """
        :param cache_file: The location of the cache file.
        """
        super().__init__()
        self.cache_file = cache_file
        self.image_batcher = None
        self.batch_allocation = None
        self.batch_generator = None

    def set_image_batcher(self, image_batcher: ImageBatcher):
        """
        Define the image batcher to use, if any. If using only the cache file, an image batcher doesn't need
        to be defined.
        :param image_batcher: The ImageBatcher object
        """
        self.image_batcher = image_batcher
        size = int(np.dtype(self.image_batcher.dtype).itemsize *
                   np.prod(self.image_batcher.shape))
        self.batch_allocation = common.cuda_call(cudart.cudaMalloc(size))
        self.batch_generator = self.image_batcher.get_batch()

    def get_batch_size(self):
        """
        Overrides from trt.IInt8EntropyCalibrator2.
        Get the batch size to use for calibration.
        :return: Batch size.
        """
        if self.image_batcher:
            return self.image_batcher.batch_size
        return 1

    def get_batch(self, names):
        """
        Overrides from trt.IInt8EntropyCalibrator2.
        Get the next batch to use for calibration, as a list of device memory pointers.
        :param names: The names of the inputs, if useful to define the order of inputs.
        :return: A list of int-casted memory pointers.
        """
        if not self.image_batcher:
            return None
        try:
            batch, _, _ = next(self.batch_generator)
            log.info("Calibrating image {} / {}".format(
                self.image_batcher.image_index, self.image_batcher.num_images))
            common.memcpy_host_to_device(
                self.batch_allocation, np.ascontiguousarray(batch))
            return [int(self.batch_allocation)]
        except StopIteration:
            log.info("Finished calibration batches")
            return None

    def read_calibration_cache(self):
        """
        Overrides from trt.IInt8EntropyCalibrator2.
        Read the calibration cache file stored on disk, if it exists.
        :return: The contents of the cache file, if any.
        """
        if os.path.exists(self.cache_file):
            with open(self.cache_file, "rb") as f:
                log.info("Using calibration cache file: {}".format(
                    self.cache_file))
                return f.read()

    def write_calibration_cache(self, cache):
        """
        Overrides from trt.IInt8EntropyCalibrator2.
        Store the calibration cache to a file on disk.
        :param cache: The contents of the calibration cache to store.
        """
        with open(self.cache_file, "wb") as f:
            log.info("Writing calibration cache data to: {}".format(
                self.cache_file))
            f.write(cache)

In [4]:

logger = trt.Logger(trt.Logger.INFO)
trt.init_libnvinfer_plugins(logger, namespace="")
builder = trt.Builder(logger)
config= builder.create_builder_config()
config.max_workspace_size = 2**30
network_flags = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
network = builder.create_network(network_flags)
parser = trt.OnnxParser(network, logger)
onnx_path = 'average_model.onnx'
with open(onnx_path, "rb") as f:
    parser.parse(f.read())
inputs = [network.get_input(i) for i in range(network.num_inputs)]
outputs = [network.get_output(i) for i in range(network.num_outputs)]
for input in inputs:
    batch_size = input.shape[0]
config.set_flag(trt.BuilderFlag.STRICT_TYPES)
config.set_flag(trt.BuilderFlag.FP16)
config.set_flag(trt.BuilderFlag.INT8)
calib_shape = [8] + list(inputs[0].shape[1:])
calib_dtype = trt.nptype(inputs[0].dtype)
img_batch = ImageBatcher('calibration/', calib_shape, calib_dtype, max_num_images=458, exact_batches=True)



  config.max_workspace_size = 2**30


[12/18/2023-11:56:42] [TRT] [I] [MemUsageChange] Init CUDA: CPU +10, GPU +0, now: CPU 31, GPU 706 (MiB)
[12/18/2023-11:56:48] [TRT] [I] [MemUsageChange] Init builder kernel library: CPU +227, GPU +34, now: CPU 334, GPU 725 (MiB)
[12/18/2023-11:56:48] [TRT] [W] onnx2trt_utils.cpp:374: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.


In [5]:
calibrator = EngineCalibrator('calibration.cache')

In [6]:
config.int8_calibrator= calibrator
config.int8_calibrator.set_image_batcher(img_batch)

In [7]:
engine = builder.build_serialized_network(network, config)

[12/18/2023-11:57:08] [TRT] [I] BuilderFlag::kTF32 is set but hardware does not support TF32. Disabling TF32.
[12/18/2023-11:57:08] [TRT] [W] Reshape_306: IShuffleLayer with zeroIsPlaceHolder=true has reshape dimension at position 1 that might or might not be zero. TensorRT resolves it at runtime, but this may cause excessive memory consumption and is usually a sign of a bug in the network.
[12/18/2023-11:57:08] [TRT] [W] Reshape_345: IShuffleLayer with zeroIsPlaceHolder=true has reshape dimension at position 1 that might or might not be zero. TensorRT resolves it at runtime, but this may cause excessive memory consumption and is usually a sign of a bug in the network.
[12/18/2023-11:57:08] [TRT] [W] Reshape_384: IShuffleLayer with zeroIsPlaceHolder=true has reshape dimension at position 1 that might or might not be zero. TensorRT resolves it at runtime, but this may cause excessive memory consumption and is usually a sign of a bug in the network.
[12/18/2023-11:57:08] [TRT] [W] Reshap

INFO:EngineBuilder:Calibrating image 8 / 456


[12/18/2023-11:57:14] [TRT] [E] 1: [softMaxV2Runner.cpp::execute::226] Error Code 1: Cask (shader run failed)
[12/18/2023-11:57:14] [TRT] [E] 3: [engine.cpp::~Engine::298] Error Code 3: API Usage Error (Parameter check failed at: runtime/api/engine.cpp::~Engine::298, condition: mExecutionContextCounter.use_count() == 1. Destroying an engine object before destroying the IExecutionContext objects it created leads to undefined behavior.
)
[12/18/2023-11:57:14] [TRT] [E] 2: [calibrator.cpp::calibrateEngine::1181] Error Code 2: Internal Error (Assertion context->executeV2(&bindings[0]) failed. )


In [8]:
print(engine)

None
