In [1]:
import tensorrt as trt
import os, sys

def _trt_version():
    """Return the installed TensorRT version as a ``(major, minor)`` tuple of ints.

    Parsing the dotted components (instead of slicing characters off the
    version string) keeps comparisons correct for multi-digit components such
    as "10.0.1" or "8.10.0".
    """
    parts = trt.__version__.split(".")
    major = int(parts[0])
    minor = int(parts[1]) if len(parts) > 1 else 0
    return major, minor


def build_engine(model_file, shapes, max_ws=512*1024*1024, fp16=False, timing_cache=None, faster_dynamic_shapes=False):
    """Parse an ONNX model and build a serialized TensorRT engine from it.

    Args:
        model_file: Path of the ONNX file to parse.
        shapes: Iterable of dicts with the keys ``'name'``, ``'min'``, ``'opt'``
            and ``'max'``; each one is registered on a single optimization profile.
        max_ws: Limit, in bytes, applied to the builder's WORKSPACE memory pool.
        fp16: When true, the FP16 builder flag is set.
        timing_cache: Optional path of a timing cache file. If the file exists it
            is loaded before the build, otherwise an empty cache is created; in
            both cases the cache is written back to this path after the build.
        faster_dynamic_shapes: Value passed to the FASTER_DYNAMIC_SHAPES_0805
            preview feature when the installed TensorRT exposes that flag.

    Returns:
        Whatever ``builder.build_serialized_network`` returns (the serialized
        engine), or ``None`` when the ONNX parser reports a failure.

    Raises:
        SystemExit: If ``faster_dynamic_shapes`` is requested on TensorRT < 8.5.
    """
    trt_version = _trt_version()
    if faster_dynamic_shapes and trt_version < (8, 5):
        print("Faster dynamic shapes preview feature is only supported on TRT 8.5+")
        sys.exit(1)

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(TRT_LOGGER)

    config = builder.create_builder_config()
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, max_ws)
    if fp16:
        config.flags |= 1 << int(trt.BuilderFlag.FP16)

    # Only touch the preview flag when this TensorRT build actually defines it;
    # dereferencing it unconditionally raises AttributeError on releases without it.
    # NOTE(review): newer releases appear to have dropped this flag because the
    # behaviour became the default - confirm against the release notes.
    preview_flag = getattr(getattr(trt, "PreviewFeature", None), "FASTER_DYNAMIC_SHAPES_0805", None)
    if preview_flag is not None:
        config.set_preview_feature(preview_flag, faster_dynamic_shapes)

    profile = builder.create_optimization_profile()
    for s in shapes:
        profile.set_shape(s['name'], min=s['min'], opt=s['opt'], max=s['max'])
    config.add_optimization_profile(profile)

    timing_cache_available = trt_version >= (8, 0) and timing_cache is not None
    # Load the global timing cache, or start from an empty one if the file is missing.
    if timing_cache_available:
        cache_bytes = b""
        if os.path.exists(timing_cache):
            with open(timing_cache, "rb") as f:
                cache_bytes = f.read()
        cache = config.create_timing_cache(cache_bytes)
        config.set_timing_cache(cache, ignore_mismatch=False)

    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(explicit_batch)

    # Read the model up front so the file is not kept open during the build.
    with open(model_file, 'rb') as model:
        model_bytes = model.read()

    with trt.OnnxParser(network, TRT_LOGGER) as parser:
        parsed = parser.parse(model_bytes)
        for i in range(parser.num_errors):
            print("TensorRT ONNX parser error:", parser.get_error(i))
        if not parsed:
            # Do not attempt to build from a network the parser rejected.
            return None

        engine = builder.build_serialized_network(network, config=config)

        # Save the global timing cache so later builds can reuse the measurements.
        if timing_cache_available:
            cache = config.get_timing_cache()
            with cache.serialize() as buffer:
                with open(timing_cache, "wb") as f:
                    f.write(buffer)
                    f.flush()
                    os.fsync(f.fileno())

        return engine
        
def load_engine(engine_filepath, trt_logger):
    """Read a serialized engine from disk and deserialize it.

    Args:
        engine_filepath: Path of the file holding the serialized engine bytes.
        trt_logger: Logger handed to the ``trt.Runtime`` used for deserialization.

    Returns:
        The result of ``runtime.deserialize_cuda_engine`` for the file's contents.
    """
    with open(engine_filepath, "rb") as engine_file:
        with trt.Runtime(trt_logger) as runtime:
            serialized = engine_file.read()
            return runtime.deserialize_cuda_engine(serialized)


In [2]:
# Build an FP16 engine for this model with a fixed input shape and save it to disk.
model_name = 'findCenter_folded_op17_v1'
onnx_path = f'../onnx/{model_name}.onnx'
# min == opt == max, so the optimization profile allows exactly one input shape.
static_shapes = [{"name": "input_tensor", "min": (1, 256, 180, 180), "opt": (1, 256, 180, 180), "max": (1, 256, 180, 180)}]
static_engine = build_engine(onnx_path, shapes=static_shapes, faster_dynamic_shapes=True, fp16=True)
engine_path = model_name + '.trt'
# Fail before opening the output file: otherwise a failed build leaves a
# zero-byte .trt behind and surfaces only as a TypeError from f.write(None).
if static_engine is None:
    raise RuntimeError(f"TensorRT engine build failed for {onnx_path}; see the TRT log output")
with open(engine_path, 'wb') as f:
    f.write(static_engine)

[12/14/2023-01:58:18] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage. See `CUDA_MODULE_LOADING` in https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
[12/14/2023-01:58:18] [TRT] [W] onnx2trt_utils.cpp:377: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
[12/14/2023-01:58:18] [TRT] [W] Tensor DataType is determined at build time for tensors not marked as input or output.
[12/14/2023-01:58:18] [TRT] [W] Tensor DataType is determined at build time for tensors not marked as input or output.
[12/14/2023-01:58:18] [TRT] [W] Tensor DataType is determined at build time for tensors not marked as input or output.
[12/14/2023-01:58:18] [TRT] [W] Tensor DataType is determined at build time for tensors not marked as input or output.
[12/14/2023-01:58:18] [TRT] [W] Tensor DataType is determined at build time for tensors not marked as in

In [3]:
# Build an FP16 engine for this model with a fixed input shape and save it to disk.
model_name = 'findCenter_folded_op11_v1'
onnx_path = f'../onnx/{model_name}.onnx'
# min == opt == max, so the optimization profile allows exactly one input shape.
static_shapes = [{"name": "input_tensor", "min": (1, 256, 180, 180), "opt": (1, 256, 180, 180), "max": (1, 256, 180, 180)}]
static_engine = build_engine(onnx_path, shapes=static_shapes, faster_dynamic_shapes=True, fp16=True)
engine_path = model_name + '.trt'
# Fail before opening the output file: otherwise a failed build leaves a
# zero-byte .trt behind and surfaces only as a TypeError from f.write(None).
if static_engine is None:
    raise RuntimeError(f"TensorRT engine build failed for {onnx_path}; see the TRT log output")
with open(engine_path, 'wb') as f:
    f.write(static_engine)

[12/14/2023-02:00:03] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage. See `CUDA_MODULE_LOADING` in https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
[12/14/2023-02:00:03] [TRT] [W] Tensor DataType is determined at build time for tensors not marked as input or output.
[12/14/2023-02:00:03] [TRT] [W] Tensor DataType is determined at build time for tensors not marked as input or output.
[12/14/2023-02:00:03] [TRT] [W] Tensor DataType is determined at build time for tensors not marked as input or output.
[12/14/2023-02:00:03] [TRT] [W] Tensor DataType is determined at build time for tensors not marked as input or output.
[12/14/2023-02:00:03] [TRT] [W] Tensor DataType is determined at build time for tensors not marked as input or output.
[12/14/2023-02:00:03] [TRT] [W] Tensor DataType is determined at build time for tensors not marked as input or output.
[12/14/2023-02:00:03] [TRT] [E] Output tensor out_masks of 