In [1]:
import onnx
import argparse
import math
import os
from typing import List, Tuple, Union
import logging
import tensorrt as trt
import sys

def remove_initializer_from_input(input, output):

    model = onnx.load(input)
    if model.ir_version < 4:
        print(
            'Model with ir_version below 4 requires to include initilizer in graph input'
        )
        return

    inputs = model.graph.input
    name_to_input = {}
    for input in inputs:
        name_to_input[input.name] = input

    for initializer in model.graph.initializer:
        if initializer.name in name_to_input:
            inputs.remove(name_to_input[initializer.name])

    onnx.save(model, output)
    

def reshape(model):
    
    # New width and height. Using -1,-1 so that we can use variable input size in model while using triton inference server.
    value = -1 
    inputs = model.graph.input
    outputs = model.graph.output

    inputs[0].type.tensor_type.shape.dim[0].dim_value = -1
    inputs[0].type.tensor_type.shape.dim[2].dim_value = value
    inputs[0].type.tensor_type.shape.dim[3].dim_value = value
    
    for output in outputs:
        output.type.tensor_type.shape.dim[0].dim_value = -1
        output.type.tensor_type.shape.dim[2].dim_value = value # 
        output.type.tensor_type.shape.dim[3].dim_value = value
    return model


TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)


def _build_engine_onnx(input_onnx: Union[str, bytes], force_fp16: bool = False, max_batch_size: int = 1,
                       max_workspace: int = 1024):

    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(EXPLICIT_BATCH) as network, \
            builder.create_builder_config() as config, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        has_fp16 = builder.platform_has_fast_fp16
        if force_fp16 or has_fp16:
            logging.info('Building TensorRT engine with FP16 support.')
            if not has_fp16:
                logging.warning('Builder reports no fast FP16 support. Performance drop expected.')
            config.set_flag(trt.BuilderFlag.FP16)
        else:
            logging.warning('Building engine in FP32 mode.')

        config.max_workspace_size = max_workspace * 1024 * 1024

        if not parser.parse(input_onnx):
            print('ERROR: Failed to parse the ONNX')
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            sys.exit(1)

        if max_batch_size != 1:
            logging.warning('Batch size !=1 is used. Ensure your inference code supports it.')
        profile = builder.create_optimization_profile()
        # Get input name and shape for building optimization profile
        input = network.get_input(0)
        inp_shape = list(input.shape)
        inp_shape[0] = 1
        min_opt_shape = tuple(inp_shape)
        inp_shape[0] = max_batch_size
        max_shape = tuple(inp_shape)
        input_name = input.name
        profile.set_shape(input_name, min_opt_shape, min_opt_shape, max_shape)
        config.add_optimization_profile(profile)

        return builder.build_engine(network, config=config)


def check_fp16():
    builder = trt.Builder(TRT_LOGGER)
    has_fp16 = builder.platform_has_fast_fp16
    return has_fp16


def convert_onnx(input_onnx: Union[str, bytes], engine_file_path: str, force_fp16: bool = False,
                 max_batch_size: int = 1):
    '''
    Creates TensorRT engine and serializes it to disk
    :param input_onnx: Path to ONNX file on disk or serialized ONNX model.
    :param engine_file_path: Path where TensorRT engine should be saved.
    :param force_fp16: Force use of FP16 precision, even if device doesn't support it. Be careful.
    :param max_batch_size: Define maximum batch size supported by engine. If >1 creates optimization profile.
    :return: None
    '''

    onnx_obj = None
    if isinstance(input_onnx, str):
        with open(input_onnx, "rb") as f:
            onnx_obj = f.read()
    elif isinstance(input_onnx, bytes):
        onnx_obj = input_onnx

    engine = _build_engine_onnx(input_onnx=onnx_obj,
                                force_fp16=force_fp16, max_batch_size=max_batch_size)

    assert not isinstance(engine, type(None))

    with open(engine_file_path, "wb") as f:
        f.write(engine.serialize())

In [2]:
remove_initializer_from_input('centerface.onnx', 'centerface_clean.onnx')

In [3]:
net1 = onnx.load('centerface.onnx')
print((net1.graph.input)[0])
print((net1.graph.output)) 

name: "input.1"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_value: 10
      }
      dim {
        dim_value: 3
      }
      dim {
        dim_value: 32
      }
      dim {
        dim_value: 32
      }
    }
  }
}

[name: "537"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_value: 10
      }
      dim {
        dim_value: 1
      }
      dim {
        dim_value: 8
      }
      dim {
        dim_value: 8
      }
    }
  }
}
, name: "538"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_value: 10
      }
      dim {
        dim_value: 2
      }
      dim {
        dim_value: 8
      }
      dim {
        dim_value: 8
      }
    }
  }
}
, name: "539"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_value: 10
      }
      dim {
        dim_value: 2
      }
      dim {
        dim_value: 8
      }
      dim {
        dim_value: 8
      }
    }
  }
}
, name: "540"
type {
  t

In [4]:
net2 = onnx.load('centerface_clean.onnx')
print((net2.graph.input)[0])
print((net2.graph.output)) 

name: "input.1"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_value: 10
      }
      dim {
        dim_value: 3
      }
      dim {
        dim_value: 32
      }
      dim {
        dim_value: 32
      }
    }
  }
}

[name: "537"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_value: 10
      }
      dim {
        dim_value: 1
      }
      dim {
        dim_value: 8
      }
      dim {
        dim_value: 8
      }
    }
  }
}
, name: "538"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_value: 10
      }
      dim {
        dim_value: 2
      }
      dim {
        dim_value: 8
      }
      dim {
        dim_value: 8
      }
    }
  }
}
, name: "539"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_value: 10
      }
      dim {
        dim_value: 2
      }
      dim {
        dim_value: 8
      }
      dim {
        dim_value: 8
      }
    }
  }
}
, name: "540"
type {
  t

In [13]:
im_size = [640, 480]
max_batch_size = 128
if max_batch_size !=1:
    batch_size=-1

model = onnx.load('centerface.onnx')
reshaped = reshape(model)
onnx.save(reshaped, 'centerface_dynamic2.onnx')
# with open('centerface_dynamic.onnx', "wb") as file_handle:
#     serialized = reshaped.SerializeToString()
#     file_handle.write(serialized)

In [14]:
net3 = onnx.load('centerface_dynamic2.onnx')
print((net3.graph.input)[0])
print((net3.graph.output)) 

name: "input.1"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_value: -1
      }
      dim {
        dim_value: 3
      }
      dim {
        dim_value: -1
      }
      dim {
        dim_value: -1
      }
    }
  }
}

[name: "537"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_value: -1
      }
      dim {
        dim_value: 1
      }
      dim {
        dim_value: -1
      }
      dim {
        dim_value: -1
      }
    }
  }
}
, name: "538"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_value: -1
      }
      dim {
        dim_value: 2
      }
      dim {
        dim_value: -1
      }
      dim {
        dim_value: -1
      }
    }
  }
}
, name: "539"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_value: -1
      }
      dim {
        dim_value: 2
      }
      dim {
        dim_value: -1
      }
      dim {
        dim_value: -1
      }
    }
  }
}
, name: "540"
type

In [15]:
convert_onnx(serialized, 
             engine_file_path='centerface_dynamic.plan',
             max_batch_size=max_batch_size,
             force_fp16=True)



[02/05/2023-19:31:43] [TRT] [E] 3: [optimizationProfile.cpp::setDimensions::128] Error Code 3: API Usage Error (Parameter check failed at: runtime/common/optimizationProfile.cpp::setDimensions::128, condition: std::all_of(dims.d, dims.d + dims.nbDims, [](int32_t x) noexcept { return x >= 0; })
)


RuntimeError: Shape provided for min is inconsistent with other shapes.