## onnx library import

In [2]:
import os
import sys
import numpy as np
import cv2
import argparse
import onnxruntime
from tqdm import tqdm

In [3]:
sys.path.append('./trt_inference/')
sys.path.append('./onnx_inference/')

### tensorrt library import

In [4]:
from trt_inference.yolo_face_trt_inference import Predictor
from onnx_inference.yolo_pose_onnx_inference import model_inference_image_list_wo_nms, model_inference_image_list

  from .autonotebook import tqdm as notebook_tqdm


### export onnx and tensorrt (without nms)

In [5]:
# build onnx file (32bit)
#!python models/export.py --weights ./yolov7-tiny-face.pt --grid --simplify
# build onnx file to trt file (32bit)
#!python models/export_tensorrt.py -o ./yolov7-tiny-face_wo_nms.onnx -e ./yolov7-tiny-face_wo_nms.trt -p fp32

In [6]:
onnx_path = './yolov7-tiny-face_include_nms.onnx'
trt_path = './yolov7-tiny-face_wo_nms_fp16.trt'
img_path = './data/images/22_Picnic_Picnic_22_10.jpg'

model_inference_image_list(onnx_path, img_path, mean=0, scale=0.003, dst_path='./onnx_w_nms')

0/1: 100%|██████████| 1/1 [00:00<00:00,  3.54it/s]

output path :  ./onnx_w_nms/22_Picnic_Picnic_22_10.jpg





In [7]:
onnx_path = './yolov7-tiny-face_wo_nms.onnx'
trt_path = './yolov7-tiny-face_wo_nms.trt'
img_path = './data/images/22_Picnic_Picnic_22_10.jpg'

#model_inference_image_list_wo_nms(model_path=onnx_path, img_path = img_path, mean=0, scale=0.0039, dst_path='./onnx_output')

In [8]:
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit

In [24]:
trt_logger = trt.Logger(trt.Logger.INFO)
trt_builder = trt.Builder(trt_logger)
trt_config = trt_builder.create_builder_config()
trt_config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 8 * (2 ** 30)) # 8 is workspace

network_flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
print(network_flag)
network = trt_builder.create_network(network_flag)
parser = trt.OnnxParser(network, trt_logger)

with open(onnx_path, "rb") as f:
    if not parser.parse(f.read()):
        print("Failed to load onnx file")
        for error in range(parser.num_errors):
            print(parser.get_error(error))
        sys.exit(1)

inputs = [network.get_input(i) for i in range(network.num_inputs)]
outputs = [network.get_output(i) for i in range(network.num_outputs)]
print("inputs : " , inputs[0].shape)
print("outputs : ", outputs[0].shape)
print("num layers : ", network.num_layers)
nkpt = 5 # face landmark
strides = trt.Dims([1,1,1])
starts = trt.Dims([0,0,0])
bs, num_grid, temp = outputs[0].shape
shapes = trt.Dims([bs, num_grid, 4])
num_classes = temp - 5 - nkpt*3 
boxes = network.add_slice(outputs[0], starts, shapes, strides)
print("boxes's slice start :", starts)
print("boxes's slice output shapes :", shapes)

print("boxes : ", boxes.shape)

starts[2] = 4
shapes[2] = 1
print("obj's score slice start :", starts)
print("obj's score slice output shapes :", shapes)
obj_score = network.add_slice(outputs[0], starts, shapes, strides)
print("obj score : ", obj_score.shape)

starts[2] = 5
shapes[2] = num_classes
print("class score's score slice start :", starts)
print("class score's score slice output shapes :", shapes)

cls_score = network.add_slice(outputs[0], starts, shapes, strides)     
print("score :", cls_score.shape)

starts[2] = 5 + num_classes
shapes[2] = temp - 5 - num_classes
print("key point's score slice start : ", starts)
print("key point's score slice output shpae : ", shapes)

keypoints = network.add_slice(outputs[0], starts, shapes, strides)
print("keypoint :", keypoints.shape)
# compute obj_score * cls_score
updated_score = network.add_elementwise(obj_score.get_output(0), cls_score.get_output(0), trt.ElementWiseOperation.PROD)

max_det = 10
conf_thres = 0.4
iou_thres = 0.5
registry = trt.get_plugin_registry()
assert(registry)
creator = registry.get_plugin_creator("EfficientNMS_ONNX_TRT", "1")
assert(creator)
fc = []
fc.append(trt.PluginField("background_class", np.array([-1], dtype=np.int32), trt.PluginFieldType.INT32))
fc.append(trt.PluginField("max_output_boxes", np.array([max_det], dtype=np.int32), trt.PluginFieldType.INT32))
fc.append(trt.PluginField("score_threshold", np.array([conf_thres], dtype=np.float32), trt.PluginFieldType.FLOAT32))
fc.append(trt.PluginField("iou_threshold", np.array([iou_thres], dtype=np.float32), trt.PluginFieldType.FLOAT32))
fc.append(trt.PluginField("box_coding", np.array([1], dtype=np.int32), trt.PluginFieldType.INT32))

fc = trt.PluginFieldCollection(fc) 
nms_layer = creator.create_plugin("nms_layer", fc)
layer = network.add_plugin_v2([boxes.get_output(0), updated_score.get_output(0)], nms_layer)
#network.mark_output(layer.get_output(0))

1
[01/09/2023-04:44:50] [TRT] [I] The logger passed into createInferBuilder differs from one already provided for an existing builder, runtime, or refitter. Uses of the global logger, returned by nvinfer1::getLogger(), will return the existing value.

[01/09/2023-04:44:50] [TRT] [I] [MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 774, GPU 1615 (MiB)
[01/09/2023-04:44:50] [TRT] [W] parsers/onnx/onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped
inputs :  (1, 3, 640, 640)
outputs :  (1, 25200, 21)
num layers :  408
boxes's slice start : (0, 0, 0)
boxes's slice output shapes : (1, 25200, 4)
boxes :  (1, 25200, 4)
obj's score slice start : (0, 0, 4)
obj's score slice output shapes : (1, 25200, 1)
obj score :  (1, 25200, 1)
class score's score slice start : (0, 0, 5)
class score's score slice output shapes : (1, 25200, 1)
score : (1, 25200, 1)
key point's score slice start :  (0, 0, 6)
key point's score slice output shpae :  (1, 25200, 15)
keypoint : (1,

In [25]:
network.get_output(1)

[01/09/2023-04:44:56] [TRT] [E] [network.cpp::getOutput::1845] Error Code 3: API Usage Error (Parameter check failed at: optimizer/api/network.cpp::getOutput::1845, condition: index < getNbOutputs()
)
