forked from YonghaoHe/LFD-A-Light-and-Fast-Detector
/
predict_tensorrt.py
144 lines (118 loc) · 7.05 KB
/
predict_tensorrt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# -*- coding: utf-8 -*-
# predict using tensorrt as inference engine
# In this script, we provide you a demo only.
# In this demo, we build engines that accept a fixed input shape (TensorRT also supports dynamic shapes; we will implement that later).
# If you want to deploy this in a product, you should make some modifications, such as rewriting the post-processing, to make it more efficient.
import numpy
import torch
import onnx
import os
from lfd.execution.utils import load_checkpoint
from lfd.data_pipeline.augmentation import *
from lfd.deployment.tensorrt.inference import allocate_buffers
from lfd.deployment.tensorrt.build_engine import GB, build_tensorrt_engine, INT8Calibrator
from lfd.data_pipeline.sampler.region_sampler import crop_from_image
import tensorrt
tensorrt.init_libnvinfer_plugins(None, '')
import cv2
def prepare_data_for_int8_calibrator(image_root, normalizer, crop_size=(512, 512)):
    """
    Build the batch of images that is fed to the INT8 calibrator.

    Steps:
        1. read images and crop images (the crop window is centered in each image)
        2. do normalization (if you have integrated normalization in computation graph, then normalization can be abandoned)
        3. organize all images into one batch

    :param image_root: path that contains all images for calibration (images from test set that cover most situations,
                       and the number of images is not necessarily large, say 128 is enough)
    :param normalizer: normalizer used for normalizing images; it is called as normalizer(image=...) and must return
                       a mapping whose 'image' entry is the normalized image
    :param crop_size: (w, h)
    :return: numpy.float32 array of shape (num_images, 3, h, w); num_images is 0 when image_root holds no matching file
    :raises AssertionError: if image_root does not exist
    :raises IOError: if a listed image file cannot be decoded by OpenCV
    """
    # the message is now actually formatted with the offending path
    assert os.path.exists(image_root), '[%s] root does not exist!!!' % image_root

    crop_w, crop_h = crop_size[0], crop_size[1]
    image_paths_list = [os.path.join(image_root, name)
                        for name in os.listdir(image_root)
                        if name.lower().endswith(('jpg', 'jpeg', 'png', 'bmp'))]

    batch = numpy.zeros((len(image_paths_list), 3, crop_h, crop_w), dtype=numpy.float32)
    for i, image_path in enumerate(image_paths_list):
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None instead of raising
        if image is None:
            raise IOError('failed to read calibration image [%s]' % image_path)

        # top-left corner of a crop window centered in the image
        # NOTE(review): offsets become negative when the image is smaller than crop_size —
        # confirm how crop_from_image handles that case
        left = int((image.shape[1] - crop_w) / 2)
        top = int((image.shape[0] - crop_h) / 2)
        image_crop = crop_from_image(image, (left, top, crop_w, crop_h))

        image_crop_normalized = normalizer(**{'image': image_crop})['image']
        # move the last axis to the front (assumed HWC -> CHW) to match the batch layout
        batch[i] = image_crop_normalized.transpose([2, 0, 1])

    return batch
# set the target model script ------------------------------------------------------------------------
# config_dict['model'] is the model object used below for checkpoint loading, ONNX export and prediction
# (presumably created by prepare_model() — verify against the config script)
from WIDERFACE_LFD_XS_work_dir_20210210_115210.WIDERFACE_LFD_XS import config_dict, prepare_model

prepare_model()

# set the model weight file ------------------------------------------------------------------------
param_file_path = './WIDERFACE_LFD_XS_work_dir_20210210_115210/epoch_1000.pth'
load_checkpoint(config_dict['model'], load_path=param_file_path, strict=True)

# set the image path to be tested ------------------------------------------------------------------------
image_path = './test_images/image1.jpg'
# NOTE(review): IMREAD_UNCHANGED keeps the file's own channel count, while the engine below is built
# for 3 channels — confirm that test images are always 3-channel
image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
# cv2.imread returns None (it does not raise) when the file is missing or cannot be decoded;
# fail here with a clear message instead of a later AttributeError on image.shape
if image is None:
    raise FileNotFoundError('failed to read test image [%s]' % image_path)
# generate engine ------------------------------------------------------------------------
engine_folder = './tensorrt_engine_folder'
# exist_ok=True replaces the separate existence check and its check-then-create race
os.makedirs(engine_folder, exist_ok=True)

# setting for engine building
# the engine accepts one fixed input shape, taken from the test image: [1, 3, image.shape[0], image.shape[1]]
input_shapes = [[1, 3, image.shape[0], image.shape[1]]]
input_names = ['input_data']
output_names = ['classification_output', 'regression_output']
precision_mode = 'int8'
max_workspace_size = GB(6)
min_timing_iterations = 2
avg_timing_iterations = 2

# calibration settings; the calibrator itself is only created when an engine really has to be built
int8_calibrator = None
image_root_for_int8_calibration = '/home/yonghaohe/datasets/WIDER_FACE/WIDER_val/images/0--Parade'
normalizer = simple_normalize

# every generated artifact shares the prefix <engine_folder>/<weight dir>_<weight file stem>_<precision>
param_path_parts = param_file_path.split('/')
artifact_prefix = os.path.join(engine_folder,
                               param_path_parts[-2] + '_' + param_path_parts[-1].split('.')[0] + '_' + precision_mode)
calibration_cache_path = artifact_prefix + '.cache'
onnx_file_path = artifact_prefix + '.onnx'
engine_file_path = artifact_prefix + '.engine'

# if the engine exists, skip building process
if not os.path.exists(engine_file_path):
    if precision_mode == 'int8':
        # calibration data is only needed for building, so it is not loaded when a cached engine is reused
        # NOTE(review): calibration crops are 512x512 while the engine input uses the test image's size —
        # confirm that INT8Calibrator / build_tensorrt_engine accept this mismatch
        data_batch = prepare_data_for_int8_calibrator(image_root=image_root_for_int8_calibration,
                                                      normalizer=normalizer,
                                                      crop_size=(512, 512))
        int8_calibrator = INT8Calibrator(data_batch, calibration_cache_path, batch_size=8)

    # generate onnx file
    input_tensors = [torch.rand(input_shape) for input_shape in input_shapes]
    torch.onnx.export(config_dict['model'],
                      args=tuple(input_tensors),
                      f=onnx_file_path,
                      verbose=True,
                      input_names=input_names,
                      output_names=output_names,
                      opset_version=9
                      )

    # validate the exported graph before handing it to TensorRT
    onnx_model = onnx.load(onnx_file_path)
    onnx.checker.check_model(onnx_model)

    # the build must not live inside an assert statement: asserts are removed under `python -O`,
    # which would silently skip engine generation altogether
    build_succeeded = build_tensorrt_engine(onnx_file_path,
                                            engine_file_path,
                                            precision_mode=precision_mode,
                                            max_workspace_size=max_workspace_size,  # in bytes
                                            max_batch_size=input_shapes[0][0],
                                            min_timing_iterations=min_timing_iterations,
                                            avg_timing_iterations=avg_timing_iterations,
                                            int8_calibrator=int8_calibrator)
    if not build_succeeded:
        raise RuntimeError('failed to build tensorrt engine [%s]' % engine_file_path)
# inference
# TensorRT logger created with the ERROR severity level
trt_logger = tensorrt.Logger(tensorrt.Logger.ERROR)

# read the serialized engine from disk and deserialize it
with open(engine_file_path, 'rb') as engine_file:
    with tensorrt.Runtime(trt_logger) as trt_runtime:
        serialized_engine = engine_file.read()
        engine = trt_runtime.deserialize_cuda_engine(serialized_engine)
assert engine is not None, 'deserialize engine failed!'

# print a few engine properties (each attribute is read right before it is printed)
print('Engine info:')
for info_label, info_attribute in (('\tmax batch size: ', 'max_batch_size'),
                                   ('\tmax workspace_size: ', 'max_workspace_size'),
                                   ('\tdevice memory_size: ', 'device_memory_size')):
    print(info_label, getattr(engine, info_attribute))

# buffers, bindings and stream for the engine, plus an execution context
inputs, outputs, bindings, stream = allocate_buffers(engine, 1)
engine_context = engine.create_execution_context()

# run the detector on the test image through the TensorRT engine
detector = config_dict['model']
predict_options = {
    'input_buffers': inputs,
    'output_buffers': outputs,
    'bindings': bindings,
    'stream': stream,
    'engine': engine,
    'tensorrt_engine_context': engine_context,
    'aug_pipeline': simple_widerface_val_pipeline,
    'classification_threshold': 0.5,
    'nms_threshold': 0.3,
    'class_agnostic': False,
}
results = detector.predict_for_single_image_with_tensorrt(image, **predict_options)

# print every result and draw it on the image; elements 2..5 of a result are used as x, y, w, h
box_color = (0, 255, 0)
for detection in results:
    print(detection)
    box_x, box_y, box_w, box_h = detection[2], detection[3], detection[4], detection[5]
    top_left = (int(box_x), int(box_y))
    bottom_right = (int(box_x + box_w), int(box_y + box_h))
    cv2.rectangle(image, top_left, bottom_right, box_color, 1)

print('%d faces are detected!' % len(results))

# show the annotated image until a key is pressed
cv2.imshow('im', image)
cv2.waitKey()