In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
from pathlib import Path

import cv2
import numpy as np
import pandas as pd

In [3]:
# Directory holding the FastMOT model files. The default is the path used when
# this notebook was written; set the FASTMOT_MODELS_DIR environment variable to
# point somewhere else without editing the notebook.
models_base_dir = Path(os.environ.get(
    'FASTMOT_MODELS_DIR',
    '/home/improlab/Desktop/FastMOT/fastmot/models',
))
# Fail fast, and say which path was tried, instead of failing later on load.
assert models_base_dir.exists(), f'Invalid path: {models_base_dir}'

In [4]:
# Every entry below is a (display name, path to the .onnx file) pair.
googlenet = ('GoogLeNet', models_base_dir.joinpath('googlenet', 'age_googlenet.onnx'))
# SSRNet, batch size 1
ssrnet_1 = ('SSRNet_1', Path('assets', 'ssrnet_bs1.onnx'))
# SSRNet, batch size 32
ssrnet_32 = ('SSRNet_32', Path('assets', 'ssrnet_bs32.onnx'))

osnet_x025 = ('osnet_x0.25', models_base_dir.joinpath('osnet', 'osnet_x0_25_msmt17.onnx'))
osnet_ain_10 = ('osnet_ain_x1.0', models_base_dir.joinpath('osnet', 'osnet_ain_x1_0_msmt17.onnx'))
# Running YOLOv4 without optimization might be too much for Jetson
# yolov4 = 'YOLOv4', models_base_dir / 'yolo' / 'yolov4_crowdhuman.onnx'

In [5]:
# onnx file not available
# peoplenet = models_base_dir / 'peoplenet' / 'resnet34_peoplenet_pruned.etlt'

In [6]:
from onnx_inference import Model
from trt_inference import TRTModel, TRTInferenceBackend
from timing import timethat

In [7]:
# Directory for generated artifacts: an 'assets' folder that sits next to this
# file when run as a script, or in the current working directory when run in a
# notebook (where __file__ is not defined).
try:
    # .parent is required: Path(__file__) is the file itself, and a path
    # *below* a regular file can never be a usable directory.
    output_dir = Path(__file__).parent / 'assets'
except NameError:
    output_dir = Path('.') / 'assets'

In [8]:
# Models to benchmark, in evaluation order; each item is a (name, path) pair.
onnx_models = [
    ssrnet_1,
    ssrnet_32,
    googlenet,
    osnet_x025,
    osnet_ain_10,
]

In [9]:
def time_inference(model, input):
    """Time ``model.run(input)`` with ``timethat``.

    The timed statement is also handed over as the setup statement, so the
    call is executed as setup to warm up before the measurement.

    Args:
        model: object exposing a ``run`` method.
        input: value passed to ``model.run``.

    Returns:
        Whatever ``timethat`` returns for the timed statement.
    """
    run_call = 'model.run(input)'
    namespace = dict(model=model, input=input)
    # setup == stmt: the same call doubles as the warm-up
    return timethat(stmt=run_call, setup=run_call, globals=namespace)

In [10]:
# One record (dict) per model; turned into a DataFrame afterwards.
results = []

for name, path in onnx_models:
    print(f'Evaluating {name}...')
    model = Model(path)

    # Named `sample_input` rather than `input`: at notebook top level an
    # assignment to `input` would replace the builtin input() for the rest of
    # the kernel session.
    sample_input = model.get_sample_input()

    # Time the same model object and the same sample, first after use_cpu()
    # and then after use_gpu().
    model.use_cpu()
    t_cpu = time_inference(model, sample_input)
    print(f'CPU: {t_cpu}')
    model.use_gpu()
    t_gpu = time_inference(model, sample_input)
    print(f'GPU: {t_gpu}')

    # The TensorRT engine file takes the ONNX file's name with a .trt suffix
    # and is placed in output_dir.
    engine_name = path.with_suffix('.trt').name
    engine_path = output_dir / engine_name
    # The first entry of input_shape is used as the batch size; the remaining
    # entries are passed to TRTModel.
    batch_size = model.input_shape[0]
    trt_model = TRTModel(path, engine_path, model.input_shape[1:])
    backend = TRTInferenceBackend(trt_model, batch_size)
    t_trt = time_inference(backend, sample_input)
    print(f'TRT: {t_trt}')

    # Keep only the mean and standard deviation of each timing result.
    results.append({
        'model': name,
        'cpu_mean': t_cpu.average,
        'cpu_std': t_cpu.stdev,
        'gpu_mean': t_gpu.average,
        'gpu_std': t_gpu.stdev,
        'trt_mean': t_trt.average,
        'trt_std': t_trt.stdev,
    })

    print('-'*20)

print('Completed evaluation.')

Evaluating SSRNet...
CPU: 3.7 ms ± 123 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
GPU: 4.02 ms ± 62 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
TRT: 1.4 ms ± 58.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
--------------------
Evaluating SSRNet...
CPU: 106 ms ± 2.4 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
GPU: 8.85 ms ± 170 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
TRT: 3.89 ms ± 17.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
--------------------
Evaluating GoogLeNet...
CPU: 144 ms ± 1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
GPU: 11.9 ms ± 53.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
TRT: 3.21 ms ± 15.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
--------------------
Evaluating osnet_x0.25...
CPU: 356 ms ± 6.99 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
GPU: 36.1 ms ± 224 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
TRT: 12.4 ms ± 

In [11]:
# Build one table from the per-model timing records (one row per record).
df = pd.DataFrame(results)
# Show the table as the cell's output.
display(df)

Unnamed: 0,model,cpu_mean,cpu_std,gpu_mean,gpu_std,trt_mean,trt_std
0,SSRNet,0.003703,0.000123,0.004015,6.2e-05,0.001403,5.9e-05
1,SSRNet,0.106288,0.002399,0.008851,0.00017,0.003887,1.7e-05
2,GoogLeNet,0.143667,0.001005,0.011867,5.3e-05,0.00321,1.6e-05
3,osnet_x0.25,0.356449,0.006992,0.036102,0.000224,0.012372,2.3e-05
4,osnet_ain_x1.0,2.113494,0.017287,0.155038,0.001188,0.05347,0.000219


In [12]:
# df.to_csv('performance_results.csv')