# YoloV5 ONNX inference example

Can reference the [link](https://github.com/ultralytics/yolov5/issues/343#issuecomment-658021043). It's a really interesting discussion thread about YoloV5 ONNX detection.

In [None]:
!pip install onnxruntime     # CPU build
# pip install onnxruntime-gpu   # GPU build

In [6]:
!git clone https://github.com/ultralytics/yolov5

Cloning into 'yolov5'...
remote: Enumerating objects: 1791, done.[K
remote: Total 1791 (delta 0), reused 0 (delta 0), pack-reused 1791[K
Receiving objects: 100% (1791/1791), 5.06 MiB | 39.23 MiB/s, done.
Resolving deltas: 100% (1165/1165), done.


In [22]:
import onnxruntime, os, sys
import numpy as np
from numpy import asarray
from PIL import Image

In [12]:
fp_yolov5 = os.path.abspath(os.getcwd()) + "/yolov5"
fp_yolov5

'/home/ec2-user/SageMaker/myAWSStudyBlog/sagemaker-greengrass/1-yolov5-onnx-inference/yolov5'

In [13]:
sys.path.append(fp_yolov5)
sys.path

['',
 '/home/ec2-user/anaconda3/envs/pytorch_p36/lib/python36.zip',
 '/home/ec2-user/anaconda3/envs/pytorch_p36/lib/python3.6',
 '/home/ec2-user/anaconda3/envs/pytorch_p36/lib/python3.6/lib-dynload',
 '/home/ec2-user/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages',
 '/home/ec2-user/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/IPython/extensions',
 '/home/ec2-user/.ipython',
 '/home/ec2-user/SageMaker/myAWSStudyBlog/sagemaker-greengrass/1-yolov5-onnx-inference/yolov5']

In [14]:
from utils.datasets import *
from utils.utils import *

In [3]:
fp_onnx = './yolov5s.onnx'
session = onnxruntime.InferenceSession(fp_onnx)
session.get_modelmeta()
first_input_name = session.get_inputs()[0].name
first_output_name = session.get_outputs()[0].name

print(f'input_name:{first_input_name} output_name:{first_output_name}')

input_name:images output_name:output


In [25]:
def detect_onnx(official=True, image_path=None):
    num_classes = 80
    anchors = [[116, 90, 156, 198, 373, 326], [30, 61, 62, 45, 59, 119], [10, 13, 16, 30, 33, 23]]  # 5s

    session = onnxruntime.InferenceSession(fp_onnx)
    # print("The model expects input shape: ", session.get_inputs()[0].shape)
    batch_size = session.get_inputs()[0].shape[0]
    img_size_h = session.get_inputs()[0].shape[2]
    img_size_w = session.get_inputs()[0].shape[3]

    # input
    image_src = Image.open(image_path)
    # resized = letterbox_image(image_src, (img_size_w, img_size_h))
    # convert image to numpy array
    np_image = asarray(image_src)
    print(np_image.shape)
    
    resized = letterbox(np_image, (img_size_w, img_size_h))

    print(resized.shape)    
    img_in = np.transpose(resized, (2, 0, 1)).astype(np.float32)  # HWC -> CHW
    img_in = np.expand_dims(img_in, axis=0)
    img_in /= 255.0
    # print("Shape of the image input shape: ", img_in.shape)

    # inference
    input_name = session.get_inputs()[0].name
    outputs = session.run(None, {input_name: img_in})

    batch_detections = []
    if official and len(outputs) == 4:   # model.model[-1].export = boolean ---> True:3 False:4
        # model.model[-1].export = False ---> outputs[0] (1, xxxx, 85)
        # official
        batch_detections = torch.from_numpy(np.array(outputs[0]))
        batch_detections = non_max_suppression(batch_detections, conf_thres=0.4, iou_thres=0.5, agnostic=False)
    else:
        # model.model[-1].export = False ---> outputs[1]/outputs[2]/outputs[2]
        # model.model[-1].export = True  ---> outputs
        # (1, 3, 20, 20, 85)
        # (1, 3, 40, 40, 85)
        # (1, 3, 80, 80, 85)
        # myself (from yolo.py Detect)
        boxs = []
        a = torch.tensor(anchors).float().view(3, -1, 2)
        anchor_grid = a.clone().view(3, 1, -1, 1, 1, 2)
        if len(outputs) == 4:
            outputs = [outputs[1], outputs[2], outputs[3]]
        for index, out in enumerate(outputs):
            out = torch.from_numpy(out)
            batch = out.shape[1]
            feature_w = out.shape[2]
            feature_h = out.shape[3]

            # Feature map corresponds to the original image zoom factor
            stride_w = int(img_size_w / feature_w)
            stride_h = int(img_size_h / feature_h)

            conf = out[..., 4]
            pred_cls = out[..., 5:]

            grid_x, grid_y = np.meshgrid(np.arange(feature_w), np.arange(feature_h))

            # cx, cy, w, h
            pred_boxes = torch.FloatTensor(out[..., :4].shape)
            pred_boxes[..., 0] = (torch.sigmoid(out[..., 0]) * 2.0 - 0.5 + grid_x) * stride_w  # cx
            pred_boxes[..., 1] = (torch.sigmoid(out[..., 1]) * 2.0 - 0.5 + grid_y) * stride_h  # cy
            pred_boxes[..., 2:4] = (torch.sigmoid(out[..., 2:4]) * 2) ** 2 * anchor_grid[index]  # wh

            conf = torch.sigmoid(conf)
            pred_cls = torch.sigmoid(pred_cls)

            output = torch.cat((pred_boxes.view(batch_size, -1, 4),
                                conf.view(batch_size, -1, 1),
                                pred_cls.view(batch_size, -1, num_classes)),
                               -1)
            boxs.append(output)

        outputx = torch.cat(boxs, 1)
        # NMS
        batch_detections = w_non_max_suppression(outputx, num_classes, conf_thres=0.4, nms_thres=0.3)

    return batch_detections

In [27]:
fp_img = fp_yolov5 + '/inference/images/zidane.jpg'
response = detect_onnx(official=True, image_path=fp_img)
response

(720, 1280, 3)


ValueError: axes don't match array