# Object Detection with YoloV4 and TensorRT

CSI Camera is hw-acc encoding jpg frames

YoloV4 imported from ONNX.

TensorRT conversion is cached.
FP32 is cast to FP16 here.

TODO activate observer for object_detection

## Pipeline
- nvargus
- GStreamer 
`nvarguscamerasrc sensor-id=%d ! video/x-raw(memory:NVMM), width=%d, height=%d, format=(string)NV12, framerate=(fraction)%d/1 ! nvvidconv flip-method=0 !  nvjpegenc`
- manual / observer
- preprocessor
    - scaling and padding
    (camera size matches)
- TensorRT yoloV4
- postprocessor
    - threshold
    - intersection over union
    - nms
- jupyter image widget

In [1]:
!pip3 install wget

Collecting wget
  Downloading https://files.pythonhosted.org/packages/47/6a/62e288da7bcda82b935ff0c6cfe542970f04e29c756b0e147251b2fb251f/wget-3.2.zip
Building wheels for collected packages: wget
  Running setup.py bdist_wheel for wget ... [?25ldone
[?25h  Stored in directory: /root/.cache/pip/wheels/40/15/30/7d8f7cea2902b4db79e3fea550d7d7b85ecb27ef992b618f3f
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2


In [3]:
from common import download_file

YOLOv4_FILE = '/nvdli-nano/data/yolov4.onnx'
YOLOv4_URL = 'https://media.githubusercontent.com/media/onnx/models/master/vision/object_detection_segmentation/yolov4/model/yolov4.onnx'

YOLOv4_ANCHORS_FILE = '/nvdli-nano/data/yolov4.anchors'
YOLOv4_ANCHORS_URL = 'https://raw.githubusercontent.com/onnx/models/master/vision/object_detection_segmentation/yolov4/dependencies/yolov4_anchors.txt'

COCO_NAMES_FILE = '/nvdli-nano/data/coco.names'
COCO_NAMES_URL = 'https://raw.githubusercontent.com/onnx/models/master/vision/object_detection_segmentation/yolov4/dependencies/coco.names'

display(
    download_file(YOLOv4_FILE, YOLOv4_URL)
)
display(
    download_file(YOLOv4_ANCHORS_FILE, YOLOv4_ANCHORS_URL)
)
display(
    download_file(COCO_NAMES_FILE, COCO_NAMES_URL)
)

'/nvdli-nano/data/yolov4.onnx'

'/nvdli-nano/data/yolov4.anchors'

'/nvdli-nano/data/coco.names'

In [8]:
ENGINE_FILE = '/nvdli-nano/data/yolov4.trt'

# FIXME https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work_dynamic_shapes
## network.get_input(0).shape = [1, 416, 416, 3]
YOLOv4_DIMS = (416, 416)

def build_engine(onnx_file_path, engine_file_path):
    import tensorrt as trt
    from onnx_to_tensorrt import TRT_LOGGER
    import common
    import os
    
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(common.EXPLICIT_BATCH) as network, builder.create_builder_config() as config, trt.OnnxParser(network, TRT_LOGGER) as parser:
        config.max_workspace_size = 1 << 28 # 256MiB
        builder.max_batch_size = 1
        if builder.platform_has_fast_int8:
            builder.int8_mode = True
            print('int8 mode enabled')
        elif builder.platform_has_fast_fp16:
            builder.fp16_mode = True
            print('fp16 mode enabled')
        elif build.platform_has_tf32:
            builder.tf32_mode = True
            print('tf32 mode enabled')
        else:
            print('platform has no int8 or fp16 support')

        # Parse model file
        if not os.path.exists(onnx_file_path):
            print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
            exit(0)
        print('Loading ONNX file from path {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            if not parser.parse(model.read()):
                print ('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print (parser.get_error(error))
                return None
        # The actual yolov3.onnx is generated with batch size 64. Reshape input to batch size 1
        network.get_input(0).shape = [1, 416, 416, 3] # ORIG [1, 3, 608, 608]
        print('Completed parsing of ONNX file')
        print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
        engine = builder.build_engine(network, config)
        print("Completed creating Engine")
        with open(engine_file_path, "wb") as f:
            f.write(engine.serialize())
        return engine

# del engine
try: 
    engine
except NameError:
    from onnx_to_tensorrt import get_engine
    from common import allocate_buffers
    
    cfx = make_default_context()
    engine = build_engine(YOLOv4_FILE, ENGINE_FILE) # get_engine(YOLOv4_FILE, ENGINE_FILE)
    inputs, outputs, bindings, stream = allocate_buffers(engine)

fp16 mode enabled
Loading ONNX file from path /nvdli-nano/data/yolov4.onnx...
Beginning ONNX file parsing
Completed parsing of ONNX file
Building an engine from file /nvdli-nano/data/yolov4.onnx; this may take a while...
Completed creating Engine


In [4]:
from data_processing import PreprocessYOLO, PostprocessYOLO, ALL_CATEGORIES

try: preprocessor
except NameError:
    preprocessor = PreprocessYOLO(YOLOv4_DIMS)

def reshape_output(trt_output):
    if len(trt_output) % (52*52) == 0:
        return trt_output.reshape(1, 52, 52, 3, 85)
    elif len(trt_output) % (26*26) == 0:
        return trt_output.reshape(1, 26, 26, 3, 85)
    elif len(trt_output) % (13*13) == 0:
        return trt_output.reshape(1, 13, 13, 3, 85)
    else:
        print('unknown trt_output size {}'.format(len(trt_output)))
        return []

def infer_from_camera(widget, camera):
    camera.running = False
    image = camera.read_image()
    infer_from_bytes(widget, image)
    
def infer_from_change(widget, change):
    # print("infering for widget {} with change {}".format(widget, change))
    infer_from_bytes(widget, change.new)

def infer_from_bytes(widget, change):
    from common import do_inference_v2
    from yolo4_inference import image_ppreprocess, postprocess_bbbox, postprocess_boxes, nms, draw_bbox, get_anchors
    import numpy as np

    cfx.push()
    
    ## size matches, jpeg encoded
    # image_raw, image_preprocessed = preprocessor.processLoaded(change)
    pil_img, pre_img, np_img = image_ppreprocess(change, YOLOv4_DIMS)

    with engine.create_execution_context() as context:
        inputs[0].host = pre_img
        trt_outputs = do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)            
        # fixed by applying preprocessors shuffle https://forums.developer.nvidia.com/t/yolo-v3-output-boxes-are-nan-both-in-python-and-c/142289/2

        ## print("Output shape:", list(map(lambda trt_output: trt_output.shape, trt_outputs)))
        trt_outputs_reshaped = list(map(reshape_output, trt_outputs))
        ## print("Output re-shape:", list(map(lambda trt_output: trt_output.shape, trt_outputs_reshaped)))

        ANCHORS = get_anchors(YOLOv4_ANCHORS_FILE)
        STRIDES = np.array([8, 16, 32])
        XYSCALE = [1.2, 1.1, 1.05]

        pred_bbox = postprocess_bbbox(trt_outputs_reshaped, ANCHORS, STRIDES, XYSCALE)
        pp_bboxes = postprocess_boxes(pred_bbox, YOLOv4_DIMS, YOLOv4_DIMS[0], score_threshold=0.2)
        bboxes = nms(pp_bboxes, iou_threshold=0.2, method='nms')

        # bboxes.append([0,0,YOLOv4_DIMS[0]//2, YOLOv4_DIMS[1]//2, 1, 56])
        # print('draw_bbox({}, {}, classes={})'.format(image_raw, bboxes, ALL_CATEGORIES))

        boxed_image = draw_bbox(np_img, bboxes, classes=ALL_CATEGORIES)
        widget.value = cv2.imencode('.jpg', boxed_image)[1].tobytes()
    cfx.pop()
        
def transform_image(image):
    return image.flatten().tobytes()


In [5]:
!ls -ltrh /dev/video*

crw-rw---- 1 root video 81, 3 Jun  9 12:55 /dev/video1
crw-rw---- 1 root video 81, 0 Jun  9 12:55 /dev/video0


In [6]:
import tensorrt
print('tensorrt %s' % tensorrt.__version__)

import cv2
print('cv2 %s' % cv2.__version__)

tensorrt 7.1.3.0
cv2 4.1.1


In [7]:
from jetcam.csi_camera import CSICamera

class MyCamera(CSICamera):
    def __init__(self, *args, **kwargs):
        super(MyCamera, self).__init__(*args, **kwargs)

    ## https://forums.developer.nvidia.com/t/example-gstreamer-pipeline-to-test-li-imx219-mipi-ff-nano-on-the-jetson-nano/72226
    # gst-launch-1.0 nvarguscamerasrc ! ‘video/x-raw(memory:NVMM),width=1024, height=768, framerate=120/1, format=NV12’ ! nvvidconv flip-method=0 ! nvegltransform ! nveglglessink -e
    def _gst_str(self):
        return 'nvarguscamerasrc sensor-id=%d ! video/x-raw(memory:NVMM), width=%d, height=%d, format=(string)NV12, framerate=(fraction)%d/1 ! nvvidconv flip-method=0 !  nvjpegenc ! appsink' % (
                self.capture_device, self.capture_width, self.capture_height, self.capture_fps) # , self.width, self.height
    
    def release(self):
        self.cap.release()
        self.cap = None
        
    def read_image(self):
        return transform_image(self.read())
    
def update_image(image_widget, change):
    image = transform_image(change['new'])
    image_widget.value = image


In [8]:
# https://www.waveshare.com/wiki/IMX219-83_Stereo_Camera
# Resolution: 3280 × 2464 (per camera)
WIDTH = YOLOv4_DIMS[0]
HEIGHT = YOLOv4_DIMS[1]
FPS = 1

try: cameras
except NameError:
    cameraLeft = MyCamera(capture_device=0, capture_width=WIDTH, capture_height=HEIGHT, capture_fps=FPS)
    cameraRight = MyCamera(capture_device=1, capture_width=WIDTH, capture_height=HEIGHT, capture_fps=FPS)

    cameras = [cameraLeft, cameraRight]
    
display(cameras)

[<__main__.MyCamera at 0x7f813b01d0>, <__main__.MyCamera at 0x7f813aba20>]

In [9]:
from importlib import reload
import yolo4_inference
reload(yolo4_inference)

import ipywidgets
from IPython.display import display

try: widgets
except NameError:
    widgets = []
    
if not widgets:
    for camera in cameras:
        camera.running = False
        image = camera.read_image()

        image_widget = ipywidgets.Image(format='jpeg')
        image_widget.value = image

        widgets.append(image_widget)
        
for camera, widget in zip(cameras, widgets):
    infer_from_camera(widget, camera)
    camera.running = False
    display(widget)

Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x02\x01\x0…

Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x02\x01\x0…

In [10]:
from importlib import reload
import yolo4_inference
reload(yolo4_inference)

from functools import partial

try: 
    cameras[0].unobserve(obs, names=['value'])
    cameras[0].running=False
except NameError:
    pass
except ValueError:
    pass

#del obs
try: obs
except NameError:
    obs = partial(infer_from_change, widgets[0])

cameras[0].observe(obs, names=['value'])
cameras[0].running=True

print(obs)
print(cameras[0])

functools.partial(<function infer_from_change at 0x7f81423510>, Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x02\x01\x01\x0...', format='jpeg'))
<__main__.MyCamera object at 0x7f813b01d0>


In [11]:
return  

from functools import partial

try: observers
except NameError:
    observers = []
    
if len(observers) < len(widgets):
    for (camera, widget) in zip(cameras, widgets):
        # observer = partial(update_image, widget)
        observer = partial(infer_from_change, widget)
        camera.observe(observer, names='value')

        camera.running = True

        observers.append(observer)

display(observers)

SyntaxError: 'return' outside function (<ipython-input-11-4371bf926264>, line 1)

In [None]:
for (camera, observer) in zip(cameras, observers):
    try: camera.unobserve(observer, names='value')
    except ValueError as err: 
        print('observer %s already unregistered: %s' % (observer, err))
    observers.remove(observer)
    camera.running = False
    
display(observers)

In [None]:
import cv2

releaseCams = True

if releaseCams:
    for camera in cameras:
        camera.release()
        cameras.remove(camera)
cv2.destroyAllWindows()        
        
display(cameras)