## Yolo test

Preparation for Live Video Analytics (LVA): testing the YOLOv3 inference container.

### prep images

> You need a GPU-enabled machine to run YOLO.

Pull the YOLOv3 image from Microsoft's container registry, tag it, and run it:

```
docker pull mcr.microsoft.com/lva-utilities/lvaextension:http-yolov3-onnx-v1.0
docker tag mcr.microsoft.com/lva-utilities/lvaextension:http-yolov3-onnx-v1.0 yolov3:latest

docker run --name yolo -p 8088:80 -d yolov3
```

In [None]:
# test image (416x416)
# Smoke test from the shell: POST the sample JPEG as the raw request body to the
# container's /score endpoint (host port 8088, as published by `docker run` above)
# and pipe the JSON response through jq so it is printed in a readable form.

!curl -s -X POST -H "Content-Type:image/jpeg" --data-binary @"./sample/people.jpg" localhost:8088/score | jq

In [None]:
import requests
import json
from PIL import Image
import cv2
from io import BytesIO

In [None]:
testimg = './sample/people.jpg'

# cv2.imread does not raise when the file is missing or unreadable; it returns
# None, which would otherwise surface as an opaque error inside cv2.resize.
img = cv2.imread(testimg, cv2.IMREAD_COLOR)
if img is None:
    raise FileNotFoundError("cannot read image: %s" % testimg)

# Shrink to the 416x416 size used throughout this notebook, then convert
# OpenCV's BGR channel order to RGB so PIL renders the colors correctly.
img = cv2.resize(img, (416, 416), interpolation=cv2.INTER_AREA)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
pilimg = Image.fromarray(img)
display(pilimg)

In [None]:
# Scoring endpoint of the local YOLOv3 container, and the content type of the upload.
yolov3_url = "http://localhost:8088/score"
headers = {"Content-Type": "image/jpeg"}

# Send the JPEG file itself as the request body.
with open(testimg, 'rb') as jpeg_file:
    r = requests.post(yolov3_url, data=jpeg_file, headers=headers)

# The response body is already in memory once post() returns, so it can be
# decoded and parsed after the file has been closed.
body_text = r.content.decode("utf-8")
pred = json.loads(body_text)

In [None]:
# One line per detection: tag value, confidence, then the box fields l, t, w, h.
for detection in pred["inferences"]:
    entity = detection["entity"]
    tag = entity["tag"]
    label, score = tag["value"], tag["confidence"]
    box = entity["box"]
    print("%s %.4f (%.2f, %.2f, %.2f, %.2f)" % (label, score, box["l"], box["t"], box["w"], box["h"]))

In [None]:
testimg = './sample/people.jpg'

# cv2.imread returns None (it does not raise) when the file cannot be read;
# fail here with a clear message instead of an opaque error inside cv2.resize.
img = cv2.imread(testimg, cv2.IMREAD_COLOR)
if img is None:
    raise FileNotFoundError("cannot read image: %s" % testimg)

# Same preprocessing as the preview cell: 416x416, converted from BGR to RGB for PIL.
img = cv2.resize(img, (416, 416), interpolation=cv2.INTER_AREA)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# The box fields l/t/w/h are used as fractions of the image size, so scale them
# by the dimensions of the image being drawn on (416x416 after the resize above)
# rather than repeating the literal.
img_h, img_w = img.shape[:2]

for p in pred["inferences"]:
    box = p["entity"]["box"]
    # "+ 0.5" before int() rounds to the nearest pixel instead of truncating.
    x = int(box["l"] * img_w + 0.5)
    y = int(box["t"] * img_h + 0.5)
    w = int(box["w"] * img_w + 0.5)
    h = int(box["h"] * img_h + 0.5)

    # The image is RGB at this point, so (255, 0, 0) is drawn as red.
    cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 1)

pilimg = Image.fromarray(img)
display(pilimg)

## Yolo video test

Capture video from a camera or an RTSP source.

- List the camera devices on Linux (to choose between the front and rear cameras):
```
v4l2-ctl --list-devices
```

In [None]:
import requests
import json
from PIL import Image
import cv2
from io import BytesIO

import time
import io

In [None]:
def yolov3(frame, timeout=10):
    """POST a JPEG image to the local YOLOv3 scoring endpoint and return its reply.

    Args:
        frame: JPEG data to send as the request body. It is handed to
            ``requests.post(data=...)`` unchanged; the capture loops in this
            notebook pass an ``io.BytesIO``.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled container cannot block the calling loop indefinitely.

    Returns:
        The decoded JSON response. If the request fails or the body is not
        valid UTF-8 JSON (including an empty body), ``{"inferences": []}`` is
        returned so callers can always iterate over ``["inferences"]``.
    """
    yolov3_url = "http://localhost:8088/score"
    headers = {"Content-Type": "image/jpeg"}

    try:
        r = requests.post(yolov3_url, headers=headers, data=frame, timeout=timeout)
        return json.loads(r.content.decode("utf-8"))
    except (requests.RequestException, ValueError):
        # RequestException: connection refused, timeout, and other transport errors.
        # ValueError: covers both UnicodeDecodeError and json.JSONDecodeError.
        # Anything else (KeyboardInterrupt, programming errors) is deliberately
        # allowed to propagate instead of being silently swallowed.
        return {"inferences": []}

Note: each frame returned by `cv2.VideoCapture.read()` is a NumPy `ndarray`, so you can inspect its shape:

```
import numpy as np
print(frame.shape)
```

In [None]:
### capture camera
# Live preview: read frames, send a 416x416 JPEG copy of each to yolov3(), and
# overlay the returned boxes and labels on the displayed frame. Press 'q' to quit.
cap = cv2.VideoCapture(0)
# cap = cv2.VideoCapture("rtsp://<ip>")
print('width: {0}, height: {1}'.format(cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
# Ask the device for 416x416 frames. A device may not honor the request, which
# is why frames are still resized for inference and boxes are scaled by the
# actual frame size below.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 416)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 416)

prev_time = 0
FPS = 30  # upper bound on how many frames per second are sent for inference

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Without a frame the loop would never reach waitKey(), so it could
            # not be stopped with 'q'; end the capture instead of spinning.
            print('no frame received; stopping capture')
            break

        curr_time = time.time() - prev_time
        if curr_time <= 1. / FPS:
            # Too soon since the last processed frame: drop this one.
            continue
        prev_time = time.time()

        # Copy of the frame at the size sent to the model.
        fbuff = cv2.resize(frame, (416, 416), interpolation=cv2.INTER_AREA)

        # encode to jpg; if encoding fails, show the frame without detections
        is_success, buffer = cv2.imencode(".jpg", fbuff)
        if is_success:
            io_buf = io.BytesIO(buffer)
            pred = yolov3(io_buf)
        else:
            pred = {"inferences": []}

        # highlights: l/t/w/h are used as fractions of the image size, so scale
        # them by the size of the frame that is actually drawn on.
        frame_h, frame_w = frame.shape[:2]
        for p in pred["inferences"]:
            box = p["entity"]["box"]
            x = int(box["l"] * frame_w + 0.5)
            y = int(box["t"] * frame_h + 0.5)
            w = int(box["w"] * frame_w + 0.5)
            h = int(box["h"] * frame_h + 0.5)
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 1)
            cv2.putText(frame, "%s" % p["entity"]["tag"]["value"], (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.imshow('frame', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even after an error or interrupt.
    cap.release()
    cv2.destroyAllWindows()


In [None]:
### capture camera
# Live preview with on-demand inference: press 'p' to run YOLOv3 on the current
# frame and show the annotated 416x416 snapshot inline; press 'q' to quit.
cap = cv2.VideoCapture(0)
# cap = cv2.VideoCapture("rtsp://<ip>")
print('width: {0}, height: {1}'.format(cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
# Ask the device for 416x416 frames; frames are resized below regardless, in
# case the request is not honored.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 416)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 416)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Without a frame the loop would never reach waitKey(), so it could
            # not be stopped with 'q'; end the capture instead of spinning.
            print('no frame received; stopping capture')
            break

        # Copy of the frame at the size sent to the model; boxes are drawn on this copy.
        fbuff = cv2.resize(frame, (416, 416), interpolation=cv2.INTER_AREA)

        # Mask once so both comparisons use the same low byte of the key code.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
        if key == ord('p'):
            # encode to jpg; if encoding fails, show the snapshot without detections
            is_success, buffer = cv2.imencode(".jpg", fbuff)
            if is_success:
                io_buf = io.BytesIO(buffer)
                pred = yolov3(io_buf)
            else:
                pred = {"inferences": []}

            # highlights (fbuff is 416x416, so the 416 scale matches the image)
            for p in pred["inferences"]:
                box = p["entity"]["box"]
                x = int(box["l"] * 416 + 0.5)
                y = int(box["t"] * 416 + 0.5)
                w = int(box["w"] * 416 + 0.5)
                h = int(box["h"] * 416 + 0.5)
                cv2.rectangle(fbuff, (x, y), (x + w, y + h), (255, 0, 0), 1)
                cv2.putText(fbuff, "%s" % p["entity"]["tag"]["value"], (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

            # Show the snapshot once, after every box has been drawn (and also
            # when nothing was detected). OpenCV frames are BGR while PIL
            # expects RGB, so convert before handing the array over.
            pilimg = Image.fromarray(cv2.cvtColor(fbuff, cv2.COLOR_BGR2RGB))
            display(pilimg)

        cv2.imshow('frame', frame)
finally:
    # Always free the camera and close the window, even after an error or interrupt.
    cap.release()
    cv2.destroyAllWindows()