In [None]:
import cv2 as cv
import numpy as np
import va
from ipywidgets import interact
import IPython

# Object detection con YOLO v3

In [None]:
# Load the sample picture. cv.imread does not raise when the file is missing
# or unreadable - it returns None - so fail here with a clear message instead
# of letting a later cell crash on a None image.
img = cv.imread('images/tbbt.jpg')
if img is None:
    raise FileNotFoundError("cannot read image 'images/tbbt.jpg'")
va.show(img)

In [None]:
# Common prefix of the model files; the extension is appended where each
# file is needed.
path = 'yolov3.'
# Build the network from the weights file and its configuration file.
net = cv.dnn.readNet(f'{path}weights', f'{path}cfg')

In [None]:
# Pad the image with black borders up to a square whose side is the longer
# image dimension, splitting the extra space evenly between the two sides.
height, width = img.shape[:2]
size = max(height, width)
bh, bw = (size - height) // 2, (size - width) // 2
# The border colour has to be given as ``value=``: the 7th positional
# parameter of copyMakeBorder is the output array ``dst``, not the colour.
padded_img = cv.copyMakeBorder(
    img,
    bh, size - height - bh,   # top, bottom
    bw, size - width - bw,    # left, right
    cv.BORDER_CONSTANT,
    value=(0, 0, 0),
)
va.show(padded_img)

In [None]:
# Turn the padded image into the network input blob: values scaled by 1/255,
# resized to 416x416, first and last colour channels swapped.
blob = cv.dnn.blobFromImage(
    padded_img,
    scalefactor=1.0 / 255,
    size=(416, 416),
    swapRB=True,
)
blob.shape

In [None]:
# Unpack the first entry of the blob and pass each of its sub-arrays to
# va.show as a separate argument (presumably the colour planes of the single
# input image - confirm against blob.shape).
va.show(*blob[0])

In [None]:
# Feed the blob to the network and look up the names of its unconnected
# output layers; these names are what the forward pass is asked to return.
net.setInput(blob)
output_names = net.getUnconnectedOutLayersNames()
print(output_names)

In [None]:
# Run the forward pass, requesting the results of the layers in output_names.
out = net.forward(output_names)

In [None]:
# Class names, one per line. The ``with`` block closes the file handle, and
# splitlines() avoids the spurious empty class name that split('\n') yields
# when the file ends with a newline.
with open(path + 'txt') as classes_file:
    Classes = classes_file.read().splitlines()

# One colour per class: class indices are spread over the 0-255 range
# (step 43, wrapped) and mapped through the HSV colour map.
# reshape(-1, 3) always produces an (N, 3) table, whereas squeeze() would
# collapse it to a single colour triple when there is exactly one class.
Colors = cv.applyColorMap(
    (np.arange(len(Classes)) * 43 % 256).astype(np.uint8)[..., np.newaxis],
    cv.COLORMAP_HSV,
).reshape(-1, 3)

In [None]:
# Display the loaded class names.
Classes

In [None]:
def detection(image, conf_threshold):
    """Run the network on ``image`` and collect the candidate detections.

    The image is padded with black borders to a square (side = longer image
    dimension) before being converted to a 416x416 input blob. Box
    coordinates are mapped back to the original, unpadded image.

    Args:
        image: image array whose first two dimensions are height and width.
        conf_threshold: a candidate is kept only when its best class score is
            strictly greater than this value.

    Returns:
        ``(detected_objects, confidences, class_indices)``: three parallel
        lists holding ``(x, y, w, h)`` integer boxes (top-left corner plus
        size), float confidences and class indices. No non-maximum
        suppression is applied here, so overlapping boxes may be returned.
    """
    height, width = image.shape[:2]
    size = max(height, width)
    bh, bw = (size - height) // 2, (size - width) // 2
    # The colour goes in ``value=``: the 7th positional parameter of
    # copyMakeBorder is the output array ``dst``, not the border colour.
    padded_img = cv.copyMakeBorder(image, bh, size - height - bh, bw, size - width - bw,
                                   cv.BORDER_CONSTANT, value=(0, 0, 0))
    net.setInput(cv.dnn.blobFromImage(padded_img, 1.0 / 255, (416, 416), swapRB=True))

    detected_objects, confidences, class_indices = [], [], []
    for out in net.forward(net.getUnconnectedOutLayersNames()):
        # Each row is (tx, ty, tw, th, p0, score_0, score_1, ...); the fifth
        # column is not used. Pick the best class of every row at once and
        # visit only the rows above the threshold, instead of unpacking every
        # candidate row in Python.
        scores = out[:, 5:]
        best_class = scores.argmax(axis=1)
        best_score = scores[np.arange(len(out)), best_class]
        # Compare in float64 so the test matches float(score) > conf_threshold.
        for row in np.flatnonzero(best_score.astype(np.float64) > conf_threshold):
            tx, ty, tw, th = out[row, :4]
            # Centre/size are assumed to be fractions of the padded square
            # (that is how they are scaled here); convert to a top-left
            # corner in pixels and remove the padding offset.
            x, y = int((tx - tw / 2) * size - bw), int((ty - th / 2) * size - bh)
            w, h = int(tw * size), int(th * size)
            detected_objects.append((x, y, w, h))
            confidences.append(float(best_score[row]))
            class_indices.append(best_class[row])
    return detected_objects, confidences, class_indices

In [None]:
# Show the raw detection lists for the sample image, keeping candidates whose
# best class score exceeds 0.5.
detection(img, 0.5)

In [None]:
def draw_detected_object(img, class_index, confidence, box):
    """Draw a single detection on ``img`` in place.

    A 2-pixel outline of ``box`` is drawn in the colour of the class, topped
    by a filled strip containing the class name and the confidence as a
    percentage in white text.

    Args:
        img: image to draw on (modified in place).
        class_index: index into ``Classes`` / ``Colors``.
        confidence: confidence value, displayed multiplied by 100.
        box: ``(x, y, w, h)`` - top-left corner and size.
    """
    left, top, box_w, box_h = box
    label = f'{Classes[class_index]} ({confidence*100:.2f}%)'
    color = Colors[class_index].tolist()
    font = cv.FONT_HERSHEY_PLAIN
    text_size, baseline = cv.getTextSize(label, font, 1, 1)
    text_w, text_h = text_size

    # Outline of the detected object.
    cv.rectangle(img, (left, top), (left + box_w, top + box_h), color, 2)

    # Filled label strip along the top edge, at least as wide as the text.
    strip_corner = (left + max(box_w, text_w), top + text_h + 5)
    cv.rectangle(img, (left, top), strip_corner, color, -1)

    text_origin = (left, top + baseline + 6)
    cv.putText(img, label, text_origin, font, 1, (255, 255, 255))

In [None]:
# Try the drawing helper with made-up values on a copy, so that the original
# image stays untouched.
tmp = img.copy()
draw_detected_object(
    tmp,
    class_index=42,
    confidence=0.42,
    box=(42, 42, 199, 99),
)
va.show(tmp)

In [None]:
def draw_detected_objects(image, detected_objects, confidences, class_indices, nms_threshold):
    """Draw on ``image`` the detections that survive non-maximum suppression.

    Args:
        image: image to draw on (modified in place).
        detected_objects: list of ``(x, y, w, h)`` boxes.
        confidences: confidence of each box, parallel to ``detected_objects``.
        class_indices: class index of each box, parallel to ``detected_objects``.
        nms_threshold: threshold forwarded to ``cv.dnn.NMSBoxes``.
    """
    # Nothing detected: nothing to suppress and nothing to draw.
    if len(detected_objects) == 0:
        return
    # Score threshold 0: candidates are expected to be already filtered by
    # confidence, only the overlap suppression is wanted here.
    kept = cv.dnn.NMSBoxes(detected_objects, confidences, 0, nms_threshold)
    # The result may be None or empty when no box is kept.
    if kept is None or len(kept) == 0:
        return
    # The container type of the kept indices is not uniform (the previous
    # code already special-cased tuples), so flatten whatever comes back
    # into a 1-D sequence of plain ints.
    for i in np.asarray(kept).reshape(-1):
        i = int(i)
        draw_detected_object(image, class_indices[i], confidences[i], detected_objects[i])

In [None]:
# Full pipeline on the sample image: detect with a strict confidence
# threshold, then draw the boxes kept by non-maximum suppression on a copy.
res = detection(img, 0.9)
tmp = img.copy()
draw_detected_objects(tmp, *res, nms_threshold=0.4)
va.show(tmp)

# Colleghiamo una webcam

In [None]:
# Webcam connection and configuration
def camera_open(index=0, width=640, height=480, warmup_frames=10):
    """Open a webcam, request a frame size and discard the first frames.

    Args:
        index: device index passed to ``cv.VideoCapture``.
        width: requested frame width in pixels.
        height: requested frame height in pixels.
        warmup_frames: number of initial frames read and thrown away.

    Returns:
        The opened ``cv.VideoCapture`` object; the caller must release it.

    Raises:
        RuntimeError: if the device cannot be opened.
    """
    cam = cv.VideoCapture(index)
    if not cam.isOpened():
        # Fail here with a clear message rather than returning a capture
        # object whose read() calls would all fail.
        cam.release()
        raise RuntimeError(f'cannot open camera {index}')
    cam.set(cv.CAP_PROP_FRAME_WIDTH, width)
    cam.set(cv.CAP_PROP_FRAME_HEIGHT, height)
    # Throw away the first frames (presumably to let the camera settle
    # before real processing starts).
    for _ in range(warmup_frames):
        cam.read()
    return cam

In [None]:
# Video processing that runs a function on every frame:
# execution ends when the kernel is interrupted
def video_processing(processing_func):
    """Show the webcam stream after passing each frame through ``processing_func``.

    Frames are displayed in place, reusing a single display id. The loop runs
    until the kernel is interrupted (KeyboardInterrupt), which ends it
    silently. The camera is released on every exit path, including errors
    raised by ``processing_func``.

    Args:
        processing_func: callable taking a frame and returning the image to show.

    Raises:
        RuntimeError: if a frame cannot be read from the camera.
    """
    cam = camera_open()
    try:
        display_id = va.get_new_display_id()
        while True:
            ok, frame = cam.read()
            if not ok:
                # A failed read yields no usable frame; report it clearly
                # instead of handing an invalid frame to processing_func.
                raise RuntimeError('cannot read a frame from the camera')
            img = processing_func(frame)
            va.show((img), display_id=display_id)
    except KeyboardInterrupt:
        # Interrupting the kernel is the intended way to stop the loop.
        pass
    finally:
        cam.release()

In [None]:
def f(frame):
    """Return a copy of ``frame`` with the detected objects drawn on it.

    Detection uses a 0.4 confidence threshold and drawing a 0.6
    non-maximum-suppression threshold; ``frame`` itself is not modified.
    """
    annotated = frame.copy()
    found = detection(frame, 0.4)
    draw_detected_objects(annotated, *found, 0.6)
    return annotated

In [None]:
# Start live detection on the webcam; interrupt the kernel to stop it.
video_processing(f)