In [18]:
import numpy as np
import imutils
import time
import cv2
import os

In [19]:
label_path = 'data/coco.names'
config_path = 'data/yolov3-tiny.cfg'
weights_path = 'data/yolov3-tiny.weights'
writer_path = '../../../Desktop/yolo_test.avi'

# Import labels & color setup
# Use a context manager so the label file handle is closed deterministically.
with open(label_path) as label_file:
    labels = label_file.read().strip().split("\n")
# Fixed seed so every class gets the same color on each run.
np.random.seed(42)
colors = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")

# Load model
print("[INFO] loading YOLO from disk...")
net = cv2.dnn.readNetFromDarknet(config_path, weights_path)
layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer indices. Depending on the
# OpenCV build they come back either as an Nx1 array or as a flat array, so
# flatten first instead of indexing each entry with [0].
out_layer_ids = np.array(net.getUnconnectedOutLayers()).flatten()
ln = [layer_names[i - 1] for i in out_layer_ids]

# Parameters setup
conf_threshold = 0.5    # Confidence threshold
nms_threshold = 0.4    # Non-maximum suppression threshold

# Video resolution: grab a single frame from the default camera to learn its size.
cap = cv2.VideoCapture(0)
try:
    grabbed, frame = cap.read()
finally:
    # Always give the device back, even if the read itself raised.
    cap.release()

if not grabbed or frame is None:
    # Fail here with a clear message rather than on `frame.shape` below.
    raise RuntimeError("Could not read a frame from the default camera (index 0)")

(frame_height, frame_width) = frame.shape[:2]

[INFO] loading YOLO from disk...


In [20]:
def process_outputs(outputs, frame_width, frame_height, conf_threshold, nms_threshold):
    """Turn raw network outputs into candidate boxes and run non-maximum suppression.

    Each detection row is read as: entries 0-3 are (center x, center y, width,
    height) and entries 5+ are per-class scores. The first four entries are
    multiplied by the frame size, so they are presumably normalized
    coordinates -- confirm against the model in use.

    Args:
        outputs: iterable of 2-D arrays, one per output layer, one row per detection.
        frame_width: width used to scale x coordinates and box widths.
        frame_height: height used to scale y coordinates and box heights.
        conf_threshold: a detection is kept only if its best class score exceeds this.
        nms_threshold: overlap threshold forwarded to cv2.dnn.NMSBoxes.

    Returns:
        A tuple (boxes, confidences, class_ids, selected) where boxes holds
        [x, y, width, height] lists with (x, y) the upper-left corner,
        confidences holds the matching float scores, class_ids holds the
        argmax class index of each kept detection, and selected is the value
        returned by cv2.dnn.NMSBoxes for those boxes.
    """
    # Same scale vector for every detection, so build it once.
    frame_scale = np.array([frame_width, frame_height, frame_width, frame_height])

    boxes, confidences, class_ids = [], [], []

    for layer_output in outputs:
        for detection in layer_output:
            class_scores = detection[5:]
            best_class = np.argmax(class_scores)
            best_score = class_scores[best_class]

            # Skip anything that does not clear the confidence threshold.
            if not (best_score > conf_threshold):
                continue

            # Scale the box to frame coordinates and truncate to integers.
            scaled = (detection[0:4] * frame_scale).astype("int")
            center_x, center_y, box_w, box_h = scaled

            # Convert from box center to upper-left corner.
            left = int(center_x - (box_w / 2))
            top = int(center_y - (box_h / 2))

            boxes.append([left, top, int(box_w), int(box_h)])
            confidences.append(float(best_score))
            class_ids.append(best_class)

    # Non-maximum suppression over all candidates that passed the threshold.
    selected = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

    return boxes, confidences, class_ids, selected

In [21]:
def draw_predictions(frame, boxes, confidences, class_ids, selected, labels, colors):
    """Draw the selected detections (rectangle plus "label: score" text) on a frame.

    Args:
        frame: image passed to cv2.rectangle / cv2.putText; drawn on in place.
        boxes: sequence of [x, y, width, height] with (x, y) the upper-left corner.
        confidences: score for each entry of boxes.
        class_ids: class index for each entry of boxes.
        selected: indices of the boxes to draw; must support len() and, when
            non-empty, .flatten().
        labels: class names, indexed by class id.
        colors: per-class color rows, indexed by class id.

    Returns:
        The same frame object that was passed in.
    """
    # Nothing to draw: hand the frame back untouched.
    if len(selected) == 0:
        return frame

    for idx in selected.flatten():
        box = boxes[idx]
        left, top = box[0], box[1]
        box_w, box_h = box[2], box[3]
        class_id = class_ids[idx]

        # Convert the color entries to plain Python ints before passing them to OpenCV.
        color = list(map(int, colors[class_id]))
        caption = "{}: {:.2f}".format(labels[class_id], confidences[idx])

        cv2.rectangle(frame, (left, top), (left + box_w, top + box_h), color, 2)
        # Caption sits just above the box's upper-left corner.
        cv2.putText(frame, caption, (left, top - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.4, color, 1)

    return frame

In [22]:
# Capture from default camera
cap = cv2.VideoCapture(0)

# The writer is sized from the probe frame grabbed in the setup cell.
fourcc = cv2.VideoWriter_fourcc(*"MJPG")
writer = cv2.VideoWriter(writer_path, fourcc, 10, (frame.shape[1], frame.shape[0]), True)

try:
    while True:
        # perf_counter is monotonic and high-resolution, which suits interval timing.
        start = time.perf_counter()

        # Capture frame-by-frame
        grabbed, frame = cap.read()
        if not grabbed or frame is None:
            # Camera unplugged / stream ended: stop instead of feeding None to the network.
            print("[INFO] no frame received from camera, stopping...")
            break

        # Transform frame in 416x416 blob
        blob = cv2.dnn.blobFromImage(frame, 1/255, (416, 416), swapRB=True, crop=False)

        # Forward pass
        net.setInput(blob)
        outputs = net.forward(ln)

        # Post-processing: scale boxes with the size of the frame actually being
        # processed rather than values left over from an earlier cell.
        boxes, confidences, class_ids, selected = process_outputs(
            outputs, frame.shape[1], frame.shape[0], conf_threshold, nms_threshold
        )
        frame = draw_predictions(frame, boxes, confidences, class_ids, selected, labels, colors)

        elapsed = time.perf_counter() - start
        # Guard against a zero interval so the overlay can never raise ZeroDivisionError.
        fps = (str(round(1 / elapsed)) if elapsed > 0 else 'inf') + ' fps'
        cv2.putText(frame, fps, (5, 75), cv2.FONT_HERSHEY_SIMPLEX, 0.4, [255, 255, 255], 1)

        # Display the frame
        cv2.imshow('frame', frame)

        # Recording is currently disabled; uncomment to save annotated frames to writer_path.
        # writer.write(frame)

        # Pause code for 1 ms and check key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release capture, writer and window even if the loop raised.
    writer.release()
    cap.release()
    cv2.destroyAllWindows()