In [1]:
import cv2
import numpy as np
import os

In [2]:
# Directory that holds the numbered frames (000001.jpg, 000002.jpg, ...) of the
# input image sequence; later cells build frame paths from it.
image_sequence_path = "./data/sequence"

In [3]:
# Locations of the Darknet YOLOv3 weights, network definition and class names.
yolo_weights_path = "./yolov3/yolov3.weights"
yolo_config_path = "./yolov3/yolov3.cfg"
labels_path = "./yolov3/coco.names"

# Load the network and run it with OpenCV's own backend on the CPU.
net = cv2.dnn.readNetFromDarknet(yolo_config_path, yolo_weights_path)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

# Resolve the names of the unconnected output layers, which are passed to
# net.forward() later. getUnconnectedOutLayers() yields 1-based layer ids and,
# depending on the OpenCV build, returns them either as an Nx1 array or as a
# flat array; flattening first makes the lookup work with both shapes.
all_layer_names = net.getLayerNames()
output_layer_ids = np.array(net.getUnconnectedOutLayers()).flatten()
layer_names = [all_layer_names[layer_id - 1] for layer_id in output_layer_ids]

# One class name per line; the trailing newline is dropped so that no empty
# label is produced at the end of the list.
with open(labels_path, 'rt') as f:
    labels = f.read().rstrip('\n').split('\n')

# Fixed seed so every class gets the same drawing colour on each run.
np.random.seed(42)
label_colors = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")

In [4]:
# Detect people in the first frame of the sequence.
CONFIDENCE_THRESHOLD = 0.5  # minimum class score for a detection to be kept
NMS_THRESHOLD = 0.4         # overlap threshold handed to non-maximum suppression

first_frame_path = f"{image_sequence_path}/000001.jpg"
image = cv2.imread(first_frame_path)
# cv2.imread signals failure by returning None instead of raising, so fail
# loudly here rather than with an attribute error on the next line.
if image is None:
    raise FileNotFoundError(f"Could not read image: {first_frame_path}")
height, width = image.shape[:2]

# Scale pixel values by 1/255, resize to 416x416 and swap the R and B channels
# before feeding the frame through the network.
blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), swapRB=True, crop=False)
net.setInput(blob)
layer_outputs = net.forward(layer_names)

boxes = []        # [x, y, w, h] in pixels, (x, y) being the top-left corner
confidences = []  # score of the winning class for each kept box
class_ids = []    # index into `labels` for each kept box

for output in layer_outputs:
    for detection in output:
        # Entries 0-3 are used as the box, entries from 5 onwards as the
        # per-class scores.
        scores = detection[5:]
        class_id = np.argmax(scores)
        if labels[class_id] != "person":
            continue

        confidence = scores[class_id]
        if confidence > CONFIDENCE_THRESHOLD:
            # Box values are scaled by the image size to get pixel units,
            # given as centre x/y plus width/height.
            box = detection[0:4] * np.array([width, height, width, height])
            center_x, center_y, box_width, box_height = box.astype("int")
            # Convert from centre-based to top-left-based coordinates.
            x = int(center_x - box_width / 2)
            y = int(center_y - box_height / 2)
            boxes.append([x, y, int(box_width), int(box_height)])
            confidences.append(float(confidence))
            class_ids.append(class_id)

# Non-maximum suppression: keep only the best box among heavily overlapping ones.
indexes = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)

In [5]:
# Draw the detections that survived non-maximum suppression. Drawing happens on
# a copy so that `image` stays untouched and this cell can be re-run safely.
annotated_image = image.copy()

# np.array(...).flatten() accepts every shape NMSBoxes may hand back (empty
# tuple, flat array or Nx1 array) and simply yields nothing when it is empty.
for i in np.array(indexes, dtype=int).flatten():
    x, y = (boxes[i][0], boxes[i][1])
    w, h = (boxes[i][2], boxes[i][3])
    color = [int(c) for c in label_colors[class_ids[i]]]
    cv2.rectangle(annotated_image, (x, y), (x + w, y + h), color, 2)
    text = "{}: {:.4f}".format(labels[class_ids[i]], confidences[i])
    # Label is placed just above the top-left corner of the box.
    cv2.putText(annotated_image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

# Show the result and block until a key is pressed, then close the window so
# it does not linger after the cell has finished.
cv2.imshow("Image", annotated_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

107

In [21]:
# Convert the numbered image sequence into an XVID-encoded AVI at 10 fps,
# upscaling every frame to twice its width and height.
cap = cv2.VideoCapture(f"{image_sequence_path}/%06d.jpg", cv2.CAP_IMAGES)
try:
    # Without this check a bad path yields a 0x0 frame size and an empty video.
    if not cap.isOpened():
        raise IOError(f"Could not open image sequence in {image_sequence_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    output_size = (frame_width*2, frame_height*2)  # (width, height), computed once

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('video.avi', fourcc, 10.0, output_size)
    try:
        # The writer does not raise on failure (e.g. codec unavailable); it
        # would just discard every frame, so check explicitly.
        if not out.isOpened():
            raise IOError("Could not open 'video.avi' for writing")

        while True:
            ret, frame = cap.read()
            # Stop at the end of the sequence or on the first unreadable frame.
            if not ret or frame is None:
                break
            out.write(cv2.resize(frame, output_size))
    finally:
        # Release the writer even on error so the output file is closed.
        out.release()
finally:
    cap.release()

cv2.destroyAllWindows()