In [1]:
import cv2
import numpy as np

# Load the YOLO model
net = cv2.dnn.readNet("/home/greg/Documents/yolov3_final.weights", "/home/greg/Documents/yolov3.cfg")

# For getting names of all layers
layer_names = net.getLayerNames()

# For getting only detection layers i.e. 'yolo' layers
output_layers_indices = net.getUnconnectedOutLayers().flatten()
output_layers = [layer_names[i - 1] for i in output_layers_indices]

# Load video
cap = cv2.VideoCapture("/home/greg/Documents/video.mp4")

# Initialize VideoWriter
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output_video.avi', fourcc, 30.0, (416, 416))

while(cap.isOpened()):
    ret, frame = cap.read()

    if not ret or frame is None:
        print("Video has ended or couldn't read frame.")
        break
    
    # Resize frame
    scale_percent = 50  # Percentage by which to scale down the video
    width = int(frame.shape[1] * scale_percent / 100)
    height = int(frame.shape[0] * scale_percent / 100)
    frame = cv2.resize(frame, (width, height))
    
    # Prepare the frame for object detection
    height, width, channels = frame.shape
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    
    # Forward pass
    outs = net.forward(output_layers)

    # Information to show on the object (class id, confidence, bounding box coordinates)
    class_ids = []
    confidences = []
    boxes = []
    
    for detection_layer_output in outs:
        for detection in detection_layer_output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
                
    # Apply non-max suppression
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

    # Load class names
    with open("/home/greg/Desktop/Capstone_Project_Current/Data_Visualization/coco.names", "r") as f:
        classes = [line.strip() for line in f.readlines()]

    # Draw bounding boxes and labels
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confidence = confidences[i]
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(frame, label + " " + str(round(confidence, 2)), (x, y + 30), cv2.FONT_HERSHEY_PLAIN, 3, (0, 0, 255), 3)
            
    # Write frame to output video
    out.write(frame)

    # Display the resulting frame
    cv2.imshow('Frame', frame)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release everything
cap.release()
out.release()
cv2.destroyAllWindows()

KeyboardInterrupt: 