In [5]:
# find YOLO Bounding Boxes in a video
import ultralytics
import cv2

# Load the YOLO weights. The path is relative, so it is resolved against the
# kernel's current working directory; a FileNotFoundError here means the
# notebook was started from another directory or the weights file is missing.
model = ultralytics.YOLO('weights/yolo8nTablePlayer.pt')

FileNotFoundError: [Errno 2] No such file or directory: 'weights/yolo8nTablePlayer.pt'

In [None]:
import cv2
import json

def process_video_with_bboxes(video_path, model, output_video_path='output_video.mp4', bbox_output_path='bboxes.json', show=True):
    """Run a detection model on every frame of a video and save the results.

    Each frame is passed to ``model``. Every detection is drawn onto the frame
    (green rectangle plus a "<class> <confidence>" label), the annotated frames
    are written to ``output_video_path`` and the detections are saved as JSON
    to ``bbox_output_path``.

    Args:
        video_path: Path of the input video, handed to ``cv2.VideoCapture``.
        model: Detector called as ``model(frame)``. The first element of its
            result must expose ``boxes.xyxy``, ``boxes.cls`` and ``boxes.conf``
            tensors, and ``model.names`` must map class ids to names
            (presumably an Ultralytics YOLO model -- confirm against caller).
        output_video_path: Destination of the annotated video ('mp4v' codec).
            Missing parent directories are created.
        bbox_output_path: Destination of the JSON file. Missing parent
            directories are created.
        show: When True (default) each annotated frame is shown in an OpenCV
            window and pressing 'q' in that window stops processing early.
            Pass False on Colab / headless machines where OpenCV GUI calls
            are unavailable.

    Returns:
        None. The JSON file holds a list with one entry per frame that had at
        least one detection; each entry is a list of dicts with the keys
        'frame_id', 'class', 'confidence' and 'bbox' ([x1, y1, x2, y2]).
    """
    import os  # local import: only needed to create the output directories

    cap = cv2.VideoCapture(video_path)

    # Check if the video opened successfully
    if not cap.isOpened():
        print("Error: Could not open video.")
        return

    out = None
    bbox_data = []  # one list of detections per frame that has any

    try:
        # Get video properties
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Keep the frame rate as a float: truncating 29.97 to 29 makes the
        # output drift from the source. Some containers report 0 (or NaN), which
        # would produce an unplayable file, so fall back to a sane default.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        # Make sure the destinations exist up front; otherwise VideoWriter fails
        # silently and the JSON dump only raises after all frames were processed.
        for target in (output_video_path, bbox_output_path):
            parent = os.path.dirname(target)
            if parent:
                os.makedirs(parent, exist_ok=True)

        # Initialize video writer
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
        if not out.isOpened():
            print("Error: Could not open video writer.")
            return

        # Get class names from the model
        class_names = model.names

        # Loop through the video frames
        frame_id = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Perform inference on the frame
            results = model(frame)

            frame_bboxes = []

            # Extract bounding boxes and labels if there are any detections
            if len(results) > 0 and results[0].boxes is not None:
                boxes = results[0].boxes
                bboxes = boxes.xyxy.cpu().numpy()  # x1, y1, x2, y2
                classes = boxes.cls.cpu().numpy()  # class IDs
                confidences = boxes.conf.cpu().numpy()  # confidences

                # Draw bounding boxes and labels on the frame
                for bbox, cls, conf in zip(bboxes, classes, confidences):
                    x1, y1, x2, y2 = map(int, bbox[:4])
                    class_name = class_names[int(cls)]
                    label = f"{class_name} {conf:.2f}"

                    # Draw rectangle
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    # Put label text above the rectangle; clamp so the text
                    # stays inside the frame for boxes touching the top edge.
                    label_y = max(y1 - 10, 20)
                    cv2.putText(frame, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

                    # Save bounding box data
                    frame_bboxes.append({
                        'frame_id': frame_id,
                        'class': class_name,
                        'confidence': float(conf),
                        'bbox': [x1, y1, x2, y2]
                    })

            # Append frame bounding boxes to bbox data
            if frame_bboxes:
                bbox_data.append(frame_bboxes)

            # Write the frame to the output video
            out.write(frame)

            if show:
                # Display the frame with bounding boxes. imshow only repaints
                # when the GUI event loop is pumped, hence the waitKey call.
                cv2.imshow('Frame', frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            frame_id += 1
    finally:
        # Release the capture/writer even if inference or drawing raised, so
        # the partially written video is finalized and file handles are freed.
        cap.release()
        if out is not None:
            out.release()
        if show:
            cv2.destroyAllWindows()

    # Save bounding box data to a file
    with open(bbox_output_path, 'w') as f:
        json.dump(bbox_data, f, indent=4)

    print("Video saved to:", output_video_path)
    print("Bounding box data saved to:", bbox_output_path)

In [7]:
# Annotate the MLTT clip: write the video with drawn boxes and the per-frame
# detections as JSON under out/.
video_path = 'videos/cut/MLTT.mp4'
process_video_with_bboxes(
    video_path,
    model,
    output_video_path='out/videos/MLTT_Y8_with_bboxes.mp4',
    bbox_output_path='out/data/MLTT_Y8_bbox.json',
)




0: 384x640 2 players, 1 table, 431.0ms
Speed: 1.7ms preprocess, 431.0ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 players, 1 table, 425.3ms
Speed: 1.3ms preprocess, 425.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 players, 1 table, 423.7ms
Speed: 1.3ms preprocess, 423.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 players, 1 table, 419.5ms
Speed: 1.3ms preprocess, 419.5ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 players, 1 table, 422.4ms
Speed: 1.1ms preprocess, 422.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 players, 1 table, 416.3ms
Speed: 1.3ms preprocess, 416.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 players, 1 table, 425.0ms
Speed: 2.0ms preprocess, 425.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 players, 1 table, 419