In [1]:
from ultralytics import YOLO

In [2]:
import torch
# Environment sanity check: print the installed PyTorch version string and,
# on the next line, whether torch reports a usable CUDA device.
print(torch.__version__, torch.cuda.is_available(), sep="\n")

2.6.0+cu126
True


In [None]:
from pathlib import Path

import cv2

# Load the YOLO11 pose model (the "l" checkpoint). The file name passed to
# YOLO() selects which weights are used.
model = YOLO("models/yolo11l-pose.pt")

# Input / output locations
input_video_path = "data/sample_video.mp4"
output_video_path = "outputs/test.mp4"

# cv2.VideoWriter does not create missing directories; without this the
# writer fails to open and no output file is produced.
Path(output_video_path).parent.mkdir(parents=True, exist_ok=True)

# Open the video file and fail loudly if it cannot be read; otherwise the
# loop below would be skipped and the "saved" message would be misleading.
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    raise FileNotFoundError(f"Could not open input video: {input_video_path}")

# Keep the frame rate as a float: truncating e.g. 29.97 to 29 changes the
# duration of the written video. Fall back to 30 when the container reports 0.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define a VideoWriter to save the annotated output
fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
if not out.isOpened():
    cap.release()
    raise IOError(f"Could not open output video for writing: {output_video_path}")

# Per-detection keypoints collected over the whole video:
#   xy_list  - values of `keypoints.xy`  (x and y coordinates)
#   xyn_list - values of `keypoints.xyn` (normalized coordinates)
xy_list = []
xyn_list = []

# try/finally guarantees the capture and writer are released even when
# inference raises part-way through the video.
try:
    # Process each frame
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break  # Exit loop if no more frames

        # Run YOLO pose estimation on the frame
        results = model(frame)

        for result in results:
            # Move the tensors off the inference device before storing them so
            # the accumulated history does not hold on to GPU memory.
            xy = result.keypoints.xy.cpu()  # x and y coordinates
            xyn = result.keypoints.xyn.cpu()  # normalized

            xy_list.append(xy)
            xyn_list.append(xyn)

        # Overlay the pose keypoints on the frame
        annotated_frame = results[0].plot()

        # Write the processed frame to the output video
        out.write(annotated_frame)

        # Optional: Display frame (Press 'q' to quit)
        # cv2.imshow("Pose Estimation", annotated_frame)
        # if cv2.waitKey(1) & 0xFF == ord("q"):
        #     break
finally:
    # Release resources
    cap.release()
    out.release()
    cv2.destroyAllWindows()

print(f"Processed video saved to {output_video_path}")

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-pose.pt to 'models\yolo11x-pose.pt'...


100%|██████████| 113M/113M [00:02<00:00, 44.6MB/s] 



0: 640x480 1 person, 69.4ms
Speed: 2.7ms preprocess, 69.4ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 49.3ms
Speed: 3.6ms preprocess, 49.3ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 48.3ms
Speed: 2.2ms preprocess, 48.3ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 48.1ms
Speed: 2.2ms preprocess, 48.1ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 49.0ms
Speed: 3.2ms preprocess, 49.0ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 49.3ms
Speed: 2.2ms preprocess, 49.3ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 49.8ms
Speed: 2.3ms preprocess, 49.8ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 48.7ms
Speed: 2.6ms preprocess, 48.7ms inference, 2.3ms postprocess per image at shape (1, 3, 64

In [3]:
from extract_metrics import Get_Pose_Estimation

# Run the project's pose-estimation helper on the sample clip and keep
# whatever it returns in `results`.
results = Get_Pose_Estimation(
    input_video_path="data/sample_video.mp4",
    model_size="l",
)

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-pose.pt to 'models\yolo11l-pose.pt'...


100%|██████████| 50.7M/50.7M [00:01<00:00, 36.0MB/s]



0: 640x480 1 person, 106.1ms
Speed: 3.3ms preprocess, 106.1ms inference, 118.2ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 31.0ms
Speed: 3.1ms preprocess, 31.0ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 30.8ms
Speed: 2.1ms preprocess, 30.8ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 57.6ms
Speed: 2.2ms preprocess, 57.6ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 33.1ms
Speed: 2.7ms preprocess, 33.1ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 31.1ms
Speed: 2.4ms preprocess, 31.1ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 28.6ms
Speed: 2.0ms preprocess, 28.6ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 30.7ms
Speed: 2.2ms preprocess, 30.7ms inference, 2.6ms postprocess per image at shape (1, 3