In [1]:
# %pip (rather than !pip) installs into the environment of the running kernel;
# -q keeps the download progress output out of the notebook.
%pip install -q yt-dlp opencv-python numpy scikit-image ultralytics

Collecting yt-dlp
  Downloading yt_dlp-2025.3.31-py3-none-any.whl.metadata (172 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/172.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m163.8/172.2 kB[0m [31m5.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m172.2/172.2 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Collecting ultralytics
  Downloading ultralytics-8.3.114-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux201

In [3]:
import os
import re

import cv2
import numpy as np
import yt_dlp
from ultralytics import YOLO

# Working directories used by the pipeline: downloaded videos, raw sampled
# frames, and frames with player boxes drawn on them.
VIDEO_FOLDER = "youtube_videos"
FRAME_FOLDER = "frames"
ANNOTATED_FOLDER = "annotated_frames"

# Create every working directory up front; existing ones are left untouched.
for _folder in (VIDEO_FOLDER, FRAME_FOLDER, ANNOTATED_FOLDER):
    os.makedirs(_folder, exist_ok=True)

# Step 1: Download YouTube video
def download_youtube_videos(video_urls):
    """Download each URL in ``video_urls`` into ``VIDEO_FOLDER`` using yt-dlp.

    Files are written as ``<video id>.<ext>``. An MP4 container is requested
    first because ``extract_frames`` only processes files ending in ``.mp4``;
    if no MP4 stream is offered, the selector falls back to the best available
    format instead of failing the download.

    Args:
        video_urls: List of video page URLs to download.
    """
    ydl_opts = {
        # Plain "best" may resolve to a non-MP4 container, which the frame
        # extraction step would then skip without any message.
        "format": "best[ext=mp4]/best",
        "outtmpl": f"{VIDEO_FOLDER}/%(id)s.%(ext)s",
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download(video_urls)

# Step 2: Extract frames every N frames
def extract_frames(video_folder, frame_folder, frame_interval=30):
    """Save every ``frame_interval``-th frame of each ``.mp4`` in ``video_folder``.

    Frames are written to ``frame_folder`` as
    ``<video name without extension>_frame<index>.jpg``, where ``<index>`` is
    the zero-based position of the frame in its video. Files that do not end in
    ``.mp4`` are ignored.

    Args:
        video_folder: Directory containing the source videos.
        frame_folder: Directory that receives the JPEG frames (created if missing).
        frame_interval: Keep one frame out of this many; must be >= 1.

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.
    """
    # Validate before touching any file: 0 would otherwise fail with a
    # ZeroDivisionError in the middle of the first video.
    if frame_interval < 1:
        raise ValueError(f"frame_interval must be >= 1, got {frame_interval}")

    # cv2.imwrite does not create directories; it just reports failure.
    os.makedirs(frame_folder, exist_ok=True)

    for video_file in sorted(os.listdir(video_folder)):
        if not video_file.endswith(".mp4"):
            continue

        video_path = os.path.join(video_folder, video_file)
        # splitext strips only the final extension, so "a.b.mp4" and "a.c.mp4"
        # no longer collapse to the same frame prefix "a".
        video_id = os.path.splitext(video_file)[0]

        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                print(f"Could not open video, skipping: {video_path}")
                continue

            frame_count = 0
            success, frame = cap.read()
            while success:
                if frame_count % frame_interval == 0:
                    frame_filename = f"{frame_folder}/{video_id}_frame{frame_count}.jpg"
                    if not cv2.imwrite(frame_filename, frame):
                        print(f"Could not write frame: {frame_filename}")
                success, frame = cap.read()
                frame_count += 1
        finally:
            # Release the decoder even if reading or writing raised.
            cap.release()

# Function to detect green field (playground)
def detect_green_field(image, lower_green=(35, 50, 50), upper_green=(85, 255, 255)):
    """Return a binary mask of the largest green region in ``image``.

    The image is converted from BGR to HSV and thresholded between
    ``lower_green`` and ``upper_green``. The largest external contour of the
    thresholded pixels is assumed to be the playing field and is drawn filled,
    so non-green pixels enclosed by that contour are part of the mask too.

    Args:
        image: BGR image as passed to ``cv2.cvtColor``.
        lower_green: Inclusive lower (H, S, V) bound of the green range, in
            OpenCV's HSV scale. Tune for different lighting or turf colours.
        upper_green: Inclusive upper (H, S, V) bound of the green range.

    Returns:
        A single-channel mask with the same height and width as ``image``,
        255 inside the field region and 0 elsewhere, or ``None`` when no pixel
        falls inside the green range.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Keep only the pixels whose HSV value lies inside the configured range.
    mask = cv2.inRange(hsv, np.array(lower_green), np.array(upper_green))

    # External contours only: holes inside the field do not produce their own
    # contours.
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    # The largest green area is assumed to be the field.
    largest_contour = max(contours, key=cv2.contourArea)

    field_mask = np.zeros_like(mask)
    cv2.drawContours(field_mask, [largest_contour], -1, 255, thickness=cv2.FILLED)
    return field_mask

# Step 3: Detect and annotate only players on the field
# Detector instance created once at module level; annotate_players_on_field
# reads this global and calls it once per frame.
model = YOLO("yolov8n.pt")

def annotate_players_on_field(frame_folder, save_folder):
    """Draw boxes around people standing on the field and save the frames.

    For every readable image in ``frame_folder`` the green field mask is
    computed with ``detect_green_field`` and the module-level ``model`` is run
    on the image. Detections of class 0 ('person') whose bounding-box centre
    falls on the field mask get a green rectangle and a "Player" label. The
    result is written to ``save_folder`` under the same file name.

    Files that cannot be read as images, and frames where no field is found,
    are skipped and produce no output file.

    Args:
        frame_folder: Directory containing the input frames.
        save_folder: Directory that receives the annotated frames (created if
            missing).
    """
    os.makedirs(save_folder, exist_ok=True)

    label = "Player"
    color = (0, 255, 0)  # Green, in BGR order

    for frame_file in sorted(os.listdir(frame_folder)):
        frame_path = os.path.join(frame_folder, frame_file)
        image = cv2.imread(frame_path)
        if image is None:
            continue

        field_mask = detect_green_field(image)
        if field_mask is None:
            continue  # Skip frames where we can't detect the field

        mask_height, mask_width = field_mask.shape[:2]

        result = model(image)[0]
        for box in result.boxes:
            if int(box.cls) != 0:  # keep only the 'person' class
                continue

            x1, y1, x2, y2 = map(int, box.xyxy[0])

            # Clamp the centre into the mask: a box touching the right/bottom
            # border can yield an index equal to the width/height, and a
            # negative index would silently wrap around to the opposite edge.
            center_x = min(max((x1 + x2) // 2, 0), mask_width - 1)
            center_y = min(max((y1 + y2) // 2, 0), mask_height - 1)

            if field_mask[center_y, center_x] > 0:  # Player is on the field
                cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
                # Keep the text baseline inside the image for boxes at the top.
                label_y = max(y1 - 10, 15)
                cv2.putText(image, label, (x1, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

        save_path = os.path.join(save_folder, frame_file)
        cv2.imwrite(save_path, image)

# RUN THE PIPELINE

# Step 1: Replace the video URL below with the video(s) to process.
# NOTE(review): the `&t=200s` start offset presumably has no effect on what is
# downloaded — confirm, or trim the frames explicitly if the offset matters.
video_urls = ["https://www.youtube.com/watch?v=VT61OMrFKLM&t=200s"]

# Step 2: Download every URL into VIDEO_FOLDER.
download_youtube_videos(video_urls)
# Step 3: Save every 15th frame of each .mp4 in VIDEO_FOLDER into FRAME_FOLDER.
extract_frames(VIDEO_FOLDER, FRAME_FOLDER, frame_interval=15)
# Step 4: Box only the detected people whose box centre lies on the field mask
# and write the annotated frames to ANNOTATED_FOLDER.
annotate_players_on_field(FRAME_FOLDER, ANNOTATED_FOLDER)

[youtube] Extracting URL: https://www.youtube.com/watch?v=VT61OMrFKLM&t=200s
[youtube] VT61OMrFKLM: Downloading webpage
[youtube] VT61OMrFKLM: Downloading tv client config
[youtube] VT61OMrFKLM: Downloading player 6450230e-main
[youtube] VT61OMrFKLM: Downloading tv player API JSON
[youtube] VT61OMrFKLM: Downloading ios player API JSON
[youtube] VT61OMrFKLM: Downloading m3u8 information
[info] VT61OMrFKLM: Downloading 1 format(s): 18
[download] youtube_videos/VT61OMrFKLM.mp4 has already been downloaded
[download] 100% of   59.74MiB

0: 384x640 8 persons, 338.1ms
Speed: 2.4ms preprocess, 338.1ms inference, 37.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 260.2ms
Speed: 2.8ms preprocess, 260.2ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 235.7ms
Speed: 2.2ms preprocess, 235.7ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 237.3ms
Speed: 2.6ms preprocess, 237.3ms inference, 

In [4]:
def create_annotated_video(frame_folder, output_path, fps=30):
    """Stitch the ``.jpg`` frames in ``frame_folder`` into an MP4 video.

    Frames are ordered by video prefix and then by their *numeric* frame index
    (``<prefix>_frame<index>.jpg``). Plain string sorting would place
    ``frame105`` before ``frame15`` and scramble the playback order. The first
    readable frame fixes the video dimensions; later frames of a different
    size are resized to match, and unreadable files are skipped.

    Args:
        frame_folder: Directory containing the frames to encode.
        output_path: Path of the video file to write.
        fps: Playback rate of the resulting video, in frames per second.

    Raises:
        RuntimeError: If the video writer cannot be opened for ``output_path``.
    """
    def frame_order(name):
        # (video prefix, frame index); names without the expected pattern sort
        # by their full name, ahead of any numbered frame sharing that prefix.
        match = re.fullmatch(r"(.*)_frame(\d+)\.jpg", name)
        if match is None:
            return (name, -1)
        return (match.group(1), int(match.group(2)))

    frame_files = sorted(
        (f for f in os.listdir(frame_folder) if f.endswith(".jpg")),
        key=frame_order,
    )
    if not frame_files:
        print("No frames found to create video.")
        return

    writer = None
    width = height = 0
    try:
        for frame_file in frame_files:
            frame = cv2.imread(os.path.join(frame_folder, frame_file))
            if frame is None:
                continue

            if writer is None:
                # The first readable frame defines the output dimensions.
                height, width = frame.shape[:2]
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Use 'XVID' for .avi
                writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
                if not writer.isOpened():
                    raise RuntimeError(f"Could not open video writer for {output_path}")
            elif frame.shape[:2] != (height, width):
                # The writer expects every frame at the size it was opened with.
                frame = cv2.resize(frame, (width, height))

            writer.write(frame)
    finally:
        # Finalise the file even if reading or encoding raised.
        if writer is not None:
            writer.release()

    if writer is None:
        # Files were listed but none of them could be decoded as an image.
        print("No frames found to create video.")
        return

    print(f"Video saved to {output_path}")


In [5]:
# Output file for the video assembled from the annotated frames.
ANNOTATED_VIDEO_PATH = "annotated_output.mp4"
# NOTE(review): fps=2 with frames sampled every 15 source frames presumably
# approximates real-time playback of ~30 fps footage — confirm against the source.
create_annotated_video(ANNOTATED_FOLDER, ANNOTATED_VIDEO_PATH, fps=2)


Video saved to annotated_output.mp4
