Setup:

In [8]:
import cv2
import numpy as np
from IPython.display import Video
import matplotlib.pyplot as plt
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort

testFile = "TestGoal.mp4"
target_fps = 10

Video("TestGoal.mp4")

Converting to 10 fps:

In [9]:
cap = cv2.VideoCapture(testFile)

# Get the original FPS and dimensions
original_fps = cap.get(cv2.CAP_PROP_FPS)
width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Calculate how many frames we skip between each frame we keep
# (rounding to ensure we don't miss frames due to float precision)
frame_interval = int(round(original_fps / target_fps))

# Define output codec and create VideoWriter
fourcc = cv2.VideoWriter_fourcc(*'avc1')
out = cv2.VideoWriter('fpsAdjusted.mp4', fourcc, target_fps, (width, height))

frame_index = 0
written_frames = 0

while True:
    ret, frame = cap.read()
    if not ret:
        break
    
    # Write only every "frame_interval"-th frame
    if frame_index % frame_interval == 0:
        out.write(frame)
        written_frames += 1
    
    frame_index += 1

cap.release()
out.release()

Video("fpsAdjusted.mp4")

Finding the Ball:

In [10]:
# Load YOLOv8 model.
model = YOLO("yolov8x.pt")  # Use the nano model (or choose another)

# Open your video.
video_path = "fpsAdjusted.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    print("Error opening video:", video_path)
    exit()

# Read the first frame to get dimensions.
ret, frame = cap.read()
if not ret:
    cap.release()
    exit("Error reading video frame.")

# Define width, height, and (optionally) target fps.
height, width = frame.shape[:2]
target_fps = cap.get(cv2.CAP_PROP_FPS)  # you can adjust if you like

# Parameters for detection.
detection_threshold = 0.5  # Only accept detections with confidence > 0.8
ball_class = "sports ball"  # Label expected in YOLO. (COCO uses "sports ball")
center = None
circle_radius = None
frame_idx = 0

# Loop through frames until we get a high-confidence detection.
while True:
    ret, frame = cap.read()
    if not ret:
        break  # Reached end of video without detection

    # Run YOLO inference on the frame.
    # (Set a lower conf here to not miss detections; we filter after.)
    results = model(frame, conf=0.5, verbose=False)
    
    # Iterate through detections.
    for box in results[0].boxes:
        cls_id = int(box.cls[0].item())
        conf = box.conf[0].item()
        label = model.names[cls_id] if hasattr(model, "names") else str(cls_id)
        
        # Check for the ball with high confidence.
        if label == ball_class and conf >= detection_threshold:
            # Extract bounding box coordinates (xyxy format).
            x_min, y_min, x_max, y_max = box.xyxy[0].tolist()
            # Compute the center as the midpoint of the bounding box.
            center_x = (x_min + x_max) / 2
            center_y = (y_min + y_max) / 2
            center = (int(center_x), int(center_y))
            # Estimate the ball's radius (average half-width and half-height).
            width_box = x_max - x_min
            height_box = y_max - y_min
            circle_radius = int((width_box + height_box) / 4)
            print(f"Ball detected at frame {frame_idx} with confidence {conf:.2f}")
            print("Initial center:", center, "Initial radius:", circle_radius)
            break
    if center is not None:
        break
    frame_idx += 1


Ball detected at frame 3 with confidence 0.61
Initial center: (2793, 1487) Initial radius: 18


Ball Tracking

In [11]:
windowWidth, windowHeight = width // 10, height // 10
threshold = 25.0               # Color similarity threshold (Euclidean distance)

fourcc = cv2.VideoWriter_fourcc(*'avc1')
out = cv2.VideoWriter("ball_tracked.mp4", fourcc, target_fps, (width, height))

# Create an initial mask and compute the initial average color.
mask_init = np.zeros(frame.shape[:2], dtype=np.uint8)
cv2.circle(mask_init, center, circle_radius, 255, thickness=-1)
avg_color = cv2.mean(frame, mask=mask_init)[:3]  # (B, G, R)
avg_color = np.array(avg_color, dtype=float)

# --- Process Each Frame ---
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Define the window boundaries (ensure they remain within image limits).
    start_x = max(center[0] - windowWidth, 0)
    end_x   = min(center[0] + windowWidth, width)
    start_y = max(center[1] - windowHeight, 0)
    end_y   = min(center[1] + windowHeight, height)

    similar_coords = []  # List for storing coordinates of pixels that meet the threshold.
    for y in range(start_y, end_y):
        for x in range(start_x, end_x):
            # Compute Euclidean distance in RGB space using the original frame colors.
            diff = np.linalg.norm(frame[y, x].astype(float) - avg_color)
            if diff < threshold:
                similar_coords.append((x, y))
                
    # Update the average color and the center based on the similar pixels.
    if similar_coords:
        # Build a proper mask for the similar pixels.
        mask_similar = np.zeros(frame.shape[:2], dtype=np.uint8)
        for (x, y) in similar_coords:
            mask_similar[y, x] = 255

        # Compute the new average based on the pixels in this mask.
        new_avg = np.array(cv2.mean(frame, mask=mask_similar)[:3], dtype=float)

        # Calculate learning rate based on the number of similar pixels relative to window area.
        n = len(similar_coords)
        window_area = (end_x - start_x) * (end_y - start_y)
        ratio = (float(n) / window_area)
        lr = ratio * ratio * ratio

        # Update the average color with the weighted combination.
        avg_color = (1 - lr) * avg_color + lr * new_avg
        # Compute the centroid of the similar pixels.
        similar_coords = np.array(similar_coords)
        centroid_x = int(np.mean(similar_coords[:, 0]))
        centroid_y = int(np.mean(similar_coords[:, 1]))
        center = (centroid_x, centroid_y)

        # Draw the final red circle (with radius 20) at the computed centroid.
        cv2.circle(frame, center, circle_radius, (0, 0, 255), thickness=2)
    # Write the processed frame to the output video.
    out.write(frame)

cap.release()
out.release()

Video("ball_tracked.mp4")

Pitch Line tracking:

In [14]:
import cv2
import numpy as np

# Open the video
video_path = "ball_tracked.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    print("Error opening video:", video_path)
    exit()

# Read the first frame to get dimensions.
ret, frame = cap.read()
if not ret:
    cap.release()
    exit("Error reading video frame.")

height, width = frame.shape[:2]
fps = cap.get(cv2.CAP_PROP_FPS)

# ---- Calculate scaling factor based on a 4k base resolution (3840x2160) ----
BASE_WIDTH = 3840
scalingFactor = width / BASE_WIDTH

# ---- Scale parameters accordingly ----
# These values were tuned for a 4k video. Now they are adjusted relative to the current resolution.
minLineLength_scaled = int(500 * scalingFactor)
maxLineGap_scaled    = int(100 * scalingFactor)
hough_threshold_scaled = int(700 * scalingFactor)

# For the morphological kernel, scale the kernel size.
# We make sure the kernel size remains at least 3 and odd (if needed) for proper morphology operations.
kernel_size = max(3, int(3 * scalingFactor))
if kernel_size % 2 == 0:
    kernel_size += 1

# Set up VideoWriter to save the processed video.
fourcc = cv2.VideoWriter_fourcc(*'avc1')
out = cv2.VideoWriter("pitch_lines.mp4", fourcc, fps, (width, height))

# Reset the capture to the beginning.
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

# ---- Process Each Frame in the Video ----
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the frame to HSV color space.
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

    # Define the HSV range for white colors.
    lower_white = np.array([20, 20, 140])
    upper_white = np.array([100, 100, 255])
    mask_white = cv2.inRange(hsv, lower_white, upper_white)

    # Apply morphological operations to reduce noise.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
    mask_clean = cv2.morphologyEx(mask_white, cv2.MORPH_CLOSE, kernel, iterations=2)
    
    # Apply Canny edge detection on the cleaned mask.
    edges = cv2.Canny(mask_clean, 50, 100)

    # Detect lines using the probabilistic Hough transform with scaled parameters.
    lines = cv2.HoughLinesP(mask_clean,
                            rho=1.0,
                            theta=np.pi/360,
                            threshold=hough_threshold_scaled,
                            minLineLength=minLineLength_scaled,
                            maxLineGap=maxLineGap_scaled)

    # If lines are found, draw them on the frame.
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            cv2.line(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
    
    # Write the processed frame to the output video.
    out.write(frame)

# Release the resources.
cap.release()
out.release()

# Optionally, play the video (depending on your environment)
Video("pitch_lines.mp4")


Goal Post detection

In [6]:
import cv2
import numpy as np

# Open the video
video_path = "fpsAdjusted.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    print("Error opening video:", video_path)
    exit()

# Read the first frame to get dimensions.
ret, frame = cap.read()
if not ret:
    cap.release()
    exit("Error reading video frame.")

height, width = frame.shape[:2]
fps = cap.get(cv2.CAP_PROP_FPS)

# ---- Calculate scaling factor based on a 4k base resolution (3840x2160) ----
BASE_WIDTH = 3840
scalingFactor = width / BASE_WIDTH

# ---- Scale parameters accordingly ----
rho_scaled = 1.0 * scalingFactor                  # Scale rho parameter
hough_threshold_scaled = int(100 * scalingFactor)   # Scale Hough threshold
minLineLength_scaled = int(500 * scalingFactor)       # Scale minimum line length
maxLineGap_scaled = int(100 * scalingFactor)          # Scale maximum line gap
line_thickness_scaled = max(1, int(50 * scalingFactor))  # Scale line thickness

# For the morphological kernel, scale the kernel size.
kernel_size = max(3, int(3 * scalingFactor))
if kernel_size % 2 == 0:  # Ensure an odd kernel size if needed
    kernel_size += 1

# Set up VideoWriter to save the processed video.
fourcc = cv2.VideoWriter_fourcc(*'avc1')
out = cv2.VideoWriter("goalpost.mp4", fourcc, fps, (width, height))

# Reset the capture to the beginning.
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

# ---- Process Each Frame in the Video ----
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the frame to HSV color space.
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

    # Define a broad range for white colors.
    lower_white = np.array([0, 0, 150])
    upper_white = np.array([200, 50, 255])
    mask_white = cv2.inRange(hsv, lower_white, upper_white)

    # Apply morphological operations to reduce noise in the mask.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
    mask_clean = cv2.morphologyEx(mask_white, cv2.MORPH_CLOSE, kernel, iterations=2)
    
    # Apply Canny edge detection on the cleaned mask.
    edges = cv2.Canny(mask_clean, 50, 100)

    # Detect lines in the edge image using probabilistic Hough transform with scaled parameters.
    lines = cv2.HoughLinesP(edges,
                            rho=rho_scaled,
                            theta=np.pi/180,
                            threshold=hough_threshold_scaled,
                            minLineLength=minLineLength_scaled,
                            maxLineGap=maxLineGap_scaled)

    # If lines are found, draw them on the frame.
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            cv2.line(frame, (x1, y1), (x2, y2), (255, 0, 0), line_thickness_scaled)
    
    # Write the processed frame to the output video.
    out.write(frame)
    # Optionally, if you want to see the edge map, you can convert and write it:
    # edges_bgr = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)
    # out.write(edges_bgr)

# Release resources.
cap.release()
out.release()

# Optionally, display the processed video in your environment:
Video("goalpost.mp4")


Player Detection:

In [12]:
# Load YOLOv8 **segmentation model**
model = YOLO("yolo11x-seg.pt")  # or yolov8s-seg.pt, yolov8m-seg.pt, etc.

# Open video
video_path = "fpsAdjusted.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise Exception("Error opening video")

# Get video properties
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter("yolo_seg_ball_centered.mp4", cv2.VideoWriter_fourcc(*'avc1'), fps, (width, height))

# Dummy ball center (replace with actual tracking)
def get_ball_center(frame):
    return (width // 2, height // 2)

scalingFactor = width / BASE_WIDTH


CROP_SIZE = 800

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Get ball center and define crop area
    cx, cy = get_ball_center(frame)
    x1 = max(cx - CROP_SIZE // 2, 0)
    y1 = max(cy - CROP_SIZE // 2, 0)
    x2 = min(cx + CROP_SIZE // 2, width)
    y2 = min(cy + CROP_SIZE // 2, height)
    cropped_frame = frame[y1:y2, x1:x2]

    # Run instance segmentation on crop
    results = model(cropped_frame, task='segment', conf=0.2, iou=0.4, verbose=False)[0]

    # Loop over all detected masks
    if results.masks is not None:
        masks = results.masks.data.cpu().numpy()  # shape: [N, H, W]
        boxes = results.boxes.xyxy.cpu().numpy()
        classes = results.boxes.cls.cpu().numpy()
        confs = results.boxes.conf.cpu().numpy()

        for mask, box, cls, conf in zip(masks, boxes, classes, confs):
            if int(cls) != 0:
                continue  # skip non-persons

            # Resize mask to match crop location in full frame
            mask_resized = np.zeros((height, width), dtype=np.uint8)
            mask_bin = cv2.resize((mask * 255).astype(np.uint8), (x2 - x1, y2 - y1), interpolation=cv2.INTER_NEAREST)
            mask_resized[y1:y2, x1:x2] = mask_bin

            # Optional: color fill the player mask
            colored = cv2.bitwise_and(frame, frame, mask=mask_resized)
            frame = cv2.addWeighted(frame, 1.0, colored, 0.6, 0)

            # Draw green contour
            contours, _ = cv2.findContours(mask_resized, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            cv2.drawContours(frame, contours, -1, (0, 255, 0), 2)

            # Draw class label
            x1_box = int(box[0]) + x1
            y1_box = int(box[1]) + y1
            cv2.putText(frame, f"Player {conf:.2f}", (x1_box, y1_box - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
 
    # Optional: show crop window and ball
    cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 1)
    cv2.circle(frame, (cx, cy), 5, (0, 0, 255), -1)

    out.write(frame)

cap.release()
out.release()
cv2.destroyAllWindows()

Video("yolo_seg_ball_centered.mp4")