In [1]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
from sklearn.cluster import DBSCAN


In [2]:
# Path to the input video, relative to the notebook's working directory.
# Every cv2.VideoCapture(path) call in the cells below opens this file.
path = "robots.mp4"

In [3]:
# Corner-detection settings; unpacked as keyword arguments into
# cv2.goodFeaturesToTrack when the initial features are picked.
feature_params = {
    "maxCorners": 500,
    "qualityLevel": 0.1,
    "minDistance": 5,
    "blockSize": 7,
}

# Lucas-Kanade optical-flow settings; unpacked as keyword arguments into
# cv2.calcOpticalFlowPyrLK. The termination criteria stop the iterative search
# after 30 iterations or once the update falls below 0.01, whichever is first.
lk_params = {
    "winSize": (31, 31),
    "maxLevel": 1,
    "criteria": (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 30, 0.01),
}

# Minimum per-frame displacement (pixels) for a tracked point to count as
# "moving"; anything at or below this is neither drawn nor clustered.
movement_threshold = 2

# Neighbourhood radius (pixels) handed to DBSCAN as `eps` when moving points
# are grouped into objects. Tune it to the scale of the scene.
distance_threshold = 150

# Sparse Optical Flow

In [4]:
# Sparse optical flow demo.
#
# Corner features are followed from frame to frame with pyramidal Lucas-Kanade
# optical flow. Every feature that moves more than `movement_threshold` pixels
# between two consecutive frames gets a green trail and a red dot.
# Press 'q' in the OpenCV window to stop early.
cap = cv2.VideoCapture(path)

try:
    # Read the first frame of the video
    ret, old_frame = cap.read()

    if not ret:
        # Without this message a wrong path silently produces no window at all.
        print(f"Could not read a first frame from {path!r}")
    else:
        # Feature detection and optical flow both work on grayscale images
        old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)

        # Tracked points; (re)detected at the top of the loop whenever unusable
        p0 = None

        # Persistent canvas on which the feature trails accumulate
        mask = np.zeros_like(old_frame)

        # Loop over the video frames
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break  # The video is over

            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # goodFeaturesToTrack can return None (no corners found), and the
            # tracked set shrinks to nothing once every feature is lost.
            # Re-detect on the previous frame instead of crashing below.
            if p0 is None or len(p0) == 0:
                p0 = cv2.goodFeaturesToTrack(old_gray, mask=None, **feature_params)

            # Successfully tracked point pairs for this frame (may stay empty)
            good_new, good_old = [], []
            if p0 is not None and len(p0) > 0:
                p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)
                if p1 is not None:
                    # Keep only the points whose status flag reports success
                    good_new = p1[st == 1]
                    good_old = p0[st == 1]

            for new, old in zip(good_new, good_old):
                a, b = new.ravel()  # position in the current frame
                c, d = old.ravel()  # position in the previous frame

                # Euclidean displacement between the two frames
                movement = np.sqrt((a - c) ** 2 + (b - d) ** 2)

                # Only display points that moved more than the threshold
                if movement > movement_threshold:
                    # Trail segment from the new position back to the old one
                    mask = cv2.line(mask, (int(a), int(b)), (int(c), int(d)), (0, 255, 0), 2)
                    # Dot at the current position
                    frame = cv2.circle(frame, (int(a), int(b)), 5, (0, 0, 255), -1)

            # Overlay the accumulated trails onto the current frame and show it
            img = cv2.add(frame, mask)
            cv2.imshow('Robot Tracking', img)

            # The current frame and points become the reference for the next pass
            old_gray = frame_gray.copy()
            p0 = good_new.reshape(-1, 1, 2) if len(good_new) > 0 else None

            # Exit loop when 'q' is pressed
            if cv2.waitKey(30) & 0xFF == ord('q'):
                break
finally:
    # Always release the capture and close the windows, even if the loop
    # raised or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()

# Sparse Optical Flow with Bounding Boxes

In [5]:
# Sparse optical flow with bounding boxes.
#
# Corner features are followed with pyramidal Lucas-Kanade optical flow. The
# points that move more than `movement_threshold` pixels between consecutive
# frames are grouped with DBSCAN (eps=`distance_threshold`), and a blue box is
# drawn around each group. Press 'q' in the OpenCV window to stop early.
cap = cv2.VideoCapture(path)

try:
    # Read the first frame of the video
    ret, old_frame = cap.read()

    if not ret:
        # Without this message a wrong path silently produces no window at all.
        print(f"Could not read a first frame from {path!r}")
    else:
        # Feature detection and optical flow both work on grayscale images
        old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)

        # Tracked points; (re)detected at the top of the loop whenever unusable
        p0 = None

        # Persistent canvas on which the feature trails accumulate
        mask = np.zeros_like(old_frame)

        # Loop over all video frames
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break  # No more frames are available

            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # goodFeaturesToTrack can return None (no corners found), and the
            # tracked set shrinks to nothing once every feature is lost.
            # Re-detect on the previous frame instead of crashing below.
            if p0 is None or len(p0) == 0:
                p0 = cv2.goodFeaturesToTrack(old_gray, mask=None, **feature_params)

            # Successfully tracked point pairs for this frame (may stay empty)
            good_new, good_old = [], []
            if p0 is not None and len(p0) > 0:
                p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)
                if p1 is not None:
                    # Keep only the points whose status flag reports success
                    good_new = p1[st == 1]
                    good_old = p0[st == 1]

            # Current positions of the points that pass the movement threshold
            moving_points = []

            for new, old in zip(good_new, good_old):
                a, b = new.ravel()  # position in the current frame
                c, d = old.ravel()  # position in the previous frame

                # Euclidean displacement between the two frames
                movement = np.sqrt((a - c) ** 2 + (b - d) ** 2)

                if movement > movement_threshold:
                    # Remember the point for clustering
                    moving_points.append([a, b])

                    # Trail segment from the new position back to the old one
                    mask = cv2.line(mask, (int(a), int(b)), (int(c), int(d)), (0, 255, 0), 2)
                    # Dot at the current position
                    frame = cv2.circle(frame, (int(a), int(b)), 5, (0, 0, 255), -1)

            # Only perform clustering if there are moving points
            if moving_points:
                moving_points = np.array(moving_points)

                # Group moving points that lie within distance_threshold of each
                # other; a group needs at least two points.
                clustering = DBSCAN(eps=distance_threshold, min_samples=2).fit(moving_points)
                labels = clustering.labels_

                for cluster_label in np.unique(labels):
                    if cluster_label == -1:
                        # DBSCAN labels noise points -1; they get no box
                        continue

                    # Points belonging to the current cluster
                    cluster_points = moving_points[labels == cluster_label]

                    # Axis-aligned extent of the cluster
                    x_min, y_min = np.min(cluster_points, axis=0)
                    x_max, y_max = np.max(cluster_points, axis=0)

                    # Draw the bounding box around the clustered points
                    frame = cv2.rectangle(frame, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (255, 0, 0), 2)

            # Overlay the accumulated trails onto the current frame and show it
            img = cv2.add(frame, mask)
            cv2.imshow('Robot Tracking', img)

            # The current frame and points become the reference for the next pass
            old_gray = frame_gray.copy()
            p0 = good_new.reshape(-1, 1, 2) if len(good_new) > 0 else None

            # Exit the loop if the 'q' key is pressed
            if cv2.waitKey(30) & 0xFF == ord('q'):
                break
finally:
    # Always release the capture and close the windows, even if the loop
    # raised or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()

# Dense Optical Flow

In [6]:
# Dense optical flow demo.
#
# Farneback optical flow is computed between each pair of consecutive frames
# and visualised as a colour overlay: hue encodes the flow direction and
# brightness encodes the flow magnitude (normalised per frame).
# Press 'q' in the OpenCV window to stop early.
cap = cv2.VideoCapture(path)

try:
    # Read the first frame
    ret, old_frame = cap.read()

    if not ret:
        # Without this message a wrong path silently produces no window at all.
        print(f"Could not read a first frame from {path!r}")
    else:
        # Optical flow works on grayscale images
        old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)

        # HSV canvas for the visualisation. It is allocated once: saturation is
        # constant, and hue/value are fully overwritten on every frame.
        hsv_mask = np.zeros_like(old_frame)
        hsv_mask[..., 1] = 255  # Saturation to maximum

        # Loop over the video frames
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Positional arguments after the two images and the (unused) output
            # array follow OpenCV's documented order: pyr_scale, levels,
            # winsize, iterations, poly_n, poly_sigma, flags.
            flow = cv2.calcOpticalFlowFarneback(old_gray, frame_gray, None, 0.5, 3, 15, 3, 5, 1.5, 0)

            # Magnitude and angle of the per-pixel 2D flow vectors
            magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1])

            # Hue: the angle converted from radians to degrees, then halved
            hsv_mask[..., 0] = angle * 180 / np.pi / 2
            # Value: magnitude stretched to the 0-255 range of this frame
            hsv_mask[..., 2] = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)

            # Convert HSV to BGR for display
            rgb_flow = cv2.cvtColor(hsv_mask, cv2.COLOR_HSV2BGR)

            # Blend the original frame (70%) with the flow visualisation (30%)
            overlaid_frame = cv2.addWeighted(frame, 0.7, rgb_flow, 0.3, 0)

            # Display the overlaid frame
            cv2.imshow('Dense Optical Flow Overlay', overlaid_frame)

            # The current frame becomes the reference for the next pass
            old_gray = frame_gray.copy()

            # Exit loop when 'q' is pressed
            if cv2.waitKey(30) & 0xFF == ord('q'):
                break
finally:
    # Always release the capture and close the windows, even if the loop
    # raised or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()