## Optical Flow for Horizontal Rolling Text Detection

In [1]:
import cv2
import numpy as np

#### To see optical flow refinement during the entire video (just for visualization)   
Press 'q' to quit the video. 

In [2]:
def draw_horizontal_flow(img, flow, step=30):
    """Overlay sparse flow arrows on a grayscale frame.

    The flow field is sampled on a regular grid with spacing ``step``,
    starting at ``step // 2`` along both axes. Positive horizontal
    components are zeroed before drawing; vertical components are kept.

    Args:
        img: Single-channel image (it is converted with ``COLOR_GRAY2BGR``).
        flow: Array indexed as ``flow[row, col, 0]`` (horizontal) and
            ``flow[row, col, 1]`` (vertical).
        step: Grid spacing, in pixels, between sampled points.

    Returns:
        A BGR image with one green arrow drawn per sampled grid point.
    """
    height, width = img.shape[:2]
    offset = step // 2
    grid = np.mgrid[offset:height:step, offset:width:step]
    rows, cols = grid.reshape(2, -1).astype(int)

    # Clamp rightward (positive) horizontal components to zero so that only
    # leftward horizontal motion contributes to the arrows.
    dx = np.minimum(flow[rows, cols, 0], 0)
    dy = flow[rows, cols, 1]

    # One (start, end) point pair per sample, rounded to integer pixels.
    segments = np.vstack([cols, rows, cols + dx, rows + dy]).T.reshape(-1, 2, 2)
    segments = np.int32(segments + 0.5)

    canvas = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    for start, end in segments:
        cv2.arrowedLine(canvas, tuple(start), tuple(end), (0, 255, 0), 1, tipLength=0.5)
    return canvas

def detect(video_path):
    """Play a video with a live overlay of consistently leftward-moving regions.

    For every pair of consecutive frames, dense Farneback optical flow is
    computed and each pixel with negative horizontal flow is counted. Pixels
    that moved left in more than 70% of the frame pairs seen so far are
    highlighted in green and blended with the sparse arrow visualization from
    ``draw_horizontal_flow``. Press 'q' in the window to stop early.

    Args:
        video_path: Path passed to ``cv2.VideoCapture``.

    Returns:
        None. Prints a message and returns if the first frame cannot be read.
    """
    cap = cv2.VideoCapture(video_path)

    ret, first_frame = cap.read()
    if not ret:
        # Release the capture here too; otherwise the handle leaks on failure.
        cap.release()
        print("Failed to read the video file.")
        return

    prev_gray = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)

    consistency_threshold = 0.7  # Fraction of frame pairs that must show leftward motion
    kernel = np.ones((5, 5), np.uint8)  # Loop-invariant structuring element for closing
    leftward_counts = None  # Per-pixel count of frame pairs with leftward motion
    num_frames = 0

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None, 0.5, 5, 15, 3, 5, 1.2, 0)

            # Negative horizontal flow means the pixel moved to the left.
            leftward_motion_mask = flow[..., 0] < 0
            if leftward_counts is None:
                leftward_counts = np.zeros_like(leftward_motion_mask, dtype=np.float32)
            leftward_counts[leftward_motion_mask] += 1
            num_frames += 1

            # Pixels that moved left in more than the threshold share of pairs.
            consistent_regions = (leftward_counts / num_frames) > consistency_threshold

            # Morphological closing fills small gaps inside the detected regions.
            consistent_mask = cv2.morphologyEx(
                consistent_regions.astype(np.uint8), cv2.MORPH_CLOSE, kernel
            )

            highlight = np.zeros_like(frame)
            highlight[consistent_mask == 1] = [0, 255, 0]  # Green

            vis = draw_horizontal_flow(gray, flow)
            combined_vis = cv2.addWeighted(vis, 0.7, highlight, 0.3, 0)
            cv2.imshow('Horizontal Optical Flow', combined_vis)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            # `gray` is a fresh array every iteration, so no copy is needed.
            prev_gray = gray
    finally:
        # Always free the capture and close the window, even if a frame fails
        # to process part-way through the video.
        cap.release()
        cv2.destroyAllWindows()

# Example usage: opens a window showing the flow overlay for 'stationary.mp4'
# (press 'q' in that window to stop before the video ends).
video_path = 'stationary.mp4'
detect(video_path)

#### We see that the refined optical flow view is best around halfway through the video, when there is enough motion information to be confident about consistent horizontal movement. Save a screengrab of the video with optical flow highlights at the halfway frame.  
This won't display the video anymore. Still may take several seconds to run.  

In [61]:
def draw_horizontal_flow(img, flow, step=30):
    """Draw optical flow vectors on the video.

    Samples ``flow`` every ``step`` pixels (first sample at ``step // 2`` on
    each axis), zeroes any positive horizontal component, and draws a green
    arrow from each sample point to the point displaced by the flow. The
    arrows are drawn on a BGR conversion of the grayscale ``img``, which is
    returned.
    """
    n_rows, n_cols = img.shape[:2]
    first = step // 2
    sample_y, sample_x = (
        np.mgrid[first:n_rows:step, first:n_cols:step].reshape(2, -1).astype(int)
    )

    horiz = flow[sample_y, sample_x, 0]
    vert = flow[sample_y, sample_x, 1]

    # Discard rightward motion: only non-positive horizontal flow is kept.
    horiz[horiz > 0] = 0

    # Columns are (x_start, y_start, x_end, y_end); regroup into point pairs
    # and round to integer pixel coordinates.
    endpoints = np.column_stack(
        [sample_x, sample_y, sample_x + horiz, sample_y + vert]
    )
    arrows = (endpoints.reshape(-1, 2, 2) + 0.5).astype(np.int32)

    annotated = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    green = (0, 255, 0)
    for tail, head in arrows:
        cv2.arrowedLine(annotated, (tail[0], tail[1]), (head[0], head[1]), green, 1, tipLength=0.5)
    return annotated

def detect_and_save_screengrab(video_path):
    """Save the leftward-motion overlay as it looks halfway through a video.

    Dense Farneback optical flow is computed between consecutive frames and a
    per-pixel count of leftward motion (negative horizontal flow) is
    accumulated. When the number of processed frame pairs reaches half of the
    frame count reported by the container, pixels that moved left in more
    than 80% of those pairs are highlighted in green, blended with the arrow
    visualization from ``draw_horizontal_flow``, and written to
    ``'<video_path>_screengrab_halfway.png'``.

    Processing stops as soon as the halfway frame has been rendered, since
    later frames cannot influence the saved image. No window is opened.

    Args:
        video_path: Path passed to ``cv2.VideoCapture``; also used as the
            prefix of the output file name.

    Returns:
        None. A message is printed if the video cannot be read or if no
        screengrab could be produced.
    """
    cap = cv2.VideoCapture(video_path)
    screengrab_frame = None
    try:
        # The halfway index relies on the frame count reported by the backend.
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        halfway_frame = total_frames // 2

        ret, first_frame = cap.read()
        if not ret:
            print("Failed to read the video file.")
            return

        if halfway_frame < 1:
            # The frame-pair counter starts at 1, so a halfway index of 0 (or
            # less) can never be reached; bail out instead of processing the
            # whole video for nothing.
            print(f"Reported frame count ({total_frames}) is too small to "
                  f"pick a halfway frame for {video_path}; no screengrab saved.")
            return

        prev_gray = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)

        consistency_threshold = 0.8  # Fraction of frame pairs that must show leftward motion
        leftward_counts = None  # Per-pixel count of frame pairs with leftward motion
        num_frames = 0

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None, 0.5, 5, 15, 3, 5, 1.2, 0)

            # Negative horizontal flow means the pixel moved to the left.
            leftward_motion_mask = flow[..., 0] < 0
            if leftward_counts is None:
                leftward_counts = np.zeros_like(leftward_motion_mask, dtype=np.float32)
            leftward_counts[leftward_motion_mask] += 1
            num_frames += 1

            if num_frames == halfway_frame:
                # Only this frame is ever saved, so the overlay is rendered
                # once here rather than on every iteration.
                consistent_regions = (leftward_counts / num_frames) > consistency_threshold

                # Morphological closing fills small gaps in the regions.
                kernel = np.ones((5, 5), np.uint8)
                consistent_mask = cv2.morphologyEx(
                    consistent_regions.astype(np.uint8), cv2.MORPH_CLOSE, kernel
                )

                highlight = np.zeros_like(frame)
                highlight[consistent_mask == 1] = [0, 255, 0]  # Green

                vis = draw_horizontal_flow(gray, flow)
                screengrab_frame = cv2.addWeighted(vis, 0.7, highlight, 0.3, 0)
                break

            # `gray` is a fresh array every iteration, so no copy is needed.
            prev_gray = gray
    finally:
        # Release the capture on every exit path, including early returns.
        cap.release()

    if screengrab_frame is None:
        print(f"Video ended before the halfway frame ({halfway_frame}) was "
              f"reached for {video_path}; no screengrab saved.")
        return

    vidname = str(video_path)
    output_path = f'{vidname}_screengrab_halfway.png'
    if not cv2.imwrite(output_path, screengrab_frame):
        print(f"Failed to write the screengrab to {output_path}.")

########## CALL TO FUNCTION:
# Each video that yields a halfway frame gets an image named
# '<video path>_screengrab_halfway.png'.
vids_to_process = ['stationary.mp4']
for vid in vids_to_process:
    detect_and_save_screengrab(vid)

#### Draw a bounding box around the detected regions

In [62]:
def extract_bounding_boxes(screengrab_path, height_increase,
                           output_path='screengrab_with_bounding_boxes.png',
                           min_area=700):
    """Draw boxes around the green regions of a screengrab and save the result.

    The image is thresholded for green in HSV space, external contours of the
    resulting mask are extracted, and every contour whose area exceeds
    ``min_area`` gets a green bounding rectangle drawn onto the image.

    Args:
        screengrab_path: Path of the image to read.
        height_increase: Total number of pixels added to each box's height.
            ``height_increase // 2`` goes above the box and the remainder
            below; each edge is clamped to the image independently.
        output_path: Where the annotated image is written. Pass a distinct
            path per input to avoid overwriting earlier results.
        min_area: Contours with an area at or below this value are ignored.

    Returns:
        None. Prints a message if the image cannot be read or written.
    """
    screengrab = cv2.imread(screengrab_path)
    if screengrab is None:
        print("Failed to read the screengrab.")
        return

    # Convert to HSV and create a mask for the green color
    hsv = cv2.cvtColor(screengrab, cv2.COLOR_BGR2HSV)
    lower_green = np.array([40, 40, 40])
    upper_green = np.array([80, 255, 255])
    mask = cv2.inRange(hsv, lower_green, upper_green)

    # findContours returns (contours, hierarchy) in OpenCV 4.x and
    # (image, contours, hierarchy) in 3.x; the contours are second-to-last
    # in both layouts.
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    image_height = screengrab.shape[0]
    pad_top = height_increase // 2
    pad_bottom = height_increase - pad_top

    # Draw bounding boxes around significant motion regions
    for contour in contours:
        if cv2.contourArea(contour) <= min_area:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        # Clamp the top and bottom edges separately so a box near the top of
        # the image does not gain extra height at the bottom.
        top = max(0, y - pad_top)
        bottom = min(image_height, y + h + pad_bottom)
        cv2.rectangle(screengrab, (x, top), (x + w, bottom), (0, 255, 0), 2)

    # Save the image with bounding boxes
    if not cv2.imwrite(output_path, screengrab):
        print(f"Failed to write the annotated screengrab to {output_path}.")

########## CALL TO FUNCTION:
# Reads the halfway screengrab saved for each video. height_increase=0 leaves
# the boxes at their detected height; pass a positive value to add extra
# height so no text is cut off at the top/bottom.
for vid in ['stationary.mp4']:
    extract_bounding_boxes(f'{vid}_screengrab_halfway.png', height_increase=0)