In [2]:
import cv2
from cv2 import VideoCapture
import numpy as np
from tqdm import tqdm
from typing import Callable

In [3]:
def calcBackground(
    video_path: str,
    num_samples: int = 100,
    image_transform: Callable[[np.ndarray], np.ndarray] = None,
) -> np.ndarray:
    """Estimate a video's static background as the per-pixel median of random frames.

    Args:
        video_path: Path of the video file to open.
        num_samples: Maximum number of frames to sample. Fewer are used when
            the video reports fewer frames than this.
        image_transform: Optional callable applied to every sampled frame
            before it is stored (e.g. a grayscale conversion). When None the
            frames are used exactly as decoded.

    Returns:
        The median frame, cast to ``uint8``.

    Raises:
        ValueError: If the video cannot be opened, reports no frames, or none
            of the sampled frames could be decoded.
    """
    cap = VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {video_path!r}")

        # Number of frames as reported by the container
        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if length <= 0:
            raise ValueError(f"Video reports no frames: {video_path!r}")

        # Randomly pick up to `num_samples` integer frame indices (with
        # replacement). Sorted so that seeks walk forward through the file.
        frame_ids = np.sort(np.random.randint(0, length, size=min(length, num_samples)))

        # Collect the selected frames
        frames = []
        for fid in tqdm(frame_ids, desc="reading frames"):
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(fid))
            ret, frame = cap.read()

            # A failed read yields no image; skip it instead of handing
            # None to the transform.
            if not ret:
                continue

            # Apply transform if needed
            if image_transform is not None:
                frame = image_transform(frame)

            frames.append(frame)
    finally:
        # Always give the file handle back, even when reading fails
        cap.release()

    if not frames:
        raise ValueError(f"No frames could be read from: {video_path!r}")

    # Calculate the median along the time axis
    median_frame = np.median(frames, axis=0).astype(dtype=np.uint8)

    return median_frame

In [4]:
def toGrayscale(image):
    """Return a single-channel grayscale version of a BGR image."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray

In [5]:
def findBoxes(
    image: np.ndarray,
    background: np.ndarray,
    min_diff_thresh: int = 75,
    min_size_thresh: int = 40,
) -> np.ndarray:
    """Find bounding boxes of regions where `image` differs from `background`.

    Args:
        image: Frame to analyse.
        background: Background estimate to subtract from `image`.
        min_diff_thresh: Absolute pixel difference above which a pixel is
            considered foreground.
        min_size_thresh: A box is discarded only when BOTH its width and its
            height are below this value.

    Returns:
        Integer array of shape ``(k, 4)`` with one ``(x, y, w, h)`` row per
        kept box. When nothing is detected the shape is ``(0, 4)`` so that
        column-wise unpacking downstream still works.
    """
    # Absolute difference, computed in a signed type so it cannot wrap around
    diff = np.abs(image.astype(np.int32) - background.astype(np.int32)).astype(np.uint8)

    # Turn the difference image into a black & white mask
    _, mask = cv2.threshold(diff, min_diff_thresh, 255, cv2.THRESH_BINARY)

    # Find the outer contours of the foreground blobs
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Populate bounding boxes
    bbox_list = []
    for c in contours:
        box = cv2.boundingRect(c)

        # Skip boxes that are too small in both dimensions
        if box[2] < min_size_thresh and box[3] < min_size_thresh:
            continue

        bbox_list.append(box)

    # Always hand back a 2-D (k, 4) array, including the k == 0 case
    bboxes = np.asarray(bbox_list, dtype=int).reshape(-1, 4)
    return bboxes

In [6]:
def resizeBoxes(bboxes: np.ndarray, new_width: int = 200, new_height: int = 200) -> np.ndarray:
    """Replace every box with a fixed-size box centred on the original.

    Args:
        bboxes: Array of ``(x, y, w, h)`` rows. An empty array of any shape
            and a single 1-D box are both accepted.
        new_width: Width given to every output box.
        new_height: Height given to every output box.

    Returns:
        Array of shape ``(n, 4)`` holding ``(x, y, new_width, new_height)``
        rows. Negative values are clamped to 0; no upper bound is applied
        because the image size is not known here.
    """
    bboxes = np.asarray(bboxes)

    # Nothing to resize: return an empty array that is still 2-D
    if bboxes.size == 0:
        return np.empty((0, 4), dtype=int)

    # Accept a single (x, y, w, h) box as well as a stack of them
    bboxes = np.atleast_2d(bboxes)
    n = bboxes.shape[0]

    # Create width and height columns (separate names: do not shadow the params)
    widths = np.full(shape=[n], fill_value=new_width)
    heights = np.full(shape=[n], fill_value=new_height)

    # Unpack columns
    x, y, w, h = bboxes.T

    # Calc center of bbox
    x_mid = x + w // 2
    y_mid = y + h // 2

    # Calc upper left corner
    new_x = x_mid - widths // 2
    new_y = y_mid - heights // 2

    # Join the columns back
    new_bboxes = np.column_stack((new_x, new_y, widths, heights))

    # Fix out of bounds coords
    new_bboxes[new_bboxes < 0] = 0

    return new_bboxes

In [7]:
def removeOverlappingBoxes(bboxes: np.ndarray) -> np.ndarray:
    """Drop every box that overlaps (or touches) at least one other box.

    Both members of an overlapping pair are removed; only boxes that are
    isolated from all others are kept.

    Args:
        bboxes: Array of ``(x, y, w, h)`` rows. An empty array is accepted.

    Returns:
        The rows of `bboxes` that do not overlap any other row, in their
        original order. Empty input yields an array of shape ``(0, 4)``.
    """
    bboxes = np.asarray(bboxes)

    # No boxes, nothing can overlap
    if bboxes.size == 0:
        return bboxes.reshape(0, 4)

    # Left, right, top, bottom limits as column vectors for broadcasting
    left = bboxes[:, 0][:, np.newaxis]
    right = (bboxes[:, 0] + bboxes[:, 2])[:, np.newaxis]
    top = bboxes[:, 1][:, np.newaxis]
    bottom = (bboxes[:, 1] + bboxes[:, 3])[:, np.newaxis]

    # Two boxes overlap when their x-ranges intersect AND their y-ranges
    # intersect. Unlike a "corner of one inside the other" test this also
    # catches cross-shaped overlaps where no corner is contained.
    overlap_x = (left <= right.T) & (left.T <= right)
    overlap_y = (top <= bottom.T) & (top.T <= bottom)
    overlap = overlap_x & overlap_y

    # A box always overlaps itself; ignore those matches
    np.fill_diagonal(overlap, False)

    # Keep only the boxes that overlap nothing else
    is_bad = overlap.any(axis=1)
    good_bboxes = bboxes[~is_bad]

    return good_bboxes

In [14]:
def FindRois(
    video_path: str,
    video_background: np.ndarray,
    background_diff_thresh: int = 60,
    bbox_size_thresh: int = 40,
    image_transform: Callable[[np.ndarray], np.ndarray] = None,
    roi_width: int = 250,
    roi_height: int = 250,
):
    """Play a video in a window with fixed-size ROI boxes drawn on each frame.

    For every frame, regions differing from `video_background` are boxed,
    resized to ``roi_width`` x ``roi_height`` around their centre, and boxes
    that overlap each other are dropped. Press ``q`` in the window to stop.

    Args:
        video_path: Path of the video file to open.
        video_background: Background image compared against each frame. It
            must match the output of `image_transform`.
        background_diff_thresh: Pixel difference threshold passed to findBoxes.
        bbox_size_thresh: Minimum box size passed to findBoxes.
        image_transform: Optional callable applied to each frame before the
            comparison. When None the raw frame is compared.
        roi_width: Width of the drawn ROI boxes.
        roi_height: Height of the drawn ROI boxes.
    """
    cap = VideoCapture(video_path)

    try:
        while True:
            ret, image = cap.read()
            if not ret:
                break

            # Fall back to the raw frame when no transform is supplied
            img_trans = image if image_transform is None else image_transform(image)

            bboxes = findBoxes(
                img_trans,
                background=video_background,
                min_diff_thresh=background_diff_thresh,
                min_size_thresh=bbox_size_thresh,
            )

            # Frames without detections have nothing to resize or filter
            if bboxes.size > 0:
                bboxes = resizeBoxes(bboxes, new_width=roi_width, new_height=roi_height)
                bboxes = removeOverlappingBoxes(bboxes)

            # Draw bboxes
            for box in bboxes:
                # Plain Python ints for the drawing call
                x, y, w, h = (int(v) for v in box)

                cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 3)

            cv2.imshow("BBOXES", image)

            # Poll the keyboard after showing the frame; "q" quits
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Release the file and close the window even if a frame fails
        cap.release()
        cv2.destroyAllWindows()

In [10]:
# Estimate the static background as the per-pixel median of up to 100
# randomly sampled frames, each converted to grayscale first.
bg_gray = calcBackground("worms.avi", num_samples=100, image_transform=toGrayscale)

reading frames: 100%|██████████| 100/100 [04:09<00:00,  2.50s/it]


In [15]:
# Play the video with ROI boxes drawn on each frame; press "q" in the window to stop.
# The same grayscale transform is used here so frames match bg_gray.
FindRois("worms.avi", bg_gray, image_transform=toGrayscale)

In [None]:
# Close any OpenCV windows that are still open.
cv2.destroyAllWindows()