# Homography with ArUco markers

In [1]:
from pathlib import Path
import cv2
import numpy as np
import matplotlib.pyplot as plt
from moviepy.editor import *

print(f"OpenCV version: {cv2.__version__}")

OpenCV version: 4.8.1


In [2]:
# Predefined ArUco dictionary used both to generate and to detect the markers.
aruco_dict = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_6X6_250)

# Marker ids placed at the ROI corners, listed clockwise from the top-left:
# Top Left, Top Right, Bottom Right, Bottom Left
marker_ids = [10, 17, 63, 50]

# Size of the white border around the ArUco marker relative to the size of the marker
BORDER_FRACTION = 1 / 5

# Input videos: the destination contains the markers, the source is warped onto it.
video_dest_path = "data/destination_video.mp4"
video_src_path = "data/source_video.mp4"

# The result reuses the destination video's file extension.
output_path = "output/output_ar" + Path(video_dest_path).suffix

## 00. Generate Markers

In [None]:
# cv2.imwrite does not create missing folders and only reports failure through
# its boolean return value, so create the target folder up front and check it.
marker_dir = Path("markers")
marker_dir.mkdir(parents=True, exist_ok=True)

plt.figure(figsize=(18, 10))
for idx, marker_id in enumerate(marker_ids):
    # Render the marker as a 200x200 px image
    marker = cv2.aruco.generateImageMarker(dictionary=aruco_dict, id=marker_id, sidePixels=200)

    plt.subplot(1, len(marker_ids), idx + 1)
    plt.imshow(marker, cmap="gray")
    plt.title(f"Marker ID: {marker_id}")
    plt.axis("off")

    marker_path = marker_dir / f"marker_{marker_id}.png"
    if not cv2.imwrite(str(marker_path), marker):
        raise IOError(f"Could not write marker image: {marker_path}")

## 01. Input Data

### 01.1 Display Destination Video

In [None]:
# Load the destination video and embed an inline player (1000 px wide).
clip = VideoFileClip(video_dest_path)
clip.ipython_display(width=1000)

### 01.2 Display Source Video

In [None]:
# Load the source video and embed an inline player (1000 px wide).
clip = VideoFileClip(video_src_path)
clip.ipython_display(width=1000)

### 01.3 Read Video Files

In [20]:
# Open the destination video (the one the markers are detected in).
video_cap_dest = cv2.VideoCapture(video_dest_path)
if not video_cap_dest.isOpened():
    print(f"Error opening video stream or file: {video_dest_path}")

# Open the source video (the one that gets warped onto the destination).
video_cap_src = cv2.VideoCapture(video_src_path)
if not video_cap_src.isOpened():
    print(f"Error opening video stream or file: {video_src_path}")

# Stop here if either input is unusable; every later cell needs both captures.
assert video_cap_dest.isOpened() and video_cap_src.isOpened(), "Both input videos must be readable"

# Keep the frame rate as a float: truncating e.g. 29.97 to 29 would make the
# written video play at a different speed than the input.
fps = video_cap_dest.get(cv2.CAP_PROP_FPS)
frame_width = int(video_cap_dest.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(video_cap_dest.get(cv2.CAP_PROP_FRAME_HEIGHT))

# ":g" prints whole frame rates without a trailing ".0" (25.0 -> "25")
print(f"Width: {frame_width}\tHeight: {frame_height}\tFPS: {fps:g}")

Width: 1920	Height: 1080	FPS: 25


## 02. Output Data

In [21]:
# cv2.VideoWriter does not create missing folders; without the output folder the
# writer silently stays closed and every written frame is dropped.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)

fourcc = cv2.VideoWriter_fourcc(*"mp4v")
# The output is twice as wide as the input: two frames are written side by side.
video_writer = cv2.VideoWriter(output_path, fourcc, fps, (2 * frame_width, frame_height))
if not video_writer.isOpened():
    raise IOError(f"Could not open video writer for: {output_path}")

## 03. Processing frames

In [22]:
from typing import List, Optional
from numpy.typing import NDArray


def extract_points(
    marker_ids: List[int],
    ids: NDArray,
    corners: NDArray,
    border_fraction: Optional[float] = None,
    padding: float = 10,
) -> NDArray:
    """
    Function for extracting Region Of Interest (ROI) from destination video.

    For each of the four markers the outermost marker corner is taken (the top-left
    corner of the top-left marker, the top-right corner of the top-right marker, ...)
    and pushed further outwards by `border_fraction` times the padded marker size.

    Args:
        marker_ids:
            Marker IDs which were used during capturing the destination video, in the
            order Top Left, Top Right, Bottom Right, Bottom Left.
        ids:
            Detected marker ids. Any shape is accepted (it is flattened); None is
            treated as "nothing detected".
        corners:
            Corners of all detected markers, indexed in the same order as `ids`.
            Each entry must hold the 4 (x, y) corners of one marker.
        border_fraction:
            Size of the border around a marker relative to the marker size.
            Defaults to the notebook-level BORDER_FRACTION constant.
        padding:
            Number of pixels added to the measured marker width and height before
            the border offset is computed.

    Returns:
        Array of shape (4, 2) containing the 4 corner coordinates of the ROI in the
        destination video, in the same order as `marker_ids`.

    Raises:
        ValueError: If `marker_ids` does not contain exactly 4 ids or if one of the
            markers was not detected.
    """

    if len(marker_ids) != 4:
        raise ValueError(f"Expected exactly 4 marker ids, got {len(marker_ids)}.")

    if border_fraction is None:
        border_fraction = BORDER_FRACTION

    # Flatten so that both squeezed (N,) and raw (N, 1) id arrays are accepted
    detected_ids = np.array([]) if ids is None else np.asarray(ids).reshape(-1)

    # Direction (x, y) pointing away from the ROI for each corner:
    # Top Left, Top Right, Bottom Right, Bottom Left
    outward_signs = np.array([[-1, -1], [1, -1], [1, 1], [-1, 1]])

    points = []

    for corner_idx, marker_id in enumerate(marker_ids):
        matches = np.flatnonzero(detected_ids == marker_id)
        if matches.size == 0:
            raise ValueError(f"Marker {marker_id} was not detected.")

        # If an id was detected more than once, the first detection is used
        marker_corners = np.asarray(corners[int(matches[0])], dtype=float).reshape(4, 2)

        marker_width = abs(marker_corners[0][0] - marker_corners[1][0]) + padding
        marker_height = abs(marker_corners[0][1] - marker_corners[2][1]) + padding

        # The n-th marker contributes its n-th corner, shifted outwards by the border
        offset = outward_signs[corner_idx] * np.array([marker_width, marker_height]) * border_fraction
        points.append(marker_corners[corner_idx] + offset)

    return np.asarray(points)

In [23]:
print("Processing frames ...")
try:
    while True:
        # Read frames until the end of the source or destination video
        has_frame, frame_dest = video_cap_dest.read()
        if not has_frame:
            break

        has_frame, frame_src = video_cap_src.read()
        if not has_frame:
            break

        # Detect markers
        corners, ids, _rejected = cv2.aruco.detectMarkers(image=frame_dest, dictionary=aruco_dict)

        # The overlay is only possible when each of the four markers was found exactly
        # once; `ids` is None when nothing was detected at all.
        detected_ids = [] if ids is None else ids.flatten().tolist()
        roi_visible = all(detected_ids.count(marker_id) == 1 for marker_id in marker_ids)

        # Fallback: pass the destination frame through unchanged so the output keeps
        # the same number of frames even when markers are occluded or blurred.
        frame_result = frame_dest

        if roi_visible:
            # Extract ROI corners from marker corners
            points_dest = extract_points(marker_ids=marker_ids, ids=np.squeeze(ids), corners=corners)

            # Corners of source video
            src_height, src_width = frame_src.shape[:2]
            points_src = np.asarray([[0, 0], [src_width, 0], [src_width, src_height], [0, src_height]])

            # Calculate the homography matrix
            h, _inliers = cv2.findHomography(srcPoints=points_src, dstPoints=points_dest, method=cv2.RANSAC)

            # findHomography returns None for h when no valid transform could be estimated
            if h is not None:
                # Warp source image onto the destination image
                warped_image = cv2.warpPerspective(frame_src, h, (frame_dest.shape[1], frame_dest.shape[0]))

                # Create ROI mask which is used to add the source video to the destination video
                roi_mask = np.zeros(frame_dest.shape[:2], dtype=np.uint8)
                cv2.fillConvexPoly(roi_mask, np.int32([points_dest]), 1, cv2.LINE_AA)
                mask_BGR = cv2.merge([roi_mask, roi_mask, roi_mask])

                # Create black region in destination frame ROI.
                frame_masked = frame_dest * (1 - mask_BGR)

                # The ROI is black in frame_masked, so OR-ing pastes the warped source into it
                frame_result = cv2.bitwise_or(warped_image, frame_masked)

        # Original on the left, result on the right, separated by a white vertical line
        frame_out = cv2.hconcat([frame_dest, frame_result])

        frame_out = cv2.line(
            img=frame_out,
            pt1=(int(frame_out.shape[1] / 2), 0),
            pt2=(int(frame_out.shape[1] / 2), frame_out.shape[0]),
            color=(255, 255, 255),
            thickness=8,
        )

        video_writer.write(frame_out)
finally:
    # Always finalise the output file and free the inputs, even if a frame failed.
    # Re-run the cells that open the captures and the writer before processing again.
    video_writer.release()
    video_cap_dest.release()
    video_cap_src.release()

print("Finished processing frames ...")

Processing frames ...
Finished processing frames ...


## 04. Display Result

In [None]:
# Load the rendered side-by-side video and embed an inline player (2000 px wide).
clip = VideoFileClip(output_path)
clip.ipython_display(width=2000)