In [2]:
import numpy as np
from PIL import Image
import cv2
from sklearn.neighbors import NearestNeighbors

In [3]:
def set_corr_manual():
    """
    Return the hand-picked point correspondences between the two images.

    Returns
    -------
    p1 : ndarray of shape (4, 2)
        Keypoint locations in image 1 (the corners of a 1000-unit square
        anchored at the origin)
    p2 : ndarray of shape (4, 2)
        Keypoint locations in image 2, listed in the same order as p1
    """
    # Row k of image1_points is paired with row k of image2_points.
    image1_points = [(0, 0), (0, 1000), (1000, 0), (1000, 1000)]
    image2_points = [(328, 79), (455, 81), (327, 196), (455, 199)]

    p1 = np.array(image1_points)
    p2 = np.array(image2_points)
    return p1, p2

In [4]:
def compute_H(p1, p2):
    """
    Estimate the homography between images with the direct linear transform.

    Parameters
    ----------
    p1 : ndarray of shape (n, 2)
        Matched keypoint locations in image 1
    p2 : ndarray of shape (n, 2)
        Matched keypoint locations in image 2

    Returns
    -------
    H : ndarray of shape (3, 3)
        The estimated homography, mapping p1 onto p2 in homogeneous
        coordinates. It is the last right-singular vector of the constraint
        matrix, so its overall scale and sign are arbitrary.
    """
    constraints = []
    for k in range(len(p1)):
        u, v = p1[k]
        u_dst, v_dst = p2[k]
        # Each correspondence yields one row for x and one row for y.
        row_x = [u, v, 1, 0, 0, 0, -u_dst * u, -u_dst * v, -u_dst]
        row_y = [0, 0, 0, u, v, 1, -v_dst * u, -v_dst * v, -v_dst]
        constraints.extend((row_x, row_y))

    # The solution is the right-singular vector of the smallest singular value.
    right_vectors = np.linalg.svd(np.array(constraints))[2]
    return np.reshape(right_vectors[-1], (3, 3))

In [5]:
def insert_image(base_img, input_img, H):
    """
    Insert projected input_img in base_img with estimated homography

    Every pixel (x, y) of input_img is forward-warped through H and written
    to the pixel of the result that contains the projected point. Pixels that
    project outside the base image, or whose projection is not finite, are
    skipped. When several input pixels land on the same destination, the one
    that comes last in row-major order wins.

    Parameters
    ----------
    base_img : ndarray of shape (h, w, 3), base image (left unmodified)
    input_img : ndarray of image to be projected and inserted
    H : ndarray of shape (3, 3)
        The estimated homography, mapping input_img (x, y) coordinates to
        base_img (x, y) coordinates

    Returns
    -------
    merged_img : ndarray of shape (h, w, 3)
        A copy of base_img with the warped input_img painted on top
    """
    # Work on a copy so the caller's base image is not modified.
    merged_img = np.array(base_img, copy=True)
    input_img = np.asarray(input_img)
    H = np.asarray(H, dtype=np.float64)

    in_h, in_w = input_img.shape[:2]
    out_h, out_w = merged_img.shape[:2]

    # Homogeneous (x, y, 1) coordinates of every input pixel, row-major.
    src_y, src_x = np.mgrid[0:in_h, 0:in_w]
    src_x = src_x.ravel()
    src_y = src_y.ravel()
    homog = np.stack([src_x, src_y, np.ones_like(src_x)]).astype(np.float64)
    proj = H @ homog

    # A zero homogeneous weight yields inf/nan, which fails the bounds test
    # below and is therefore dropped instead of raising.
    with np.errstate(divide="ignore", invalid="ignore"):
        dst_x = proj[0] / proj[2]
        dst_y = proj[1] / proj[2]
        inside = (dst_x >= 0) & (dst_x < out_w) & (dst_y >= 0) & (dst_y < out_h)

    src_x, src_y = src_x[inside], src_y[inside]
    # Coordinates are non-negative here, so truncation equals floor.
    cols = dst_x[inside].astype(np.intp)
    rows = dst_y[inside].astype(np.intp)

    # Fancy-index assignment with repeated targets has no guaranteed order,
    # so keep only the LAST source pixel per destination explicitly.
    flat = rows * out_w + cols
    _, first_in_reversed = np.unique(flat[::-1], return_index=True)
    last = flat.size - 1 - first_in_reversed

    merged_img[rows[last], cols[last]] = input_img[src_y[last], src_x[last]]
    return merged_img

In [6]:
# Q 2-1

# Load both images as RGB arrays
base_img = np.array(Image.open("00002.png").convert("RGB"))
input_img = np.array(Image.open("00003.jpg").convert("RGB"))

# Hand-picked correspondences (p1 <--> p2)
p1, p2 = set_corr_manual()

# Homography fitted to those correspondences
H = compute_H(p1, p2)

# Warp input_img into base_img and write the result to disk
merged_img = insert_image(base_img, input_img, H)
Image.fromarray(np.uint8(merged_img)).save("output_2-1.png")

In [21]:
def match_sift(loc1, des1, loc2, des2, distance_ratio):
    """
    Find the matches of SIFT features between two images

    A pair (i1, i2) is kept only when it passes the ratio test in both
    directions and each descriptor is the other's nearest neighbour.

    Parameters
    ----------
    loc1 : sequence of length n1
        Keypoints of image 1: either objects exposing a ``.pt`` (x, y)
        attribute (e.g. cv2.KeyPoint) or plain (x, y) pairs / an (n1, 2) array
    des1 : ndarray of shape (n1, 128) or None
        SIFT descriptors of the keypoints image 1
    loc2 : sequence of length n2
        Keypoints of image 2, same accepted forms as loc1
    des2 : ndarray of shape (n2, 128) or None
        SIFT descriptors of the keypoints image 2
    distance_ratio : threshold for the ratio test

    Returns
    -------
    x1 : ndarray of shape (n, 2)
        Matched keypoint locations in image 1
    x2 : ndarray of shape (n, 2)
        Matched keypoint locations in image 2
        Both arrays have shape (0, 2) when nothing matches or when either
        image has fewer than two descriptors.
    """

    def _as_xy(keypoints):
        # Accept KeyPoint-like objects (.pt) as well as raw (x, y) rows.
        pts = [getattr(kp, "pt", kp) for kp in keypoints]
        return np.array(pts, dtype=np.float64).reshape(-1, 2)

    # The ratio test needs two neighbours on each side; with fewer
    # descriptors the 2-NN query below cannot be answered.
    if des1 is None or des2 is None or len(des1) < 2 or len(des2) < 2:
        no_match = np.empty((0, 2), dtype=np.float64)
        return no_match, no_match.copy()

    loc1 = _as_xy(loc1)
    loc2 = _as_xy(loc2)

    # Forward matching: des1 -> des2
    nn1 = NearestNeighbors(n_neighbors=2, algorithm="auto").fit(des2)
    dists1, idxs1 = nn1.kneighbors(des1)
    ratio_mask1 = dists1[:, 0] < distance_ratio * dists1[:, 1]

    # Backward matching: des2 -> des1
    nn2 = NearestNeighbors(n_neighbors=2, algorithm="auto").fit(des1)
    dists2, idxs2 = nn2.kneighbors(des2)
    ratio_mask2 = dists2[:, 0] < distance_ratio * dists2[:, 1]

    # Bi-directional consistency: the best match in des2 must pass its own
    # ratio test and point back to the same descriptor in des1.
    cand1 = np.flatnonzero(ratio_mask1)
    cand2 = idxs1[cand1, 0]
    mutual = ratio_mask2[cand2] & (idxs2[cand2, 0] == cand1)

    x1 = loc1[cand1[mutual]]
    x2 = loc2[cand2[mutual]]

    return x1, x2

In [39]:
def compute_H_ransac(x1, x2, ransac_n_iter, ransac_thr):
    """
    Estimate the homography between images using RANSAC

    Each iteration fits a normalized-DLT homography to 4 random
    correspondences (drawn from NumPy's global random state) and counts the
    correspondences whose reprojection error is below ransac_thr. The model
    with the most inliers is finally re-fitted on all of its inliers.

    Parameters
    ----------
    x1 : ndarray of shape (n, 2)
        Matched keypoint locations in image 1
    x2 : ndarray of shape (n, 2)
        Matched keypoint locations in image 2
    ransac_n_iter : int
        Number of RANSAC iterations
    ransac_thr : float
        Error threshold for RANSAC

    Returns
    -------
    H : ndarray of shape (3, 3)
        The estimated homography mapping x1 onto x2, scaled so H[2, 2] == 1.
        The identity is returned when fewer than 4 correspondences are given
        or when no sampled model has any inlier.
    """
    x1 = np.asarray(x1, dtype=np.float64)
    x2 = np.asarray(x2, dtype=np.float64)

    n = x1.shape[0]
    if n < 4:
        # Always return a bare 3x3 matrix, matching the normal return path.
        return np.eye(3)

    def normalize_points(points):
        # Center the points
        centroid = np.mean(points, axis=0)
        points_centered = points - centroid

        # Scale to have an average distance of sqrt(2) from origin
        avg_dist = np.mean(np.sqrt(np.sum(points_centered**2, axis=1)))
        scale = np.sqrt(2) / avg_dist if avg_dist > 0 else 1.0

        # Create normalization matrix
        T = np.array(
            [
                [scale, 0, -scale * centroid[0]],
                [0, scale, -scale * centroid[1]],
                [0, 0, 1],
            ]
        )

        return T, np.column_stack([points, np.ones(points.shape[0])]) @ T.T

    def dlt_homography(points_1, points_2):
        # Normalize coordinates for better numerical stability
        T1, norm_p1 = normalize_points(points_1)
        T2, norm_p2 = normalize_points(points_2)

        A = []
        for (x_1, y_1, _), (x_2, y_2, _) in zip(norm_p1, norm_p2):
            A.append([x_1, y_1, 1, 0, 0, 0, -x_1 * x_2, -y_1 * x_2, -x_2])
            A.append([0, 0, 0, x_1, y_1, 1, -x_1 * y_2, -y_1 * y_2, -y_2])

        _, _, Vt = np.linalg.svd(np.array(A))
        H_norm = Vt[-1].reshape(3, 3)

        # Denormalize to get the actual homography
        H = np.linalg.inv(T2) @ H_norm @ T1
        # A degenerate sample can make H[2, 2] zero; the resulting inf/nan
        # matrix is rejected by the callers below.
        with np.errstate(divide="ignore", invalid="ignore"):
            return H / H[2, 2]

    best_inliers = np.array([], dtype=int)
    best_H = np.eye(3)

    # Homogeneous image-1 points are the same for every iteration.
    x1_h = np.hstack([x1, np.ones((n, 1))])

    for _ in range(ransac_n_iter):
        idx = np.random.choice(n, 4, replace=False)
        H = dlt_homography(x1[idx], x2[idx])
        if not np.all(np.isfinite(H)):
            continue

        # Project x1 using H
        x2_proj = (H @ x1_h.T).T
        x2_proj = x2_proj[:, :2] / (x2_proj[:, 2:3] + 1e-10)

        # Check error
        errors = np.linalg.norm(x2_proj - x2, axis=1)
        inliers = np.where(errors < ransac_thr)[0]

        if len(inliers) > len(best_inliers):
            best_inliers = inliers
            best_H = H

    # Refine homography using best inliers
    if len(best_inliers) >= 4:
        refined_H = dlt_homography(x1[best_inliers], x2[best_inliers])
        if np.all(np.isfinite(refined_H)):
            best_H = refined_H

    return best_H

In [None]:
def merge_image(img_1, img_2, H):
    """
    Merge projected input_img with base_img using estimated homography.

    The canvas is filled by backward warping: every canvas pixel (x, y) is
    mapped through H into img_2, and if it lands inside img_2 the nearest
    img_2 pixel is copied. Canvas pixels already holding a non-black img_1
    pixel are kept as they are.

    Parameters
    ----------
    img_1 : ndarray of shape (h, w, 3), base image
    img_2 : ndarray of shape (h, w, 3), image to be projected and merged with base image
    H : ndarray of shape (3, 3)
        The estimated homography, mapping canvas / img_1 (x, y) coordinates
        to img_2 (x, y) coordinates

    Returns
    -------
    merged_img : ndarray of shape (2h, 2w, 3), base image placed at the top-left corner
    """
    img_1 = np.asarray(img_1)
    img_2 = np.asarray(img_2)
    H = np.asarray(H, dtype=np.float64)

    # Get image dimensions
    h1, w1 = img_1.shape[:2]
    h2, w2 = img_2.shape[:2]

    # Define the output dimensions (2x the base image size)
    output_h, output_w = 2 * h1, 2 * w1

    # Create the output image and place the first image at the top-left corner
    merged_img = np.zeros((output_h, output_w, 3), dtype=np.uint8)
    merged_img[:h1, :w1] = img_1

    # Canvas pixels that hold a non-black img_1 pixel must not be overwritten.
    occupied = np.zeros((output_h, output_w), dtype=bool)
    occupied[:h1, :w1] = (merged_img[:h1, :w1] > 0).any(axis=-1)

    # Backward warping of the whole canvas at once: (x, y, 1) -> (u, v) in img_2.
    xs = np.arange(output_w, dtype=np.float64)[np.newaxis, :]
    ys = np.arange(output_h, dtype=np.float64)[:, np.newaxis]
    # A zero denominator yields inf/nan, which simply fails the bounds test.
    with np.errstate(divide="ignore", invalid="ignore"):
        denom = H[2, 0] * xs + H[2, 1] * ys + H[2, 2]
        u = (H[0, 0] * xs + H[0, 1] * ys + H[0, 2]) / denom
        v = (H[1, 0] * xs + H[1, 1] * ys + H[1, 2]) / denom
        inside = (u >= 0) & (u < w2) & (v >= 0) & (v < h2)

    fill = inside & ~occupied

    # Nearest-neighbour lookup; rounding can reach w2 / h2, so clamp to the edge.
    u_idx = np.minimum(np.rint(u[fill]).astype(np.intp), w2 - 1)
    v_idx = np.minimum(np.rint(v[fill]).astype(np.intp), h2 - 1)
    merged_img[fill] = img_2[v_idx, u_idx]

    return merged_img

In [42]:
# Q 2-2

# Hyperparameters, feel free to modify
np.random.seed(0)  # seeds the global RNG that the RANSAC sampling draws from
ransac_n_iter, ransac_thr, distance_ratio = 1000, 5, 0.8

# Load both images as RGB arrays
img_1 = np.array(Image.open("00004.jpg").convert("RGB"))
img_2 = np.array(Image.open("00005.jpg").convert("RGB"))

# Detect SIFT keypoints and descriptors in each image
sift = cv2.SIFT_create()
loc1, des1 = sift.detectAndCompute(img_1, None)
loc2, des2 = sift.detectAndCompute(img_2, None)

# Matched keypoint locations (x1 <--> x2)
x1, x2 = match_sift(loc1, des1, loc2, des2, distance_ratio)

# Homography fitted to the matches with RANSAC
H = compute_H_ransac(x1, x2, ransac_n_iter, ransac_thr)

# Warp img_2, merge it with img_1, and write the result to disk
merged_img = merge_image(img_1, img_2, H)
Image.fromarray(np.uint8(merged_img)).save("output_2-2.png")

Warped corners of img_2: [[ 3869.10149078  2086.28792087]
 [ 4991.02424788 -1458.43067882]
 [  584.16150238   100.12087311]
 [  556.96448148   426.39034668]]
canvas size (1080, 1920, 3)
