In [None]:
# This script performs sparse keypoint detection and matching using a pretrained VGG CNN.

import torch
import torchvision
import cv2
import numpy as np
import matplotlib.pyplot as plt
import urllib.request
import os

# Your code goes here

In [None]:
# Load the pretrained VGG-16 model and put it in evaluation mode.
model = torchvision.models.vgg16(pretrained=True).eval()
# Placeholder for the feature extractor. It must be replaced with a callable
# before load_and_process_images() runs: that function calls
# cnn_feat_extractor(tensor) on a batched image tensor and squeezes the result
# to a C x H x W feature map. Leaving it as None makes that call fail.
cnn_feat_extractor = None ## YOUR CHOICE OF INTERMEDIATE LAYER GOES HERE ##

In [None]:
# Per-channel normalization statistics (one value per RGB channel).
mean = torch.tensor([0.485, 0.456, 0.406])
std = torch.tensor([0.229, 0.224, 0.225])


def preprocess_for_cnn(bgr_img):
    """Turn an H x W x 3 BGR image array into a normalized 1 x 3 x H x W tensor.

    The channels are reordered to RGB, pixel values are scaled by 1/255, and
    each channel is standardized with the module-level ``mean`` and ``std``.
    """
    rgb_pixels = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)

    # H x W x C -> C x H x W, as floats scaled by 1/255.
    chw = torch.from_numpy(rgb_pixels).float().permute(2, 0, 1) / 255.0

    # Reshape the statistics to C x 1 x 1 so they broadcast over H and W.
    normalized = (chw - mean.view(-1, 1, 1)) / std.view(-1, 1, 1)

    # Leading axis of size 1 is the batch dimension.
    return normalized[None]

In [None]:
# Saliency-based Keypoint Detection
def get_keypoints_from_saliency(saliency, max_points=500):
    """Pick local maxima of a 2-D saliency map as keypoint candidates.

    A cell is a peak when it equals the maximum of its 3x3 neighbourhood.
    Peaks are ordered by decreasing saliency and truncated to ``max_points``.

    Args:
        saliency: 2-D array of saliency values (any real dtype).
        max_points: Maximum number of peaks to keep. A falsy value
            (``None`` or ``0``) keeps every peak.

    Returns:
        Integer array of shape (N, 2) holding ``(row, col)`` indices into
        ``saliency``, strongest peak first.
    """
    # Work on a single float32 copy. The dilation runs in float32, so comparing
    # the *unconverted* input against it would miss nearly every peak whenever
    # the input is float64 (most values change when rounded to float32).
    sal32 = np.asarray(saliency, dtype=np.float32)
    if sal32.size == 0:
        return np.empty((0, 2), dtype=np.intp)

    neighborhood = np.ones((3, 3), np.uint8)
    local_max = cv2.dilate(sal32, neighborhood)

    # Note: on a flat plateau every cell equals its neighbourhood maximum, so
    # all of its cells are reported as peaks.
    peaks = sal32 == local_max
    coords = np.column_stack(np.nonzero(peaks))

    values = sal32[coords[:, 0], coords[:, 1]]
    coords = coords[np.argsort(values)[::-1]]  # strongest first

    if max_points and coords.shape[0] > max_points:
        coords = coords[:max_points]
    return coords

In [None]:
# Load and Process Images
def download_image(url, filename):
    """Download ``url`` to ``filename`` unless ``filename`` already exists.

    The data is first written to ``<filename>.part`` and only renamed to
    ``filename`` once the transfer has finished. An interrupted or failed
    download therefore never leaves a truncated file that later runs would
    mistake for a complete one.

    Args:
        url: Location to fetch.
        filename: Destination path on disk.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises on failure; in that
        case neither ``filename`` nor the temporary file is left behind.
    """
    if os.path.exists(filename):
        return

    partial = f"{filename}.part"
    try:
        urllib.request.urlretrieve(url, partial)
        os.replace(partial, filename)  # atomic hand-over of the finished file
    finally:
        # Only present here if the download or the rename failed.
        if os.path.exists(partial):
            os.remove(partial)

In [None]:
def _detect_and_describe(bgr_img, max_points=500):
    """Extract keypoints and unit-length CNN descriptors from one BGR image.

    Returns:
        ``(keypoints, descriptors)`` where ``keypoints`` is a list of
        ``(x, y)`` floats in image pixel coordinates and ``descriptors`` is a
        float32 array of shape (N, C), one row per keypoint.

    Raises:
        TypeError: If the module-level ``cnn_feat_extractor`` is not callable
            (for example, still the ``None`` placeholder).
    """
    if not callable(cnn_feat_extractor):
        raise TypeError(
            "cnn_feat_extractor must be a callable feature extractor "
            f"(got {type(cnn_feat_extractor).__name__}); "
            "assign it before calling load_and_process_images()."
        )

    with torch.no_grad():
        feat = cnn_feat_extractor(preprocess_for_cnn(bgr_img)).squeeze(0)  # C x H x W

    # Saliency of a feature-map cell = L2 norm of its channel vector.
    saliency = torch.norm(feat, p=2, dim=0).cpu().numpy()
    coords = get_keypoints_from_saliency(saliency, max_points=max_points)

    # Rescale (row, col) feature-map indices to (x, y) image coordinates.
    # Cell index 0 maps to pixel 0, i.e. this uses each cell's origin.
    feat_h, feat_w = saliency.shape
    img_h, img_w = bgr_img.shape[:2]
    keypoints = [(float(c * img_w / feat_w), float(r * img_h / feat_h)) for r, c in coords]

    # Gather all channel vectors at once (N x C) and L2-normalize each row;
    # the epsilon guards against division by zero for all-zero vectors.
    idx = torch.as_tensor(np.ascontiguousarray(coords), dtype=torch.long, device=feat.device)
    desc = feat.permute(1, 2, 0)[idx[:, 0], idx[:, 1]]
    desc = desc / (desc.norm(dim=1, keepdim=True) + 1e-8)

    return keypoints, desc.contiguous().cpu().numpy().astype(np.float32)


def load_and_process_images(img1_path, img2_path):
    """Load two images and compute CNN keypoints and descriptors for each.

    Args:
        img1_path: Path of the first image.
        img2_path: Path of the second image.

    Returns:
        ``(img1, img2, img1_gray, img2_gray, keypoints1, keypoints2,
        desc1_np, desc2_np)``: the BGR images as read by ``cv2.imread``, their
        grayscale versions, the ``(x, y)`` keypoint lists, and the float32
        descriptor arrays (one row per keypoint).

    Raises:
        FileNotFoundError: If either image cannot be read.
        TypeError: If ``cnn_feat_extractor`` has not been set to a callable.
    """
    img1 = cv2.imread(img1_path)
    img2 = cv2.imread(img2_path)

    if img1 is None or img2 is None:
        raise FileNotFoundError("Could not load one or both images. Check the paths or upload the images.")

    img1_gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    img2_gray = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)

    keypoints1, desc1_np = _detect_and_describe(img1, max_points=500)
    keypoints2, desc2_np = _detect_and_describe(img2, max_points=500)

    return img1, img2, img1_gray, img2_gray, keypoints1, keypoints2, desc1_np, desc2_np

In [None]:
# Descriptor Matching
def match_descriptors(desc1, desc2, ratio_thresh=None):
    """Match two descriptor sets with brute-force L2 k-NN and a ratio test.

    Args:
        desc1: Query descriptors, one row per keypoint (float32).
        desc2: Train descriptors, one row per keypoint (float32).
        ratio_thresh: Keep a match only if the best distance is strictly less
            than ``ratio_thresh`` times the second-best distance. ``None``
            disables the ratio test and keeps the best match of every query.

    Returns:
        List of the accepted best matches (``cv2.DMatch`` objects), in query
        order. Empty if either descriptor set is empty.
    """
    if len(desc1) == 0 or len(desc2) == 0:
        return []

    bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=False)
    knn_matches = bf.knnMatch(desc1, desc2, k=2)

    good_matches = []
    for candidates in knn_matches:
        if not candidates:
            continue
        best = candidates[0]
        if ratio_thresh is None:
            good_matches.append(best)
        elif len(candidates) >= 2 and best.distance < ratio_thresh * candidates[1].distance:
            # With fewer than two neighbours the ratio test cannot be
            # evaluated, so such matches are dropped rather than trusted.
            good_matches.append(best)
    return good_matches

In [None]:
# Visualize Matches
def visualize_matches(img1, img2, kp1, kp2, matches, max_draw=30):
    """Draw the strongest matches between two images side by side.

    Args:
        img1: First image (BGR).
        img2: Second image (BGR).
        kp1: Iterable of ``(x, y)`` keypoint coordinates for ``img1``.
        kp2: Iterable of ``(x, y)`` keypoint coordinates for ``img2``.
        matches: Match objects exposing a ``distance`` attribute, indexing
            into ``kp1`` / ``kp2``.
        max_draw: Maximum number of matches to draw.
    """
    kp_cv1 = [cv2.KeyPoint(float(x), float(y), 5) for (x, y) in kp1]
    kp_cv2 = [cv2.KeyPoint(float(x), float(y), 5) for (x, y) in kp2]

    # Sort by ascending distance so the drawn subset really is the "top"
    # matches, not just whichever happened to come first in the list.
    best_first = sorted(matches, key=lambda m: m.distance)[:max_draw]

    img_matches = cv2.drawMatches(img1, kp_cv1, img2, kp_cv2, best_first, None,
                                  flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
    plt.figure(figsize=(15, 8))
    plt.imshow(cv2.cvtColor(img_matches, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    # Report how many matches are actually shown (may be fewer than max_draw).
    plt.title(f'Top {len(best_first)} Deep Feature Matches')
    plt.show()

# Your code goes here

In [None]:
# Main Function
if __name__ == '__main__':
    # Automatically download sample images (skipped when the files already exist).
    # The sources are PNG files stored under .jpg names; cv2.imread identifies
    # the format from the file content, so the extension mismatch is harmless.
    download_image("https://raw.githubusercontent.com/opencv/opencv/master/samples/data/box.png", "image1.jpg")
    download_image("https://raw.githubusercontent.com/opencv/opencv/master/samples/data/box_in_scene.png", "image2.jpg")

    img1_path = 'image1.jpg'
    img2_path = 'image2.jpg'

    # Ratio-test threshold: a match is kept only when its best distance is
    # below ratio_thresh * second-best distance. A value of 0.0 rejects every
    # match (a non-negative distance is never < 0), so start from a usable
    # default and tune from there.
    ratio_thresh = 0.75 ## YOUR VALUES FOR RATIO GO HERE

    img1, img2, gray1, gray2, kp1, kp2, desc1, desc2 = load_and_process_images(img1_path, img2_path)
    # Pass the threshold explicitly; without it match_descriptors receives its
    # default of None instead of the value chosen above.
    matches = match_descriptors(desc1, desc2, ratio_thresh=ratio_thresh)
    print(f"Found {len(matches)} good matches")
    visualize_matches(img1, img2, kp1, kp2, matches)