In [75]:
import numpy as np
import cv2
import glob
import os
import math
from collections import defaultdict
import matplotlib.pyplot as plt
import cv2
import random   

In [None]:
# Every image this notebook writes ends up under this folder; create it once up front.
output_folder = "panorama_results"
os.makedirs(output_folder, exist_ok=True)
def save_file(output_filename,my_image_to_save):
    """Write an image below ``output_folder`` using ``cv2.imwrite``.

    Parameters
    ----------
    output_filename : str
        File name (may contain sub-directories) relative to ``output_folder``.
    my_image_to_save : array
        Image handed to ``cv2.imwrite`` unchanged.

    Returns
    -------
    bool
        The success flag reported by ``cv2.imwrite``.
    """
    full_path = os.path.join(output_folder, output_filename)
    # cv2.imwrite does not create missing directories; it just reports failure.
    parent_dir = os.path.dirname(full_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    written = cv2.imwrite(full_path, my_image_to_save)
    if not written:
        # Surface the failure instead of silently producing no file.
        print(f"Warning: could not write image to {full_path}")
    return written

In [77]:
def normalize_points(pts):
    """Centre 2-D points on their centroid and rescale them.

    The points are shifted so their mean is the origin and scaled so their
    average distance from the origin is sqrt(2). If every point coincides
    (average distance 0) the scale is left at 1.0.

    Parameters
    ----------
    pts : array of shape (N, 2)

    Returns
    -------
    pts_norm : array of shape (N, 2)
        The transformed points.
    T : float32 array of shape (3, 3)
        The similarity transform (in homogeneous coordinates) that was applied.
    """
    centroid = np.mean(pts, axis=0)
    cx, cy = centroid

    mean_radius = np.mean(np.linalg.norm(pts - centroid, axis=1))
    if mean_radius > 0:
        scale = np.sqrt(2) / mean_radius
    else:
        scale = 1.0

    # Scale about the centroid: x' = scale * (x - cx), y' = scale * (y - cy).
    T = np.array(
        [[scale, 0, -scale * cx],
         [0, scale, -scale * cy],
         [0, 0, 1]],
        dtype=np.float32,
    )

    ones_column = np.ones((pts.shape[0], 1))
    homogeneous = np.hstack((pts, ones_column))
    transformed = (T @ homogeneous.T).T
    # Back from homogeneous to Cartesian coordinates.
    pts_norm = transformed[:, :2] / transformed[:, 2, np.newaxis]
    return pts_norm, T

In [78]:
def sift(image_dir,printit,save_it=False):
    """Detect SIFT keypoints/descriptors for every ``*.png`` in a directory.

    Parameters
    ----------
    image_dir : str
        Directory that is searched (non-recursively) for ``*.png`` files.
    printit : bool
        When true, show each image with its keypoints via matplotlib.
    save_it : bool, optional
        When true, save the keypoint drawing as ``"sift" + filename``
        through ``save_file``.

    Returns
    -------
    ret : list of [keypoints, descriptors, filename]
        One entry per image, in the same order as ``image_files``.
    image_files : list of str
        Paths of the processed images, sorted lexicographically.
    """
    ret=[]
    image_pattern = os.path.join(image_dir, '*.png')
    # glob gives no ordering guarantee; sort so that index 0 (used as the
    # panorama reference elsewhere) is the same file on every run/platform.
    image_files = sorted(glob.glob(image_pattern))
    # The detector is stateless between images, so build it once.
    detector = cv2.SIFT_create()
    for file_path in image_files:
        # IMREAD_GRAYSCALE is the imread flag; COLOR_BGR2GRAY is a cvtColor
        # code and does not request a grayscale load when passed to imread.
        gray = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
        filename = os.path.basename(file_path)
        print(filename)
        kp, des = detector.detectAndCompute(gray,None)
        ret.append([kp,des,filename])
        if printit or save_it:
            # Pass None as outImage so the input image is never drawn over.
            img = cv2.drawKeypoints(gray, kp, None)
            if printit:
                plt.imshow(img)
                plt.show()
            if save_it:
                save_file("sift"+filename,img)
    return ret,image_files

In [79]:
def orb(image_dir,printit,save_it=False):
    """Detect ORB keypoints/descriptors for every ``*.png`` in a directory.

    Parameters
    ----------
    image_dir : str
        Directory that is searched (non-recursively) for ``*.png`` files.
    printit : bool
        When true, show each image with its keypoints via matplotlib.
    save_it : bool, optional
        When true, save the keypoint drawing as ``"orb" + filename``
        through ``save_file``.

    Returns
    -------
    ret : list of [keypoints, descriptors, filename]
        One entry per image, in the same order as ``image_files``.
    image_files : list of str
        Paths of the processed images, sorted lexicographically.
    """
    ret=[]
    image_pattern = os.path.join(image_dir, '*.png')
    # glob gives no ordering guarantee; sort for a reproducible image order.
    image_files = sorted(glob.glob(image_pattern))
    # One detector instance is enough for all images.
    detector = cv2.ORB_create()
    for file_path in image_files:
        gray = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
        filename = os.path.basename(file_path)
        kp = detector.detect(gray,None)
        kp, des = detector.compute(gray, kp)
        ret.append([kp,des,filename])
        if printit or save_it:
            # Pass None as outImage so the input image is never drawn over.
            img = cv2.drawKeypoints(gray, kp, None)
            if printit:
                plt.imshow(img)
                plt.show()
            if save_it:
                save_file("orb"+filename,img)
    return ret,image_files

In [80]:
# Build the path with os.path.join so it is valid on every platform.
image_dir = os.path.join("panorama_dataset", "v_bird")
# sift()/orb() return (features, image_files); unpack so that list_sift and
# list_orb really are the per-image feature lists their names suggest.
# save_it=True also writes the keypoint drawings to the output folder.
list_sift, image_files = sift(image_dir, False, True)
list_orb, image_files = orb(image_dir, False, True)


1.png
2.png
3.png
4.png
5.png
6.png


In [81]:
def find_matches(keypoints1, descriptors1, keypoints2, descriptors2, norm_type, ratio_limit=0.75):
    """Match two descriptor sets with brute-force kNN and a ratio test.

    For every descriptor in ``descriptors1`` the two nearest descriptors in
    ``descriptors2`` are looked up; the nearest one is kept only when its
    distance is below ``ratio_limit`` times the second-nearest distance.

    Parameters
    ----------
    keypoints1, keypoints2 : sequences of keypoints
        Indexed by ``queryIdx`` / ``trainIdx`` of the kept matches; each
        element must expose a ``.pt`` coordinate pair.
    descriptors1, descriptors2 : arrays or None
        Descriptor arrays for the two keypoint sets. ``None`` or an empty
        array yields an empty result instead of an error.
    norm_type : int
        Norm constant forwarded to ``cv2.BFMatcher``.
    ratio_limit : float, optional
        Ratio-test threshold (default 0.75).

    Returns
    -------
    best_matches : list
        Matches that passed the ratio test.
    source_points, destination_points : float32 arrays of shape (M, 2)
        Coordinates of the matched keypoints in image 1 and image 2.
    """
    # An image without keypoints gives no descriptors; knnMatch cannot
    # handle that, so report "no matches".
    if (descriptors1 is None or descriptors2 is None
            or len(descriptors1) == 0 or len(descriptors2) == 0):
        return [], np.empty((0, 2), dtype=np.float32), np.empty((0, 2), dtype=np.float32)

    bf_matcher = cv2.BFMatcher(norm_type)
    candidate_matches = bf_matcher.knnMatch(descriptors1, descriptors2, k=2)

    best_matches = []
    for pair in candidate_matches:
        # Fewer than two neighbours can be returned; the ratio test needs both.
        if len(pair) != 2:
            continue
        nearest, second_nearest = pair
        if nearest.distance < ratio_limit * second_nearest.distance:
            best_matches.append(nearest)

    source_points = np.array(
        [keypoints1[match.queryIdx].pt for match in best_matches], dtype=np.float32
    ).reshape(-1, 2)
    destination_points = np.array(
        [keypoints2[match.trainIdx].pt for match in best_matches], dtype=np.float32
    ).reshape(-1, 2)

    return best_matches, source_points, destination_points

In [82]:
def matcher(lists,norm=cv2.NORM_L2):
    """Run ``find_matches`` on every unordered pair of images.

    Parameters
    ----------
    lists : sequence
        Per-image entries whose element 0 is the keypoint list and element 1
        the descriptor array.
    norm : int, optional
        Norm constant forwarded to ``find_matches``.

    Returns
    -------
    list of [i, j, good_matches, src_pts, dst_pts]
        One entry for each pair with ``i < j``, ordered by ``i`` then ``j``.
    """
    image_matches = []
    total = len(lists)
    for i, first in enumerate(lists):
        first_kp = first[0]
        first_des = first[1]
        # Only look at later images so each pair is matched exactly once.
        for j in range(i + 1, total):
            second = lists[j]
            second_kp = second[0]
            second_des = second[1]
            good_matches, src_pts, dst_pts = find_matches(
                first_kp, first_des, second_kp, second_des, norm
            )
            image_matches.append([i, j, good_matches, src_pts, dst_pts])
    return image_matches

In [83]:
list_sift, image_files = sift(image_dir, False)

image_matches_sift = matcher(list_sift, cv2.NORM_L2)

# Draw the ratio-test matches of every image pair and save one file per pair.
for i, j, good_matches, src_pts, dst_pts in image_matches_sift:

    img1 = cv2.imread(image_files[i])
    img2 = cv2.imread(image_files[j])

    kp1 = list_sift[i][0]
    kp2 = list_sift[j][0]

    img_with_matches = cv2.drawMatches(
        img1, kp1,          # type: ignore
        img2, kp2,          # type: ignore
        good_matches,
        None,              # type: ignore
        flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS
    )  # type: ignore

    # Use base names only (image_files holds full input paths) and include
    # both indices so the pairs (i, j1), (i, j2), ... do not overwrite
    # each other.
    file_i_name = os.path.basename(image_files[i])
    file_j_name = os.path.basename(image_files[j])
    save_filename = f"sift_matches_{i}_{file_i_name}_to_{j}_{file_j_name}"
    # The drawing is BGR, which is what cv2.imwrite expects: no RGB conversion.
    save_file(save_filename, img_with_matches)
    


1.png
2.png
3.png
4.png
5.png
6.png


In [84]:
list_orb, image_files = orb(image_dir, False)

# ORB descriptors are binary, hence the Hamming norm.
image_matches_orb = matcher(list_orb, cv2.NORM_HAMMING)

# Draw the ratio-test matches of every image pair and save one file per pair.
for i, j, good_matches, src_pts, dst_pts in image_matches_orb:

    img1 = cv2.imread(image_files[i])
    img2 = cv2.imread(image_files[j])

    kp1 = list_orb[i][0]
    kp2 = list_orb[j][0]

    img_with_matches = cv2.drawMatches(
        img1, kp1,          # type: ignore
        img2, kp2,          # type: ignore
        good_matches,
        None,              # type: ignore
        flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS
    )  # type: ignore

    # Use base names only (image_files holds full input paths) and include
    # both indices so the pairs (i, j1), (i, j2), ... do not overwrite
    # each other.
    file_i_name = os.path.basename(image_files[i])
    file_j_name = os.path.basename(image_files[j])
    save_filename = f"orb_matches_{i}_{file_i_name}_to_{j}_{file_j_name}"
    # The drawing is BGR, which is what cv2.imwrite expects: no RGB conversion.
    save_file(save_filename, img_with_matches)

In [85]:
def direct_transform(src_pts, dst_pts):
    """Estimate a homography from point correspondences with the normalized DLT.

    Both point sets are first conditioned with ``normalize_points``; the
    homography is then the right singular vector of the stacked constraint
    matrix that belongs to the smallest singular value, mapped back to the
    original coordinates.

    Parameters
    ----------
    src_pts, dst_pts : arrays of shape (N, 2)
        Corresponding points; ``dst ~ H @ src`` in homogeneous coordinates.

    Returns
    -------
    H : float64 array of shape (3, 3)
        Homography scaled so that ``H[2, 2] == 1``.

    Raises
    ------
    numpy.linalg.LinAlgError
        Propagated from ``np.linalg.svd`` / ``np.linalg.inv``.
    """
    src_norm, T_src = normalize_points(src_pts)
    dst_norm, T_dst = normalize_points(dst_pts)

    # Two linear constraints on the 9 entries of H per correspondence.
    rows = []
    for (x, y), (xp, yp) in zip(src_norm, dst_norm):
        rows.append([0, 0, 0, -x, -y, -1, yp * x, yp * y, yp])
        rows.append([x, y, 1, 0, 0, 0, -xp * x, -xp * y, -xp])

    # Solve in double precision: a float32 SVD noticeably degrades the
    # null-space vector that defines the homography.
    A = np.array(rows, dtype=np.float64)
    _, _, Vh = np.linalg.svd(A)
    # Last row of Vh = singular vector of the smallest singular value.
    H_norm = Vh[-1].reshape(3, 3)

    # Undo the conditioning transforms (also in double precision).
    T_src = np.asarray(T_src, dtype=np.float64)
    T_dst = np.asarray(T_dst, dtype=np.float64)
    H = np.linalg.inv(T_dst) @ H_norm @ T_src
    # Fix the free scale so the bottom-right entry is 1.
    H = H / H[2, 2]

    return H

In [86]:
def RANSAC(source_points, destination_points, max_iterations=1000, distance_threshold=5.0):
    """Robustly fit a homography to point correspondences with RANSAC.

    Each iteration fits a candidate homography to 4 randomly drawn
    correspondences (via ``direct_transform``) and counts how many source
    points it maps to within ``distance_threshold`` of their destination
    point. The largest inlier set found is used for a final refit.

    Parameters
    ----------
    source_points, destination_points : arrays of shape (N, 2)
        Corresponding points.
    max_iterations : int, optional
        Number of random 4-point samples to try.
    distance_threshold : float, optional
        Maximum reprojection distance for a point to count as an inlier.

    Returns
    -------
    final_homography : (3, 3) array or None
        Homography refit on the best inlier set, or ``None`` when fewer than
        4 points were given, no sample reached 4 inliers, or the refit failed.
    best_inlier_indices : list of int
        Indices (into the input arrays) of the inliers; empty on failure.
    """
    num_points = source_points.shape[0]
    if num_points < 4:
        # A homography needs at least 4 correspondences.
        return None, []

    # Loop-invariant data, built once instead of once per point per iteration.
    source_homogeneous = np.column_stack(
        (source_points, np.ones(num_points))
    ).astype(np.float64)
    destination = np.asarray(destination_points, dtype=np.float64)

    best_inlier_indices = []
    max_inlier_count = 0

    for _ in range(max_iterations):
        random_indices = random.sample(range(num_points), 4)
        try:
            H_candidate = direct_transform(
                source_points[random_indices], destination_points[random_indices]
            )
        except np.linalg.LinAlgError:
            # Degenerate sample; just draw another one.
            continue

        # Project all source points at once: row k is H @ [x_k, y_k, 1].
        projected = source_homogeneous @ np.asarray(H_candidate, dtype=np.float64).T
        w = projected[:, 2]
        # Points with w == 0 (or non-finite results from a degenerate
        # candidate) can never be inliers; silence the resulting warnings.
        with np.errstate(divide="ignore", invalid="ignore", over="ignore"):
            predicted = projected[:, :2] / w[:, np.newaxis]
            errors = np.linalg.norm(predicted - destination, axis=1)
            inlier_mask = (w != 0) & (errors < distance_threshold)

        inlier_count = int(np.count_nonzero(inlier_mask))
        if inlier_count > max_inlier_count:
            max_inlier_count = inlier_count
            best_inlier_indices = np.flatnonzero(inlier_mask).tolist()

    if max_inlier_count < 4:
        # No candidate was supported by enough points.
        return None, []

    # Refit on all inliers of the best candidate.
    try:
        final_homography = direct_transform(
            source_points[best_inlier_indices], destination_points[best_inlier_indices]
        )
    except np.linalg.LinAlgError:
        return None, []
    return final_homography, best_inlier_indices

In [87]:
# Report, for every ORB image pair, the fraction of ratio-test matches that
# RANSAC keeps as inliers.
for i, j, good_matches, src_pts, dst_pts in image_matches_orb:
    # The estimator defined in this notebook is RANSAC (not Ransac).
    H_best, best = RANSAC(src_pts, dst_pts)
    if H_best is None:
        print("Bad points")
        continue
    print(len(best)/src_pts.shape[0])


0.9646017699115044
0.9459459459459459
0.9714285714285714
0.8205128205128205
0.625
0.9854014598540146
0.8679245283018868
0.8571428571428571
0.4444444444444444
0.9830508474576272
0.7777777777777778
0.9333333333333333
0.968503937007874
0.7272727272727273
0.8421052631578947


In [91]:
# For every ORB image pair, draw RANSAC inliers and outliers in two colours
# and save the visualisation.
for i, j, good_matches, src_pts, dst_pts in image_matches_orb:

    # The estimator defined in this notebook is RANSAC (not Ransac).
    H_best, best_inlier_indices = RANSAC(src_pts, dst_pts)

    if H_best is None:
        print(f"RANSAC FAILED for pair {i},{j}. Not enough inliers.")
        # Nothing to draw; without this, a missing or stale img_viz from a
        # previous pair would be saved below.
        continue

    img1 = cv2.imread(image_files[i])
    img2 = cv2.imread(image_files[j])
    kp1 = list_orb[i][0]
    kp2 = list_orb[j][0]

    # Split the ratio-test matches into RANSAC inliers and outliers.
    inlier_indices = set(best_inlier_indices)
    inlier_matches = [good_matches[k] for k in sorted(inlier_indices)]
    outlier_matches = [
        match for k, match in enumerate(good_matches) if k not in inlier_indices
    ]

    # Inliers first (green) ...
    img_inliers = cv2.drawMatches(
        img1, kp1, img2, kp2, inlier_matches, None,
        matchColor=(0, 255, 0),
        flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS
    )

    # ... then outliers drawn on top of the same canvas.
    # NOTE: OpenCV colours are BGR, so (255, 0, 0) is blue in the saved file.
    img_viz = cv2.drawMatches(
        img1, kp1, img2, kp2, outlier_matches, img_inliers,
        matchColor=(255, 0, 0),
        flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS | cv2.DrawMatchesFlags_DRAW_OVER_OUTIMG
    )

    file_i_name = os.path.basename(image_files[i])
    file_j_name = os.path.basename(image_files[j])

    # Create a clean, valid filename
    save_filename = f"matched_{i}_{file_i_name}_to_{j}_{file_j_name}"
    save_file(save_filename, img_viz)

In [None]:


def get_panorama(imageset, image_list=None, shiftlist=None, show_overlap=False):
    """Stitch a set of images into one panorama anchored on the first image.

    Every other image is registered directly against image 0 (SIFT matches +
    RANSAC homography), warped onto a canvas large enough to hold all warped
    image outlines, and blended 50/50 with what is already on the canvas
    wherever the two overlap.

    Parameters
    ----------
    imageset : str
        Image directory. Its last path component is used in the output file
        name, and it is passed to ``sift`` when ``image_list`` is None.
    image_list : list of str, optional
        Pre-computed image paths; when given, ``shiftlist`` must hold the
        matching per-image ``[keypoints, descriptors, ...]`` entries.
    shiftlist : list, optional
        Pre-computed SIFT features belonging to ``image_list``.
    show_overlap : bool, optional
        When true, display the overlap region for every merged image.

    Returns
    -------
    pano_image : uint8 BGR array
        The stitched panorama (also saved as ``panorama<name>.png``).
    """
    if image_list is None:
        print(imageset)
        list_sift, image_files = sift(imageset, False)
    else:
        image_files = image_list
        list_sift = shiftlist

    img_ref = cv2.imread(image_files[0])
    h_ref, w_ref = img_ref.shape[:2]

    # Homography of each successfully registered image j -> reference frame.
    homographies = {}

    # Outline of the reference image; warped outlines of the others are added
    # below to size the canvas.
    ref_corners = np.float32([[0, 0], [w_ref, 0], [w_ref, h_ref], [0, h_ref]]).reshape(-1, 1, 2)
    all_corners = [ref_corners]

    # Only pairs (0, j) are needed, so match the reference against each other
    # image directly instead of matching every pair of images.
    ref_kp, ref_des = list_sift[0][0], list_sift[0][1]
    for j in range(1, len(list_sift)):
        good_matches, src_pts, dst_pts = find_matches(
            ref_kp, ref_des, list_sift[j][0], list_sift[j][1], cv2.NORM_L2
        )

        if len(good_matches) < 4:
            print(f"Skipping image {j} (Only {len(good_matches)} matches)")
            continue

        # dst_pts belong to image j and src_pts to the reference, so this
        # estimates the mapping image j -> reference.
        H_j_to_i, best_inliers = RANSAC(
            dst_pts, src_pts, max_iterations=2000, distance_threshold=5.0
        )
        if H_j_to_i is None:
            # RANSAC reports failure with None; warping with it would crash.
            print(f"Skipping image {j} (RANSAC found no valid homography)")
            continue

        homographies[j] = H_j_to_i

        img_target = cv2.imread(image_files[j])
        h_t, w_t = img_target.shape[:2]
        target_corners = np.float32([[0, 0], [w_t, 0], [w_t, h_t], [0, h_t]]).reshape(-1, 1, 2)
        warped_corners = cv2.perspectiveTransform(target_corners, H_j_to_i)
        all_corners.append(warped_corners)

    all_pts = np.concatenate(all_corners, axis=0)

    # Bounding box of all outlines in reference coordinates.
    x_min, y_min = np.int32(all_pts.min(axis=0).ravel() - 0.5)
    x_max, y_max = np.int32(all_pts.max(axis=0).ravel() + 0.5)

    canvas_w = int(x_max - x_min)
    canvas_h = int(y_max - y_min)

    print(f"Calculated Panorama Size: {canvas_w}x{canvas_h}")

    # Shift everything so the bounding box starts at the canvas origin.
    H_translation = np.array([
        [1, 0, -x_min],
        [0, 1, -y_min],
        [0, 0, 1]
    ], dtype=np.float64)

    # Start the canvas with the (translated) reference image.
    pano_image = cv2.warpPerspective(img_ref, H_translation, (canvas_w, canvas_h))

    for j, H_raw in homographies.items():
        H_final = H_translation @ H_raw

        img_target = cv2.imread(image_files[j])
        warped_target = cv2.warpPerspective(img_target, H_final, (canvas_w, canvas_h))

        # Non-black pixels are treated as "covered" on either side.
        mask_warped = cv2.cvtColor(warped_target, cv2.COLOR_BGR2GRAY) > 0
        mask_ref = cv2.cvtColor(pano_image, cv2.COLOR_BGR2GRAY) > 0

        overlap_mask = mask_ref & mask_warped
        non_overlap_mask = mask_warped & ~mask_ref

        if show_overlap:
            overlap_visual = np.zeros(pano_image.shape, dtype=np.uint8)
            overlap_visual[overlap_mask] = pano_image[overlap_mask]
            plt.figure(figsize=(20, 10))
            plt.imshow(cv2.cvtColor(overlap_visual, cv2.COLOR_BGR2RGB))
            plt.title(f"Overlap Region: Image {j}")
            plt.axis('off')
            plt.show()

        # New area: copy as is. Shared area: equal-weight blend.
        pano_image[non_overlap_mask] = warped_target[non_overlap_mask]

        if overlap_mask.any():
            # Guarded so addWeighted is never handed empty arrays.
            pano_image[overlap_mask] = cv2.addWeighted(
                pano_image[overlap_mask], 0.5,
                warped_target[overlap_mask], 0.5, 0
            )

    print("All images merged.")
    # Last path component of imageset, whichever separator was used
    # (e.g. "panorama_dataset\\v_bird" -> "v_bird").
    name = imageset.replace("\\", "/").rstrip("/").split("/")[-1]
    save_file("panorama"+name+".png",pano_image)

    return pano_image

In [None]:
# Scene folders to stitch; joined with os.path.join so the paths are valid on
# every platform rather than only where "\\" is the separator.
scene_names = ["v_bird", "v_boat", "v_circus", "v_graffiti", "v_soldiers", "v_weapons"]
image_dirs = [os.path.join("panorama_dataset", scene) for scene in scene_names]

for images in image_dirs:
    panorama=get_panorama(images,show_overlap=False)
    

2

In [None]:
def crop_to_aspect(image, ref_h, ref_w):
    """Centre-crop ``image`` to the aspect ratio ``ref_w / ref_h``.

    Parameters
    ----------
    image : array with at least two dimensions (rows, columns, ...)
    ref_h, ref_w : numbers
        Height and width defining the wanted aspect ratio.

    Returns
    -------
    array
        A slice of ``image``: columns are trimmed when the image is wider
        than the reference ratio, rows are trimmed otherwise.
    """
    target_ratio = ref_w / ref_h
    height, width = image.shape[:2]

    if width / height > target_ratio:
        # Too wide: keep full height, trim equally from left and right.
        keep_w = int(target_ratio * height)
        left = (width - keep_w) // 2
        return image[:, left:left + keep_w]

    # Too tall (or already matching): keep full width, trim top and bottom.
    keep_h = int(width / target_ratio)
    top = (height - keep_h) // 2
    return image[top:top + keep_h, :]


print("Setting up AR pipeline...")

# Deliberately not named `sift`: that name is the feature-extraction helper
# defined earlier in the notebook, and rebinding it would break later calls
# to get_panorama().
sift_detector = cv2.SIFT_create()

img_ref = cv2.imread("ar_dataset/cv_cover.jpg")
if img_ref is None:
    # Everything below needs the reference features, so stop right here
    # instead of failing later with an unrelated NameError.
    raise FileNotFoundError("Error: Could not read ar_dataset/cv_cover.jpg")

gray_ref = cv2.cvtColor(img_ref, cv2.COLOR_BGR2GRAY)
kp_ref, des_ref = sift_detector.detectAndCompute(gray_ref, None)
h_ref, w_ref = img_ref.shape[:2]
print(f"Reference image loaded: {w_ref}x{h_ref}")

# Solid mask with the size of the book cover; warping it gives the exact
# region to replace, independent of the overlay's pixel values.
cover_mask = np.full((h_ref, w_ref), 255, dtype=np.uint8)

# Target video shows the book; source video is the clip pasted onto the cover.
cap_target = cv2.VideoCapture("ar_dataset/book.mov")
cap_source = cv2.VideoCapture("ar_dataset/ar_source.mov")

w_target = int(cap_target.get(cv2.CAP_PROP_FRAME_WIDTH))
h_target = int(cap_target.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap_target.get(cv2.CAP_PROP_FPS)

output_filename = "ar_dynamic_result.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_filename, fourcc, fps, (w_target, h_target))


frame_count = 0
try:
    while cap_target.isOpened():

        ret_t, target_frame = cap_target.read()
        if not ret_t:
            print("End of target video.")
            break

        ret_s, source_frame = cap_source.read()
        if not ret_s:
            # Source clip ended first: rewind it and keep going.
            cap_source.set(cv2.CAP_PROP_POS_FRAMES, 0)
            ret_s, source_frame = cap_source.read()
            if not ret_s:
                print("Error reading source video, even after looping.")
                break

        gray_frame = cv2.cvtColor(target_frame, cv2.COLOR_BGR2GRAY)
        kp_frame, des_frame = sift_detector.detectAndCompute(gray_frame, None)

        if des_frame is None:
            # No keypoints in this frame, so there is nothing to match.
            good_matches, src_pts, dst_pts = [], None, None
        else:
            good_matches, src_pts, dst_pts = find_matches(
                kp_ref, des_ref, kp_frame, des_frame, cv2.NORM_L2
            )

        # Homography book cover (reference image) -> current frame.
        # Four correspondences are the minimum a homography needs.
        H_ref_to_target = None
        if len(good_matches) >= 4:
            H_ref_to_target, _ = RANSAC(
                src_pts, dst_pts, max_iterations=1000, distance_threshold=5.0
            )

        if H_ref_to_target is not None:
            # Bring the source frame to the cover's aspect ratio and size,
            # then warp it onto the cover's position in the target frame.
            cropped_source = crop_to_aspect(source_frame, h_ref, w_ref)
            resized_source = cv2.resize(cropped_source, (w_ref, h_ref))
            warped_source = cv2.warpPerspective(
                resized_source, H_ref_to_target, (w_target, h_target)
            )

            # Paste region = warped cover outline. Thresholding the warped
            # pixels instead would leave holes wherever the clip is black.
            mask = cv2.warpPerspective(
                cover_mask, H_ref_to_target, (w_target, h_target)
            ) > 0

            final_frame = target_frame.copy()
            final_frame[mask] = warped_source[mask]
        else:
            final_frame = target_frame
            print(f"Frame {frame_count}: Homography failed (matches={len(good_matches)})")

        out.write(final_frame)
        cv2.imshow("AR Result", final_frame)

        frame_count += 1
        print(frame_count)
        # Press 'q' in the preview window to stop early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the handles even if a frame fails, so the output file is
    # finalised and the preview window is closed.
    cap_target.release()
    cap_source.release()
    out.release()
    cv2.destroyAllWindows()

print(f"AR processing complete. Saved to {output_filename}")