# Image Stitcher

This notebook is the core of the compositing phase of the image stitching pipeline. Given a set of images and a set of homographies that project each image into a common reference frame, it outputs the resulting stitched image.

## Importing libraries

In [None]:
import cv2 as cv
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import numpy as np

## Image Stitcher Class

In [None]:
class ImageStitcher:
    """Project images with per-image homographies and merge them on one canvas.

    Every selected image is warped into a common rendering space and the
    warped images are merged with an element-wise maximum.
    """

    def __init__(self,
                 imgs   # Set of images
                 ):
        """Store the set of reference images."""
        self.imgs = imgs

    def __denormalize_homographies(self,
                                   H,      # Set of homographies
                                   T_norm  # Normalization matrix
                                   ):
        """Remove the normalization transformation from the homographies.

        Returns a list with ``inv(T_norm) @ h @ T_norm`` for every ``h`` in H.
        """
        # The inverse does not depend on the homography: compute it once.
        T_inv = np.linalg.inv(T_norm)
        return [T_inv @ h @ T_norm for h in H]

    @staticmethod
    def _axis_scale(src_extent, dst_extent):
        """Return dst_extent / src_extent, or 1.0 when either extent is not positive."""
        if src_extent <= 0 or dst_extent <= 0:
            # Degenerate (empty or one-pixel) box: fall back to a pure shift
            # instead of producing a singular or infinite transform.
            return 1.0
        return dst_extent / src_extent

    def __compute_translation(self,
                              idxs,                 # Indexes of the images to take into consideration
                              idx_ref,              # Index of the reference image
                              H,                    # Set of homographies, indexed like self.imgs
                              size=(5000, 10000)    # Maximum (rows, cols) of the rendering space
                              ):
        """Compute the transform that fits the projected images to the canvas.

        The bounding box of all projected image corners (always including the
        origin) is moved so that its top-left corner lands on (0, 0). If that
        box is larger than ``size``, a box centred on the reference image is
        used instead.

        Returns:
            (Ht, size): the 3x3 matrix Ht and the rendering size ``[rows, cols]``.
        """
        # Top-left and bottom-right corners of the region covered by the
        # projections. Starting from 0 keeps the origin inside the region.
        min_x = min_y = max_x = max_y = 0.0

        for i in idxs:
            height, width = self.imgs[i].shape[:2]

            # Image corners in homogeneous coordinates, one corner per column:
            # [[x1, x2, x3, x4], [y1, y2, y3, y4], [1, 1, 1, 1]]
            corners = np.array([[0, width - 1, width - 1, 0],
                                [0, 0, height - 1, height - 1],
                                [1, 1, 1, 1]], dtype=float)

            # Project the corners; they may fall outside the rendering area
            # (negative coordinates), which is what the translation corrects.
            projected = np.asarray(H[i], dtype=float) @ corners
            xs = projected[0] / projected[2]
            ys = projected[1] / projected[2]

            min_x = min(min_x, float(xs.min()))
            min_y = min(min_y, float(ys.min()))
            max_x = max(max_x, float(xs.max()))
            max_y = max(max_y, float(ys.max()))

        # Size of the region covered by the projected images.
        dy = int(max_y - min_y)
        dx = int(max_x - min_x)

        # If the region is bigger than allowed, heuristically use a rectangle
        # that has the reference image in the middle.
        if dy > size[0] or dx > size[1]:
            mult = 1.5  # margin on each side, in units of the reference image size
            h, w = self.imgs[idx_ref].shape[:2]

            min_x = -w * mult
            min_y = -h * mult
            max_x = w + w * mult
            max_y = h + h * mult

            dy = int(max_y - min_y)
            dx = int(max_x - min_x)

        # Map the box [min, max - 1] onto the canvas [0, d - 1] on each axis.
        # This is the exact axis-aligned map between the two rectangles: a
        # shift by (-min_x, -min_y) plus a scale that differs from 1 only
        # because dx and dy are truncated to integers.
        sx = self._axis_scale((max_x - 1) - min_x, dx - 1)
        sy = self._axis_scale((max_y - 1) - min_y, dy - 1)
        Ht = np.array([[sx, 0.0, -sx * min_x],
                       [0.0, sy, -sy * min_y],
                       [0.0, 0.0, 1.0]])

        return Ht, [dy, dx]

    def _render(self, H, idx_ref, idxs, beautify, size):
        """Warp the selected images with H and merge them with a maximum.

        Shared implementation of ``stitch_images`` and ``stitch_images_denorm``;
        H must already be expressed in original image coordinates.

        Returns:
            (Ht, stitch): the fitting transform (identity when ``beautify`` is
            false) and the stitched uint8 image with 3 channels.
        """
        # No (or empty) selection means "use every image". Materialising the
        # selection also lets callers pass NumPy arrays or other iterables.
        idxs = [] if idxs is None else list(idxs)
        if not idxs:
            idxs = list(range(len(self.imgs)))

        if beautify:
            Ht, size = self.__compute_translation(idxs, idx_ref, H, size)
        else:
            Ht = np.eye(3)

        rows, cols = size

        # Initialize the rendering space
        stitch = np.zeros((rows, cols, 3), dtype=np.uint8)

        for i in idxs:
            # Apply the projection and the final fitting transform.
            # warpPerspective takes the output size as (width, height).
            img_proj = cv.warpPerspective(self.imgs[i], Ht @ H[i], (cols, rows))

            # Use maximum as stitch operator (very simple stitching mechanism)
            stitch = np.maximum(stitch, img_proj)

        return Ht, stitch

    def stitch_images(self,
                      H,                         # Set of homographies, one for each image
                      idx_ref,                   # Index of the reference image
                      T_norm,                    # Normalization matrix
                      imgs_translations=None,    # Optional set of per-image 3x3 matrices
                      idxs=None,                 # Indexes of the images to take into consideration
                      beautify=True,             # Fit the projected images to the rendering space
                      size=(5000, 10000)         # Maximum (rows, cols) of the rendering space
                      ):
        """Project the images and merge the results with the maximum function.

        The homographies are first denormalized with ``T_norm``. When
        ``imgs_translations`` is given, each denormalized homography is
        right-multiplied by the inverse of the matching matrix.

        Returns:
            (Ht, stitch): fitting transform and stitched image.
        """
        # Denormalize the homographies, since they are applied on the original images
        H_denorm = self.__denormalize_homographies(H, T_norm)

        # Combine the projection with the per-image matrices, if any
        if imgs_translations is not None:
            H_denorm = [h @ np.linalg.inv(ht)
                        for h, ht in zip(H_denorm, imgs_translations)]

        return self._render(H_denorm, idx_ref, idxs, beautify, size)

    # Used for testing purposes
    def stitch_images_denorm(self, H, idx_ref, idxs=None, beautify=True, size=(5000, 10000)):
        """Stitch with homographies that are already denormalized.

        Same as ``stitch_images`` without the denormalization step and without
        per-image translations.

        Returns:
            (Ht, stitch): fitting transform and stitched image.
        """
        return self._render(H, idx_ref, idxs, beautify, size)