Project #5: Video Stitching and Processing 

## CS445: Computational Photography - Spring 2020

### Setup


In [None]:
!pip uninstall opencv-python -y
# downgrade OpenCV a bit to use SIFT
# !pip install opencv-contrib-python==3.4.2.17 --force-reinstall
!pip install ffmpeg-python # for converting to video

import ffmpeg
import cv2
import numpy as np
import numpy.ma as ma
import os
import gc
from numpy.linalg import svd, inv
import utils
%matplotlib inline
from matplotlib import pyplot as plt
from typing import List, Any

In [None]:
# modify to where you store your project data including utils
datadir = "/home/abot/cs445_comp_photo/cs445_a5" 

utilfn = os.path.join(datadir, "utils.py")
!cp "$utilfn" .
imagesfn = os.path.join(datadir, "images")
!cp -r "$imagesfn" .

In [None]:
np.random.seed(182736745)

def _reference_homography_filepath():
  '''Return the path stem (without '.npy') of the saved key-frame homographies.'''
  return os.path.join('images', 'reference_homographies_arr')
  
def _homography_filepath():
  '''Return the path stem (without '.npy') of the saved per-frame homographies.'''
  return os.path.join('images', 'homographies_arr')

### Part I: Stitch two key frames 

#### This involves:
1. compute homography H between two frames; 
2. project each frame onto the same surface;
3. blend the surfaces.

Check that your homography is correct by plotting four points that form a square in frame 270 and their projections in each image.

In [None]:
def score_projection(pt1, pt2):
  '''
  Count how many corresponding points agree, for use as a RANSAC score.
  Input: pt1 and pt2 are 2xN arrays; column i of each is one correspondence.
         A pair is an inlier when its Euclidean distance is strictly below
         3.0 (pixels).
  Outputs: score (number of inliers) and inliers (1xN boolean array)
  '''
  inlier_radius = 3.0
  _, num_points = pt1.shape

  # Per-column Euclidean distance, reshaped from (N,) to a (1,N) row vector.
  offsets = pt1 - pt2
  distances = np.sqrt((offsets ** 2).sum(axis=0)).reshape(1, -1)
  assert(distances.shape == (1, num_points))

  is_inlier = distances < inlier_radius
  assert(is_inlier.shape == (1, num_points))

  return np.sum(is_inlier), is_inlier


def auto_homography(Ia,Ib, homography_func=None,normalization_func=None):
    '''
    Computes a homography that maps points from Ia to Ib.

    SIFT keypoints are matched between the two images (ratio test at 0.75)
    and the homography is estimated with 1000 RANSAC iterations over minimal
    4-point samples, each candidate being scored with score_projection.

    Input:
      Ia, Ib: BGR images. If both are float32 they are multiplied by 255 and
        cast to uint8 (presumably they are expected in [0, 1] -- confirm).
      homography_func: callable (pts1, pts2, normalization_func) -> 3x3 H for
        3xN homogeneous points. Defaults to computeHomography when None.
      normalization_func: forwarded unchanged to homography_func.
    Output: H is the 3x3 candidate homography with the most inliers.
    Raises:
      ValueError: if either image yields no descriptors or fewer than 4
        matches survive the ratio test.
      RuntimeError: if no RANSAC candidate produced a single inlier.
    '''
    if homography_func is None:
        homography_func = computeHomography

    if Ia.dtype == 'float32' and Ib.dtype == 'float32':
        Ia = (Ia*255).astype(np.uint8)
        Ib = (Ib*255).astype(np.uint8)

    Ia_gray = cv2.cvtColor(Ia,cv2.COLOR_BGR2GRAY)
    Ib_gray = cv2.cvtColor(Ib,cv2.COLOR_BGR2GRAY)

    # SIFT lives in the main module in newer OpenCV builds and in
    # xfeatures2d (contrib) in older ones; accept either.
    if hasattr(cv2, 'SIFT_create'):
        sift = cv2.SIFT_create()
    else:
        sift = cv2.xfeatures2d.SIFT_create()

    # find the keypoints and descriptors with SIFT
    kp_a, des_a = sift.detectAndCompute(Ia_gray,None)
    kp_b, des_b = sift.detectAndCompute(Ib_gray,None)
    if des_a is None or des_b is None:
        raise ValueError('auto_homography: no SIFT descriptors found in one of the images')

    # BFMatcher with default params
    bf = cv2.BFMatcher()
    matches = bf.knnMatch(des_a,des_b, k=2)

    # Apply ratio test. Entries with fewer than two neighbours cannot be
    # ratio-tested and are skipped.
    good = [pair[0] for pair in matches
            if len(pair) == 2 and pair[0].distance < 0.75*pair[1].distance]

    numMatches = len(good)
    n_to_sample = 4  # minimal number of correspondences for a homography
    if numMatches < n_to_sample:
        raise ValueError(
            'auto_homography: need at least {} matches, got {}'.format(n_to_sample, numMatches))

    # Xa and Xb are 3xN matrices that contain homogeneous coordinates for the N
    # matching points for each image
    Xa = np.ones((3,numMatches))
    Xb = np.ones((3,numMatches))
    Xa[:2, :] = np.array([kp_a[m.queryIdx].pt for m in good]).T
    Xb[:2, :] = np.array([kp_b[m.trainIdx].pt for m in good]).T

    ## RANSAC
    niter = 1000
    best_score = 0
    H = None

    for _ in range(niter):
        # estimate homography from a random minimal sample
        subset = np.random.choice(numMatches, n_to_sample, replace=False)
        H_t = homography_func(Xa[:,subset], Xb[:,subset], normalization_func)

        # score homography: project points from first image to second using H
        Xb_ = np.dot(H_t, Xa)
        score_t, _ = score_projection(Xb[:2,:]/Xb[2,:], Xb_[:2,:]/Xb_[2,:])

        if score_t > best_score:
            best_score = score_t
            H = H_t

    print('best score: {:02f}'.format(best_score), end='\r')

    if H is None:
        raise RuntimeError('auto_homography: RANSAC found no homography with any inliers')

    # Optionally, you may want to re-estimate H based on inliers

    return H

In [None]:
def computeHomography(pts1, pts2, normalization_func=None):
    '''
    Compute the homography that maps pts1 to pts2 with the DLT (SVD) method.

    Input:
      pts1, pts2: 3xN matrices (N >= 4) of corresponding points in
        homogeneous coordinates. The linear system is built assuming the
        third coordinate of every point is 1.
      normalization_func: optional callable (pts1, pts2) ->
        (pts1_n, pts2_n, T, TP) that conditions the points before solving.
        The result is mapped back with H = inv(TP) @ H_n @ T.

    Output: H is a 3x3 matrix, defined up to scale, such that pts2 ~= H @ pts1.
      With exactly 4 points the fit is exact; with more it is the
      least-squares solution of the homogeneous system.

    Raises:
      ValueError: if the inputs are not matching 3xN arrays with N >= 4.
    '''
    kNumCols = 9
    pts1 = np.asarray(pts1, dtype=float)
    pts2 = np.asarray(pts2, dtype=float)
    if pts1.ndim != 2 or pts1.shape[0] != 3:
        raise ValueError('pts1 must be a 3xN array, got shape {}'.format(pts1.shape))
    if pts2.shape != pts1.shape:
        raise ValueError('pts1 and pts2 must have the same shape, got {} and {}'.format(
            pts1.shape, pts2.shape))
    N = pts1.shape[1]
    if N < 4:
        raise ValueError('at least 4 correspondences are required, got {}'.format(N))

    # Normalize the points (x) matrices
    if normalization_func is not None:
        pts1, pts2, T, TP = normalization_func(pts1, pts2)

    # Names follow the lecture nomenclature: (u, v) -> (up, vp).
    u, v = pts1[0, :], pts1[1, :]
    up, vp = pts2[0, :], pts2[1, :]

    # Each correspondence contributes two rows:
    #   [-u, -v, -1,  0,  0,  0, u*up, v*up, up]
    #   [ 0,  0,  0, -u, -v, -1, u*vp, v*vp, vp]
    A = np.zeros((2*N, kNumCols))
    A[0::2, 0] = -u
    A[0::2, 1] = -v
    A[0::2, 2] = -1.0
    A[0::2, 6] = u * up
    A[0::2, 7] = v * up
    A[0::2, 8] = up

    A[1::2, 3] = -u
    A[1::2, 4] = -v
    A[1::2, 5] = -1.0
    A[1::2, 6] = u * vp
    A[1::2, 7] = v * vp
    A[1::2, 8] = vp

    # Singular values are sorted in descending order, so the last row of Vh
    # is the right singular vector of the smallest singular value.
    _, _, Vh = np.linalg.svd(A, compute_uv=True)
    H = Vh[-1, :].copy().reshape((3, 3), order='C')

    if normalization_func is not None:
        # H_prime back to H.
        H = np.linalg.inv(TP) @ H @ T

    return H

def normalizeHomography(pts1, pts2):
  ''' Condition two point sets before solving x_p = H @ x.

  Input: pts1 and pts2 are 3xN matrices for N points in homogeneous
  coordinates (pts1 plays the role of x, pts2 of x_p).

  Output:
    pts1_p: T @ pts1, whose x and y rows have zero mean and unit std
    pts2_p: TP @ pts2, whose x and y rows have zero mean and unit std
    T: the 3x3 transform applied to pts1
    TP: the 3x3 transform applied to pts2

  Each transform first subtracts the mean of the x and y rows and then
  divides by their standard deviation; a standard deviation of exactly 0 is
  replaced by 1e-9 so the scale stays finite. A homography HP estimated on
  the transformed points is mapped back to the original coordinates with
      H = inv(TP) @ HP @ T
  '''
  def _conditioning_transform(pts):
    # Scale: reciprocal of the per-axis spread; the homogeneous row is kept.
    spreads = [np.std(pts[0,:]), np.std(pts[1,:]), 1.0]
    scale = np.diag([1.0 / (s if s != 0.0 else 1e-9) for s in spreads])
    # Shift: move the centroid to the origin.
    shift = np.eye(3)
    shift[0, -1] = -np.mean(pts[0,:])
    shift[1, -1] = -np.mean(pts[1,:])
    return scale @ shift

  T = _conditioning_transform(pts1)
  TP = _conditioning_transform(pts2)

  return T @ pts1, TP @ pts2, T, TP

In [None]:
# Part I demo: estimate the homography from frame 270 to frame 450 and check
# it visually by projecting a square drawn on frame 270 into frame 450.

# images location
im1 = './images/input/frames/f0270.jpg'
im2 = './images/input/frames/f0450.jpg'

# Load both frames with cv2.imread (the path variables are rebound to the
# image arrays). The images are indexed as 3-channel BGR below.
im1 = cv2.imread(im1)
im2 = cv2.imread(im2)

H = auto_homography(im1,im2, computeHomography, normalizeHomography)
# H = auto_homography(im1,im2, computeHomography)
# H is only defined up to scale; print it divided by its largest entry.
print(H/H.max()) 

# Plot a closed square (first point repeated last) on frame 270.
# Rows of box_pts are x, y and the homogeneous 1.
box_pts = np.array([[300, 400, 400, 300, 300], [100, 100, 200, 200, 100], [1, 1, 1, 1, 1]])
plt.figure()
plt.imshow(im1[:,:,[2,1,0]])  # reorder BGR -> RGB for matplotlib
plt.plot(box_pts[0,:], box_pts[1, :], 'r-')

# Project the square into im2 and draw it there: apply H, then divide by the
# third (homogeneous) coordinate to get pixel coordinates.
unscaled_homo = H @ box_pts
scaled_homo = unscaled_homo / unscaled_homo[-1, :]
plt.figure()
plt.imshow(im2[:,:,[2,1,0]])
plt.plot(scaled_homo[0,:], scaled_homo[1, :], 'r-')



In [None]:
# Output canvas size (pixels) and the translation Tr that shifts the
# reference frame by (660, 120) so warped frames land inside the canvas.
projectedWidth = 1600
projectedHeight = 600
Tr = np.array([[1, 0, 660], [0, 1, 120], [0, 0, 1]]).astype(np.float32)
canvas = np.zeros((projectedHeight, projectedWidth))  # not used below

# Warp im1 with Tr @ H (frame 270 -> frame 450 -> canvas) and im2 with Tr
# alone (frame 450 is the reference), both onto a canvas of size WxH, then
# blend the projected images. Per the original note, blendImages fills zero
# values in projIm2 with values from projIm1 -- confirm against utils.py.
projIm1 = cv2.warpPerspective(im1, np.dot(Tr,H), (projectedWidth, projectedHeight))
projIm2 = cv2.warpPerspective(im2, Tr, (projectedWidth, projectedHeight))
blendOut = utils.blendImages(projIm1, projIm2) 

plt.figure(figsize=(25,20))
plt.imshow(blendOut[:,:,[2,1,0]])  # reorder BGR -> RGB for matplotlib


### Part II: Panorama using five key frames

Produce a panorama by mapping five key frames [90, 270, 450, 630, 810] onto the same reference frame 450.  


In [None]:
# Zero-based indices of the five key frames (frame numbers 90..810).
key_frames_idx = np.array([90, 270, 450, 630, 810])-1

# Stack the key frames into one (5, height, width, channels) uint8 array,
# sized from im1, which was loaded in the Part I cell.
frames = np.zeros((len(key_frames_idx), im1.shape[0], im1.shape[1], im1.shape[2]),dtype='uint8')
for n in range(len(key_frames_idx)):
  # File names are the one-based frame number zero-padded to 3 digits after "f0".
  frames[n] = cv2.imread("./images/input/frames/f0{num}.jpg".format(num=str(key_frames_idx[n]+1).zfill(3)))

N = len(frames)
# Filled in by warp_frames: one key-frame -> reference homography per key frame.
kKeyFrameHomographies = np.zeros((N, 3, 3), dtype=np.float32)

def warp_frames(frames: np.ndarray) -> np.ndarray:
  '''Stitch the key frames into one panorama referenced to the middle frame.

  Starting from the middle frame, the function walks outwards in both
  directions. Each frame is registered to its already-placed neighbour with
  auto_homography, and the result is chained with the neighbour's homography
  so every frame ends up mapped into the reference frame.

  Input:
    frames: (num_frames, height, width, channels) array of BGR key frames.
  Output:
    canvas: the blended panorama of size 600x1600x3, as returned by
      utils.blendImages.
  Side effects:
    Writes one frame -> reference homography per frame into the module-level
    array kKeyFrameHomographies (identity for the middle frame) and shows
    debug figures while processing the frames left of the middle.
  '''
  num_frames, oH, oW, _ = frames.shape
  # Derive the midpoint from the input rather than a notebook-level N, which
  # later cells rebind to the full video length.
  mid = num_frames // 2
  projectedWidth = 1600
  projectedHeight = 600
  Tr = np.array([[1, 0, 660], [0, 1, 120], [0, 0, 1]]).astype(np.float32)
  canvas = np.zeros((projectedHeight, projectedWidth, 3))

  # Everything is referenced to the middle frame (the 450th for five key frames).
  kRefFrame = frames[mid].copy()

  # Walk from the reference towards the first frame.
  prev_H = np.diag([1.0, 1.0, 1.0])
  prev_frame = kRefFrame.copy()
  count = 0
  for i, f in enumerate(frames[:mid][::-1]):
    plt.figure(figsize=(10,8))
    plt.imshow(prev_frame[:,:,[2,1,0]])
    plt.title(f'PrevFrame {count}')
    plt.show()
    plt.figure(figsize=(10,8))
    plt.imshow(f[:,:,[2,1,0]])
    plt.title(f'Frame {count}')
    plt.show()
    # Bump exact zeros to 1 so the neighbour's black pixels are kept when
    # using blendImages.
    c = prev_frame.copy()
    c[prev_frame == 0] = 1
    H = prev_H @ auto_homography(f, prev_frame, computeHomography, normalizeHomography)
    p1 = cv2.warpPerspective(f.copy(), np.dot(Tr, H), (projectedWidth, projectedHeight))
    p_ref = cv2.warpPerspective(c, np.dot(Tr, prev_H), (projectedWidth, projectedHeight))
    blendOut = utils.blendImages(p1.copy(), p_ref.copy())  # Ref is second argument.
    plt.figure(figsize=(25,20))
    plt.imshow(blendOut[:,:,[2,1,0]])
    plt.title(f'BlendOut {count}')
    plt.show()
    canvas = utils.blendImages(blendOut, canvas)

    # Debug: warp the projections back to the original frame size to check
    # that the transforms invert cleanly.
    inv_p1 = cv2.warpPerspective(p1.copy(), np.linalg.inv(np.dot(Tr, H)), (oW, oH))
    inv_p_ref = cv2.warpPerspective(p_ref.copy(), np.linalg.inv(np.dot(Tr, prev_H)),
                                    (oW, oH))
    plt.figure(figsize=(25,10))
    plt.subplot(121),plt.imshow(p1[:,:,[2,1,0]]),plt.title(f'Frame {count}')
    plt.subplot(122),plt.imshow(inv_p1[:,:,[2,1,0]]),plt.title(f'Inverse Frame {count}')
    plt.show()
    plt.figure(figsize=(25,10))
    plt.subplot(121),plt.imshow(p_ref[:,:,[2,1,0]]),plt.title(f'Ref Frame {count}')
    plt.subplot(122),plt.imshow(inv_p_ref[:,:,[2,1,0]]),plt.title(f'Inverse Ref Frame {count}')
    plt.show()

    # Update loop variables.
    kKeyFrameHomographies[mid-1-i, :, :] = H
    prev_H = H.copy()
    prev_frame = f.copy()
    count += 1

  # Walk from the reference towards the last frame (no debug figures).
  prev_H = np.diag([1.0, 1.0, 1.0])
  prev_frame = kRefFrame.copy()
  for i, f in enumerate(frames[mid + 1:]):
    H = prev_H @ auto_homography(f, prev_frame, computeHomography, normalizeHomography)
    c = prev_frame.copy()
    c[prev_frame == 0] = 1
    p1 = cv2.warpPerspective(f, np.dot(Tr, H), (projectedWidth, projectedHeight))
    p_ref = cv2.warpPerspective(c, np.dot(Tr, prev_H), (projectedWidth, projectedHeight))
    blendOut = utils.blendImages(p1, p_ref)  # Ref is second argument.
    canvas = utils.blendImages(blendOut, canvas)
    # Update loop variables.
    kKeyFrameHomographies[mid+1+i, :, :] = H
    prev_H = H.copy()
    prev_frame = f.copy()

  kKeyFrameHomographies[mid, :, :] = np.diag([1.0, 1.0, 1.0])
  return canvas

# Build the five-frame panorama, display it and write it to disk.
canvas = warp_frames(frames)
plt.figure(figsize=(25,20))
# NOTE(review): warp_frames starts from a float canvas; if blendImages returns
# floats in 0..255, imshow will clip them -- confirm the dtype it returns.
plt.imshow(canvas[:,:,[2,1,0]])
cv2.imwrite('part2_panorama.jpg', canvas)

# Persist the key-frame -> reference homographies that warp_frames filled in.
# The same array is saved twice, under two names; Part 3 reloads the second
# one via _reference_homography_filepath().
print(f'Key Frame Homographies \n{kKeyFrameHomographies}')
np.save(os.path.join('images', 'kKeyFrameHomographies_arr'), kKeyFrameHomographies, allow_pickle=True)
np.save(_reference_homography_filepath(), kKeyFrameHomographies, allow_pickle=True)

In [None]:
'''Cleanup'''
del im1
del im2
del frames
del kKeyFrameHomographies
gc.collect()

### Part 3: Map the video to the reference plane

Project each frame onto the reference frame (using the same size panorama) to create a video that shows the portion of the panorama revealed by each frame.

In [None]:
RUN_LONG_STUFF = True 

In [None]:
# read all the images
# Read every .jpg in the frames directory into one uint8 array.
dir_frames = 'images/input/frames'
filenames = []
filesinfo = os.scandir(dir_frames)

filenames = [f.path for f in filesinfo if f.name.endswith(".jpg")]
# Sort numerically by the digits found in the path (all digits of the full
# path are concatenated, so this relies on the directory part having none).
filenames.sort(key=lambda f: int(''.join(filter(str.isdigit, f))))

# Size the (frameCount, height, width, channels) array from the first frame.
frameCount = len(filenames)
frameHeight, frameWidth, frameChannels = cv2.imread(filenames[0]).shape
frames = np.zeros((frameCount, frameHeight, frameWidth, frameChannels),dtype='uint8')

for idx, file_i in enumerate(filenames):
  frames[idx] = cv2.imread(file_i)

In [None]:
# TO DO part 3 solution
# create your video (see tips)

  
def save_homographies(homographies: np.ndarray):
  '''Write the per-frame homography stack to disk with np.save.

  Input:
    homographies: array of shape (N,3,3), one 3x3 matrix per frame.
  '''
  print(f'Saving homographies.')
  target = _homography_filepath()
  np.save(target, homographies, allow_pickle=True)
  
def get_homographies() -> np.ndarray:
  '''Load the array written by save_homographies and report its shape.'''
  source = _homography_filepath() + '.npy'
  homographies = np.load(source, allow_pickle=True)
  print(f'Homographies shape {homographies.shape}')
  return homographies

def _video_filepath():
  '''Return the path stem (without '.npy') of the saved warped video frames.'''
  return os.path.join('images', 'video_frames_arr')

def save_video(video_frames: np.ndarray):
  '''Write the stack of warped video frames to disk with np.save.

  Input:
    video_frames: array with one warped frame per entry along axis 0
      (presumably (N, 600, 1600, 3) uint8 as built in Part 3 -- confirm).
  '''
  print(f'Saving video.')
  target = _video_filepath()
  np.save(target, video_frames, allow_pickle=True)
  
def get_video_frames() -> np.ndarray:
  '''Load the array written by save_video and report its shape.'''
  source = _video_filepath() + '.npy'
  video_frames = np.load(source, allow_pickle=True)
  print(f'video_frames.shape {video_frames.shape}')
  return video_frames

def get_closest_key_frame(frame_index):
  '''Return (key frame index, position in the key-frame list) nearest to frame_index.

  frame_index is zero-based and must lie in [0, 900). Ties go to the
  earlier key frame.
  '''
  assert(0 <= frame_index < 900)
  key_frames = np.array([90, 270, 450, 630, 810]) - 1
  gaps = np.abs(key_frames - frame_index)
  slot = np.argmin(gaps)
  return key_frames[slot], slot

# Part 3 main loop: register every video frame to its nearest key frame,
# chain that with the key frame's homography to the reference frame, and warp
# the frame onto the panorama canvas. Keeps the per-frame homographies and the
# warped frames for the later parts.
if RUN_LONG_STUFF:
  # Set up the Ref frames. We find homography to closest one of these.
  # NOTE: H (frame height here) is rebound to a homography inside the loop.
  N, H, W, C = frames.shape

  # Set up the canvas.
  projectedWidth = 1600
  projectedHeight = 600
  Tr = np.array([[1, 0, 660], [0, 1, 120], [0, 0, 1]]).astype(np.float32)
  canvas = np.zeros((projectedHeight, projectedWidth, 3))

  # Arrays to store transformed frames.
  reference_homographies = np.load(_reference_homography_filepath() + '.npy', allow_pickle=True)
  homographies = np.zeros((N, 3, 3))
  # One full canvas-sized uint8 image per input frame.
  video_frames = np.zeros((N,) + canvas.shape, dtype='uint8')
  print(f'Video Frames shape {video_frames.shape}, type: {video_frames.dtype}')

  count = 0
  debug_3 = False  # set True to show intermediate figures for every frame
  for i in np.arange(0,N,1):
    print(f'Building video ... processing frame:{i:4}, count:{count:4}', end='\r')
    f = frames[i]
    ref_idx, ref_arr_idx = get_closest_key_frame(i)
    ref = frames[ref_idx].copy()
    ref_H = reference_homographies[ref_arr_idx, ...]
    # frame -> key frame (estimated here), then key frame -> reference (from Part 2).
    H = ref_H @ auto_homography(f, ref, computeHomography, normalizeHomography)
    # Bump exact zeros to 1 in the key-frame copy; done after the homography
    # estimate so matching sees the unmodified image.
    ref[ref == 0] = 1

    if debug_3:
      plt.figure(figsize=(10,8))
      plt.imshow(ref[:,:,[2,1,0]])
      plt.title(f'Ref frame {count}')
      plt.show()
      plt.figure(figsize=(10,8))
      plt.imshow(f[:,:,[2,1,0]])
      plt.title(f'Frame {count}')
      plt.show()

    p1 = cv2.warpPerspective(f, np.dot(Tr, H), (projectedWidth, projectedHeight))
    p_ref = cv2.warpPerspective(ref, np.dot(Tr, ref_H), (projectedWidth, projectedHeight))
    blendOut = utils.blendImages(p1, p_ref)  # Ref is second argument.
    # The running canvas is only displayed in debug mode; it is not saved here.
    canvas = utils.blendImages(blendOut, canvas)

    if debug_3:
      plt.figure(figsize=(25,20))
      plt.imshow(blendOut[:,:,[2,1,0]])
      plt.title(f'BlendOut {count}')
      plt.show()
      plt.figure(figsize=(25,20))
      plt.imshow(canvas[:,:,[2,1,0]])
      plt.title(f'Canvas')
      plt.show()

    # Keep the frame -> reference homography and the warped frame itself.
    homographies[i, :, :] = H
    video_frames[i, ...] = p1
    count += 1


In [None]:
# Save the video to disk
if RUN_LONG_STUFF:
  # Persist the Part 3 results so later parts can reload them from disk.
  save_homographies(homographies)
  save_video(video_frames)

In [None]:
if RUN_LONG_STUFF:
  # Reload the warped frames saved above and encode them as an mp4.
  # Convert BGR to RGB, ffmpeg wants RGB.
  video_frames = get_video_frames()
  utils.vidwrite_from_numpy('part3_video_out.mp4',video_frames[...,[2,1,0]], framerate=30, vcodec='libx264')

In [None]:
# Load the video from disk
# video_ref_homography = np.load(_reference_homography_filepath() + '.npy',allow_pickle=True)
# video_homography = get_homographies()
# video = get_video()

In [None]:
'''Cleanup memory'''
del frames
del video_frames
del homographies
del reference_homographies
del canvas
gc.collect()

### Part 4: Create background panorama

Create a background panorama based on the result from Part 3.


In [None]:
RUN_LONG_STUFF_PT4 = True 

In [None]:
# Load the video from disk
if RUN_LONG_STUFF_PT4:
  # Reload the Part 2 key-frame homographies and the Part 3 results.
  video_ref_homographies = np.load(_reference_homography_filepath() + '.npy',allow_pickle=True)
  video_homographies = get_homographies()
  # NOTE: here `frames` holds the warped, canvas-sized frames from Part 3,
  # not the original input frames.
  frames = get_video_frames()

In [None]:
if RUN_LONG_STUFF_PT4:
  # Per-pixel temporal median over all warped frames, computed one image row
  # at a time to bound memory. Values <= 1 are masked out so they do not
  # take part in the median (presumably these are canvas pixels a frame did
  # not cover -- confirm).
  video_frame_medians = np.zeros(frames.shape[1:], dtype=np.uint8)
  for r in range(frames.shape[1]):
    row_stack = frames[:, r, :, :]
    frames_masked = ma.masked_array(row_stack, row_stack <= 1).astype(np.uint8)
    video_frame_medians[r, :, :] = ma.median(frames_masked, axis=0).astype(np.uint8)
    print(f'row {r}.', end='\r')
  print(f'medians.shape {video_frame_medians.shape}')

  # Read-only broadcast view with one median image per frame: same shape as
  # `frames`, no copy, and no hard-coded frame count.
  video_frame_medians = np.broadcast_to(video_frame_medians[np.newaxis, ...], frames.shape)
  print(f'video_frame_medians shape: {video_frame_medians.shape}')

  # Absolute difference that is safe for uint8: a plain `frames - medians`
  # wraps around (e.g. 10 - 12 == 254), which would flag pixels slightly
  # darker than the median as far from it. max - min never goes below zero.
  video_frame_medians_diff = np.maximum(frames, video_frame_medians)
  np.subtract(video_frame_medians_diff, np.minimum(frames, video_frame_medians),
              out=video_frame_medians_diff)
  # If the pixel is more than kThreshold from the median we replace it with the median.
  kThreshold = 5
  print(f'is_bg_mask {np.sum(video_frame_medians_diff < kThreshold)} of {video_frame_medians_diff.size} total are 1=unmasked.')

  print(f'modified shape {video_frame_medians.shape}')
  # Take the median where the pixel deviates too much, else keep the pixel.
  modified_frames = np.where(video_frame_medians_diff > kThreshold, video_frame_medians, frames)

  gc.collect()

In [None]:
# Build two panoramas by blending every 10th canvas-sized frame: one from the
# median-filtered frames (background only) and one from the unmodified frames.
if RUN_LONG_STUFF_PT4:
  # Frame count and frame size; only N is used below.
  N, H, W, C = modified_frames.shape
  # Set up the canvas. Tr is defined but not used in this cell: the frames
  # loaded for Part 4 are already warped onto the canvas.
  projectedWidth = 1600
  projectedHeight = 600
  Tr = np.array([[1, 0, 660], [0, 1, 120], [0, 0, 1]]).astype(np.float32)
  canvas = np.zeros((projectedHeight, projectedWidth, 3))
  canvas2 = np.zeros((projectedHeight, projectedWidth, 3))
  for i in np.arange(0,N,10):
      debug_3 = True  # shows the running background canvas on every step
      print(f'Rendering modified frames ... processing frame:{i:4}, count:{i:4}')
      f = modified_frames[i]
      f2 = frames[i]

      # Accumulate each frame into its running panorama.
      canvas = utils.blendImages(f, canvas)
      canvas2 = utils.blendImages(f2, canvas2)

      if debug_3:
        plt.figure(figsize=(25,20))
        plt.imshow(canvas[:,:,[2,1,0]])
        plt.title(f'Canvas')
        plt.show()

  # Part 5 and Part 6 read these two files back with cv2.imread.
  cv2.imwrite('part4_background_panorama.jpg', canvas)
  cv2.imwrite('part4_all_panorama.jpg', canvas2)

### Part 5: Create background movie

Generate a movie that looks like the input movie but shows only background pixels. For each frame of the movie, you need to estimate a projection from the panorama to that frame. Your solution can use the background image you created in Part 4 and the per-frame homographies you created in Part 3. 


In [None]:
RUN_LONG_STUFF_PT5 = True
del frames
gc.collect()

In [None]:
# Load the video from disk
if RUN_LONG_STUFF_PT5:
  # Reload the homographies from Parts 2 and 3 and the panoramas from Part 4.
  video_ref_homographies = np.load(_reference_homography_filepath() + '.npy',allow_pickle=True)
  video_homographies = get_homographies()
  panorama_bg = cv2.imread('part4_background_panorama.jpg')
  panorama_all = cv2.imread('part4_all_panorama.jpg')
  
  # List the input frames, sorted numerically by the digits in their path.
  dir_frames = 'images/input/frames'
  filenames = []
  filesinfo = os.scandir(dir_frames)

  filenames = [f.path for f in filesinfo if f.name.endswith(".jpg")]
  filenames.sort(key=lambda f: int(''.join(filter(str.isdigit, f))))

  # Allocate an all-zero output buffer with the input frames' size. Only the
  # first file is read (for its shape); the buffer is filled in a later cell.
  frameCount = len(filenames)
  frameHeight, frameWidth, frameChannels = cv2.imread(filenames[0]).shape
  frames = np.zeros((frameCount, frameHeight, frameWidth, frameChannels),dtype='uint8')
  
  # There must be exactly one saved homography per input frame.
  assert(frameCount == video_homographies.shape[0])

In [None]:
# if RUN_LONG_STUFF_PT5:
#   video_frame_means = np.zeros(frames.shape[1:], dtype=np.uint8)
#   # frames_masked = ma.masked_array(frames, frames > 1).astype(np.uint8)
#   # video_frame_means[:,:,:] = ma.median(frames_masked, axis=0).astype(np.uint8)
#   for r in range(frames.shape[1]):
#     # for c in range(frames.shape[2]):
#     frames_masked = ma.masked_array(frames[:, r, :, :], frames[:, r, :, :] <= 1).astype(np.uint8)
#     video_frame_means[r, :, :] = ma.median(frames_masked, axis=0).astype(np.uint8)
#     print(f'row {r}.')
#   # assert(video_frame_means.shape == frames.shape[1:])
#   print(f'means.shape {video_frame_means.shape}')
#   video_frame_means = video_frame_means[np.newaxis, ...]
#   video_frame_means = np.repeat(video_frame_means, repeats=(900,), axis=0)
#   print(f'video_frame_means shape: {video_frame_means.shape}')

#   video_frame_means_diff = np.abs(frames - video_frame_means, dtype=np.uint8)
#   kThreshold = 5
#   print(f'is_bg_mask {np.sum(video_frame_means_diff < kThreshold)} of {video_frame_means_diff.size} total are 1=unmasked.')

#   modified_frames = frames.copy()
#   print(f'modified shape {video_frame_means.shape}')
#   # Set modified = values where mask == 1, else modified unchanged.
#   np.putmask(modified_frames, mask=(video_frame_means_diff > kThreshold), values=video_frame_means)

#   gc.collect()

In [None]:
# Sanity check of cv2.warpPerspective: rectify a picture with a 4-point
# perspective transform, then map the result back with the inverse transform.
img = cv2.imread('sudokusmall.jpg')
if img is None:
  # cv2.imread returns None instead of raising when the file cannot be read.
  raise FileNotFoundError("could not read 'sudokusmall.jpg'")
rows,cols,ch = img.shape

# Four corners in the input image and where they should land in the output.
pts1 = np.float32([[56,65],[368,52],[28,387],[389,390]])
pts2 = np.float32([[0,0],[300,0],[0,300],[300,300]])

M = cv2.getPerspectiveTransform(pts1,pts2)

dst = cv2.warpPerspective(img,M,(300,300))

# OpenCV images are BGR; reorder to RGB for matplotlib.
plt.subplot(121),plt.imshow(img[:,:,[2,1,0]]),plt.title('Input')
plt.subplot(122),plt.imshow(dst[:,:,[2,1,0]]),plt.title('Output')
plt.show()

_, inv_M = cv2.invert(M)
# dsize is (width, height), i.e. (cols, rows) -- the reverse of ndarray.shape.
inverted = cv2.warpPerspective(dst, inv_M, (cols, rows))
plt.subplot(121),plt.imshow(dst[:,:,[2,1,0]]),plt.title('Output')
plt.subplot(122),plt.imshow(inverted[:,:,[2,1,0]]),plt.title('Inverted')
plt.show()

In [None]:
def get_closest_key_frame(frame_index):
  '''Return (key frame index, position in the key-frame list) nearest to frame_index.

  frame_index is zero-based and must lie in [0, 900). Ties go to the
  earlier key frame.
  '''
  assert(0 <= frame_index < 900)
  key_frames = np.array([90, 270, 450, 630, 810]) - 1
  gaps = np.abs(key_frames - frame_index)
  slot = np.argmin(gaps)
  return key_frames[slot], slot

# Part 5 main loop: for every input frame, sample the background panorama
# through that frame's homography to produce a background-only frame at the
# original frame size.
if RUN_LONG_STUFF_PT5:
  N, Height, Width, C = frames.shape
  # ndarray.shape is (rows, cols, channels), so height comes first.
  projectedHeight, projectedWidth, _ = panorama_bg.shape
  # Same canvas translation that was used when the panorama was built.
  Tr = np.array([[1, 0, 660], [0, 1, 120], [0, 0, 1]]).astype(np.float32)

  print(f'Tr \n{Tr}')
  print(f'Tr Inv \n{np.linalg.inv(Tr)}')
  debug_5 = True  # show every 50th reconstructed frame
  for i in np.arange(0,N,1):
      print(f'Rendering modified frames ... processing frame:{i:4}, count:{i:4}')
      # Saved in Part 3: maps frame i into the reference frame.
      H = video_homographies[i, ...]

      # fwd_H maps frame pixels to panorama pixels; inv_H is its inverse.
      fwd_H = np.dot(Tr, H)
      inv_H = np.dot(np.linalg.inv(H), np.linalg.inv(Tr))
      # Sanity check: the hand-built inverse must agree with OpenCV's.
      ret, cv2_inv_H = cv2.invert(fwd_H)
      assert(np.allclose(inv_H, cv2_inv_H))
      # With WARP_INVERSE_MAP the matrix is used as the destination -> source
      # mapping, so passing fwd_H pulls each frame pixel from the panorama.
      inverted = cv2.warpPerspective(panorama_bg, fwd_H,
                                     (frameWidth, frameHeight), flags=cv2.WARP_INVERSE_MAP)
      frames[i, ...] = inverted

      if debug_5 and (i % 50) == 0:
        plt.figure(figsize=(25,20))
        plt.imshow(frames[i][:,:,[2,1,0]])
        plt.title(f'Frame Inverted {i}')
        plt.show()

  np.save('part5_frames_arr.npy', frames, allow_pickle=True)

In [None]:
if RUN_LONG_STUFF_PT5:
  # Reload the background-only frames saved above and encode them as an mp4.
  video_frames = np.load('part5_frames_arr.npy', allow_pickle=True)
  # Convert BGR to RGB, ffmpeg wants RGB.
  utils.vidwrite_from_numpy('part5_video_out.mp4', video_frames[...,[2,1,0]], framerate=30, vcodec='libx264')

In [None]:
if RUN_LONG_STUFF_PT5:
  del frames
  del video_frames

### Part 6: Create foreground movie

In the background video, moving objects are removed. In each frame, those pixels that are sufficiently different from the background color are considered foreground. For each frame, determine the foreground pixels and generate a movie that emphasizes or includes only foreground pixels.

In [None]:
RUN_LONG_STUFF_PT6 = True

In [None]:
if RUN_LONG_STUFF_PT6:
  # Reload the Part 2 key-frame homographies and the Part 3 results.
  video_ref_homographies = np.load(_reference_homography_filepath() + '.npy',allow_pickle=True)
  video_homographies = get_homographies()
  # NOTE: `frames` holds the warped, canvas-sized frames saved in Part 3.
  frames = get_video_frames()

In [None]:
# if RUN_LONG_STUFF_PT6:
#   video_frame_means = np.zeros(frames.shape[1:], dtype=np.uint8)
#   # frames_masked = ma.masked_array(frames, frames > 1).astype(np.uint8)
#   # video_frame_means[:,:,:] = ma.median(frames_masked, axis=0).astype(np.uint8)
#   for r in range(frames.shape[1]):
#     # for c in range(frames.shape[2]):
#     frames_masked = ma.masked_array(frames[:, r, :, :], frames[:, r, :, :] <= 1).astype(np.uint8)
#     video_frame_means[r, :, :] = ma.median(frames_masked, axis=0).astype(np.uint8)
#     print(f'row {r}.', end='\r')
#   # assert(video_frame_means.shape == frames.shape[1:])
#   print(f'means.shape {video_frame_means.shape}')
#   video_frame_means = video_frame_means[np.newaxis, ...]
#   video_frame_means = np.repeat(video_frame_means, repeats=(900,), axis=0)
#   print(f'video_frame_means shape: {video_frame_means.shape}')

#   video_frame_means_diff = np.abs(frames - video_frame_means, dtype=np.uint8)
#   # If the pixel is abs(kThreshold) from the median we replace it with the median.
#   kThreshold = 20
#   print(f'is_bg_mask {np.sum(video_frame_means_diff > kThreshold)} of {video_frame_means_diff.size} total are 1=unmasked.')

#   modified_frames = frames.copy()
#   print(f'modified shape {video_frame_means.shape}')
#   # Set modified = values where mask == 1, else modified unchanged.
#   np.putmask(modified_frames, mask=(video_frame_means_diff < kThreshold), values=video_frame_means)

#   gc.collect()

In [None]:
# if RUN_LONG_STUFF_PT6:
#   # Set up the Ref frames. We find homography to closest one of these.
#   N, H, W, C = modified_frames.shape
#   # Set up the canvas.
#   projectedWidth = 1600
#   projectedHeight = 600
#   Tr = np.array([[1, 0, 660], [0, 1, 120], [0, 0, 1]]).astype(np.float32)
#   canvas = np.zeros((projectedHeight, projectedWidth, 3))
#   canvas2 = np.zeros((projectedHeight, projectedWidth, 3))
#   for i in np.arange(0,N,100):
#       debug_3 = True
#       print(f'Rendering modified frames ... processing frame:{i:4}, count:{i:4}')
#       f = modified_frames[i]
#       f2 = frames[i]
#       # ref_idx, ref_arr_idx = get_closest_key_frame(i)
#       # ref = frames[ref_idx].copy()
#       # ref_H = video_ref_homographies[ref_arr_idx, ...]
#       # H = ref_H @ video_homographies[i, ...]
#       # ref[ref == 0] = 1

#       # if debug_3:
#       #   plt.figure(figsize=(10,8))
#       #   plt.imshow(ref[:,:,[2,1,0]])
#       #   plt.title(f'Ref frame {i}')
#       #   plt.show()
#       #   plt.figure(figsize=(10,8))
#       #   plt.imshow(f[:,:,[2,1,0]])
#       #   plt.title(f'Frame {i}')
#       #   plt.show()

#       # p1 = cv2.warpPerspective(f, np.dot(Tr, H), (projectedWidth, projectedHeight))
#       # p_ref = cv2.warpPerspective(ref, np.dot(Tr, ref_H), (projectedWidth, projectedHeight))
#       # blendOut = utils.blendImages(p1, p_ref)  # Ref is second argument.
#       canvas = utils.blendImages(f, canvas)
#       canvas2 = utils.blendImages(f2, canvas2)

#       if debug_3:
#         # plt.figure(figsize=(25,20))
#         # plt.imshow(blendOut[:,:,[2,1,0]])
#         # plt.title(f'BlendOut {i}')
#         # plt.show()
#         plt.figure(figsize=(25,20))
#         plt.imshow(canvas[:,:,[2,1,0]])
#         plt.title(f'Canvas')
#         plt.show()

#   cv2.imwrite('part6_foreground_panorama.jpg', canvas)
#   cv2.imwrite('part6_all_panorama.jpg', canvas2)

In [None]:
if RUN_LONG_STUFF_PT6: 
  # Reload the homographies from Parts 2 and 3 and the panoramas from Part 4.
  video_ref_homographies = np.load(_reference_homography_filepath() + '.npy',allow_pickle=True)
  video_homographies = get_homographies()
  panorama_bg = cv2.imread('part4_background_panorama.jpg')
  panorama_all = cv2.imread('part4_all_panorama.jpg')
  
  # List the input frames, sorted numerically by the digits in their path.
  dir_frames = 'images/input/frames'
  filenames = []
  filesinfo = os.scandir(dir_frames)

  filenames = [f.path for f in filesinfo if f.name.endswith(".jpg")]
  filenames.sort(key=lambda f: int(''.join(filter(str.isdigit, f))))

  # All-zero output buffer with the input frames' size.
  frameCount = len(filenames)
  frameHeight, frameWidth, frameChannels = cv2.imread(filenames[0]).shape
  frames = np.zeros((frameCount, frameHeight, frameWidth, frameChannels),dtype='uint8')
  
  # NOTE(review): only the first len(key_frames_idx) slots of orig_frames are
  # filled here (with the key frames, using key_frames_idx from Part 2); every
  # other slot stays zero. This looks like a stale draft of a cell that loads
  # all frames -- confirm before relying on orig_frames from this cell.
  orig_frames = np.zeros((frameCount, frameHeight, frameWidth, frameChannels),dtype='uint8')
  for n in range(len(key_frames_idx)):
    orig_frames[n] = cv2.imread("./images/input/frames/f0{num}.jpg".format(num=str(key_frames_idx[n]+1).zfill(3)))


In [None]:
if RUN_LONG_STUFF_PT6: 
  # Reload the homographies from Parts 2 and 3 and the panoramas from Part 4.
  video_ref_homographies = np.load(_reference_homography_filepath() + '.npy',allow_pickle=True)
  video_homographies = get_homographies()
  panorama_bg = cv2.imread('part4_background_panorama.jpg')
  panorama_all = cv2.imread('part4_all_panorama.jpg')
  
  # List the input frames, sorted numerically by the digits in their path.
  dir_frames = 'images/input/frames'
  filenames = []
  filesinfo = os.scandir(dir_frames)

  filenames = [f.path for f in filesinfo if f.name.endswith(".jpg")]
  filenames.sort(key=lambda f: int(''.join(filter(str.isdigit, f))))

  # All-zero output buffer with the input frames' size.
  frameCount = len(filenames)
  frameHeight, frameWidth, frameChannels = cv2.imread(filenames[0]).shape
  frames = np.zeros((frameCount, frameHeight, frameWidth, frameChannels),dtype='uint8')
  
  # Original (unwarped) input frames, one per file, in sorted order.
  orig_frames = np.zeros((frameCount, frameHeight, frameWidth, frameChannels),dtype='uint8')
  for idx, file_i in enumerate(filenames):
    orig_frames[idx] = cv2.imread(file_i)

  
# TO DO part 6
def get_closest_key_frame(frame_index):
  '''
  Find the key frame nearest to a given frame.
  Input: frame_index, a 0-based frame number in [0, 900)
  Outputs: (key_frame_index, position) where key_frame_index is the 0-based
           index of the nearest key frame and position is its slot in the
           key frame list. On a tie the earlier key frame is returned.
  '''
  assert(0 <= frame_index < 900)
  # Key frames are listed 1-based; shift them to 0-based frame indices.
  key_frames = np.array([90, 270, 450, 630, 810]) - 1
  distances = np.abs(key_frames - frame_index)
  nearest = distances.argmin()
  return key_frames[nearest], nearest

if RUN_LONG_STUFF_PT5:
  # Foreground extraction: for every frame, sample the background panorama
  # back into the frame's coordinates, subtract it from the original frame and
  # add the background's mean colour, then save the resulting stack.
  # NOTE(review): frames, orig_frames, panorama_bg, video_homographies,
  # frameWidth and frameHeight are created in the cells guarded by
  # RUN_LONG_STUFF_PT6, so both flags must be set for this block to run.
  N, Height, Width, C = frames.shape
  # NOTE(review): an image array's shape is (rows, cols, channels), so these
  # two names look swapped. They are not used below.
  projectedWidth, projectedHeight, _ = panorama_bg.shape
  # Pure translation by (660, 120) pixels, applied after each frame homography.
  Tr = np.array([[1, 0, 660], [0, 1, 120], [0, 0, 1]]).astype(np.float32)
  Tr_inv = np.linalg.inv(Tr)

  print(f'Tr \n{Tr}')
  print(f'Tr Inv \n{Tr_inv}')
  debug_5 = False
  for i in np.arange(0,N,1):
    print(f'Rendering modified frames ... processing frame:{i:4}, count:{i:4}', end='\r')
    H = video_homographies[i, ...]

    # fwd_H maps frame coordinates to translated panorama coordinates.
    fwd_H = np.dot(Tr, H)
    # Sanity check: the analytic inverse inv(H) @ inv(Tr) must agree with
    # OpenCV's numeric inverse of the composed matrix.
    inv_H = np.dot(np.linalg.inv(H), Tr_inv)
    ret, cv2_inv_H = cv2.invert(fwd_H)
    assert(np.allclose(inv_H, cv2_inv_H))

    # With WARP_INVERSE_MAP the matrix is used as the destination -> source
    # map, so passing fwd_H pulls the panorama back into this frame's view.
    inverted_bg = cv2.warpPerspective(panorama_bg, fwd_H,
                                      (frameWidth, frameHeight),
                                      flags=cv2.WARP_INVERSE_MAP)

    # Per-channel mean colour of the reprojected background.
    mean_bg_colors = np.mean(inverted_bg, axis=(0,1)).astype(np.uint8)
    # uint8 arithmetic wraps modulo 256, which turns out-of-range results into
    # unrelated colours. Do the maths in int16 (values span -255..510) and
    # saturate to [0, 255] before storing.
    foreground = (orig_frames[i, ...].astype(np.int16)
                  - inverted_bg.astype(np.int16)
                  + mean_bg_colors.astype(np.int16))
    frames[i, ...] = np.clip(foreground, 0, 255).astype(np.uint8)

    if debug_5 and (i % 50) == 0:
      f = frames[i, ...]
      print(f'min: {np.min(f)}, max: {np.max(f)}, '
            f'mean: {np.mean(f)}, median: {np.median(f)}')
      print(f)
      # Images are stored BGR; reorder the channels to RGB for matplotlib.
      plt.figure(figsize=(25,20))
      plt.imshow(orig_frames[i][:,:,[2,1,0]])
      plt.title(f'Frame Original {i}')
      plt.show()
      plt.figure(figsize=(25,20))
      plt.imshow(inverted_bg[:,:,[2,1,0]])
      plt.title(f'Frame Inverted {i}')
      plt.show()
      plt.figure(figsize=(25,20))
      plt.imshow(frames[i][:,:,[2,1,0]])
      plt.title(f'Frame Foreground {i}')
      plt.show()

  np.save('part6_frames_arr.npy', frames, allow_pickle=True)

In [None]:
# Release the large frame buffers before the next cell runs. The names may be
# undefined when the guarded cells above were skipped, so remove them only if
# present instead of using a bare `del`, which would raise NameError.
for _name in ('frames', 'orig_frames'):
  globals().pop(_name, None)
gc.collect()

In [None]:
if RUN_LONG_STUFF_PT5:
  # Reload the frame stack saved to disk by the processing step.
  video_frames = np.load('part6_frames_arr.npy', allow_pickle=True)
  # Swap channels 0 and 2 (BGR -> RGB) before writing, since ffmpeg is
  # expected to receive RGB frames.
  utils.vidwrite_from_numpy(
      'part6_video_out.mp4',
      video_frames[...,[2,1,0]],
      framerate=30,
      vcodec='libx264')

## Bells and whistles