The **Part 1** of the project is divided into three sections:

1- Feature Extraction (Using SIFT)

2- Outlier Removal (Using RANSAC)

3- Computing the Homographies (Using DLT)


**pip install opencv-python**

**pip install opencv-contrib-python**

**Part 1**

In [1]:
#Imports
from numpy.linalg import eig
import numpy as np
import cv2
import os

In [2]:
#Display the video
capture = cv2.VideoCapture(os.path.abspath('trymefirst_lisbon.mp4'))
while(capture.isOpened()):
    ret, frame = capture.read()
    cv2.imshow('frame',frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):  ##press q if you want the video to stop 
        break
capture.release()
cv2.destroyAllWindows()

The following code **extracts SIFT features** from each frame of the input video

In [8]:

capture = cv2.VideoCapture(os.path.abspath('trymefirst_lisbon.mp4'))
kp_list = []
sift_points = [] #nome a definir no config
t = 0 
sift = cv2.SIFT_create(5000) #number of sift points
while True:
    t = t + 1
    if t == 5: break 
    success, frame = capture.read() #read the video
    if success:
        frame_points = []
        gray = cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY) #convert image to gray
        kp, des = sift.detectAndCompute(gray,None) #kp = keypoint, des = descriptor
        kp_list.append(kp)
        frame_points = ([kp[0].pt[0],kp[0].pt[1]]+des[0].tolist())
        for i in range(1,len(kp)):
             list = ([kp[i].pt[0],kp[i].pt[1]]+des[i].tolist())
             frame_points = np.column_stack((frame_points,list))  
    sift_points.append(frame_points) #append everything into a list 
print(des.shape)
print(len(sift_points))
#The keypoint is a point of interest in the image, the descriptor is a vector that describes the image patch around the keypoint

(5000, 128)


The following code **matches SIFT features** between the frames

In [13]:
#Brute force method
bf = cv2.BFMatcher(crossCheck=True) #crossCheck is set to true so that the match is symmetric
all_matches = []
match = []
for s in range(len(sift_points)-1):
    point_matches = []

    des1 = (((sift_points[s])[2:,:])).astype('float32')  # descriptors of the first frame
    des2 = (((sift_points[s+1])[2:,:])).astype('float32')  # descriptors of the second
    des1 = np.reshape(des1,(np.shape(des1)[1],128))
    des2 = np.reshape(des2,(np.shape(des2)[1],128))

    if np.shape(des1)[0] > np.shape(des2)[0]:
             des1 = des1[:-abs(np.shape(des1)[0]-np.shape(des2)[0]),:]  # we are removing the last points so that we have an equal amount of SIFT features between two frames
    if np.shape(des1)[0] < np.shape(des2)[0]:
             des2 = des2[:-abs(np.shape(des1)[0]-np.shape(des2)[0]),:]
    matches = bf.match(des1,des2)  # an error occurs if two frames have different amounts of SIFT features
    # try:
    # except: 
    #       print('Error!')        
             
    for i in range(len(matches)):
        match.append(matches)
        point_matches.append([matches[i].queryIdx,matches[i].trainIdx])

    all_matches.append(point_matches)
#Feature detection: opencv
#Matching : sklearn , numpy
#RANSAC: numpy
#Create Homography: numpy

The following code **computes the Homography** between the frames of the video

In [375]:
from sklearn import preprocessing
kp1 = kp_list[0]
kp2 = kp_list[1]
src_pts = np.float32([ kp1[q.queryIdx].pt for q in match[0] ]).reshape(-1,1,2)
dst_pts = np.float32([ kp2[t.trainIdx].pt for t in match[1] ]).reshape(-1,1,2)
src = np.reshape(src_pts,(np.shape(src_pts)[0],2))
dst = np.reshape(dst_pts,(np.shape(dst_pts)[0],2))
src = preprocessing.normalize(src)   #Normalization
dst = preprocessing.normalize(dst)

A = []
for p, q in zip(src, dst):
            x1 = p[0]
            y1 = p[1]
            x2 = q[0]
            y2 = q[1]
            A.append([-x1, -y1, -1, 0, 0, 0, x2*x1, x2*y1, x2])
            A.append([0, 0, 0, -x1, -y1, -1, y2*x1, y2*y1, y2])
_, _, Vt = np.linalg.svd(A, full_matrices=True)

eigenvalue,eigenvector=eig(np.matmul(np.transpose(A),A))

H = np.reshape(eigenvector[0],(3,3))
print(np.linalg.cond(H))

H2 =  Vt[-1,:].reshape(3, 3)
print(np.linalg.cond(H2))

1.6427274690329576
256563.54696650486


In [381]:
idx = eigenvalue.argsort()[::-1]   
eigenValues = eigenvalue[idx]
eigenVectors = eigenvector[:,idx]
eigenValues[-1]

0.11775348223449139

In [382]:
def Comp_H(src,dst):
        A = []
        for p, q in zip(src, dst):
            x1 = p[0]
            y1 = p[1]
            x2 = q[0]
            y2 = q[1]
            A.append([-x1, -y1, -1, 0, 0, 0, x2*x1, x2*y1, x2])
            A.append([0, 0, 0, -x1, -y1, -1, y2*x1, y2*y1, y2])

        eigenvalue,eigenvector=eig(np.matmul(np.transpose(A),A))
        idx = eigenvalue.argsort()[::-1]   
        eigenValues = eigenvalue[idx]
        eigenVectors = eigenvector[:,idx]
        eigenValues[-1]
        #_, _, Vt = np.linalg.svd(A, full_matrices=True)
        #x = Vt[-1]
        x=eigenVectors[-1]
        
        H = x.reshape(3, -1) / x[-1]
        return H

def RANSAC(Comp_H,src,dst,iter,threshold):
      best_homography = None
      inliers = [0]
      for t in range(iter):
            sample_indices = np.random.choice(int(len(src)), size=8, replace=False)
            #int(len(src)*0.1)
            # Compute the Homography
            H = Comp_H(src[sample_indices],dst[sample_indices])
            inl = 0
            for p, q in zip(src, dst):
                x1 = p[0]
                y1 = p[1]
                x2 = q[0]
                y2 = q[1]
            # Transform the point using the estimated homography
                transformed_point = np.dot(H, np.array([x1, y1, 1]))

            # Normalize the transformed point
                transformed_point /= transformed_point[2]

            # Calculate the Euclidean distance between the transformed point and the actual point
                distance = np.linalg.norm(np.array([x2, y2, 1]) - transformed_point)

                if distance < threshold:
                   inl += 1
            if inl > inliers[0]:
                 best_homography = H
                 inliers[0] = inl
      return best_homography, inliers[0] 
      

In [388]:
H, inliers = RANSAC(Comp_H,src,dst,500,0.8)
print('condition:',np.linalg.cond(H),'inliers: ', inliers)
H

condition: 4.908776732800521 inliers:  699


array([[1.03552178, 0.01145026, 0.30020634],
       [0.78608688, 0.58811479, 0.11137041],
       [0.95088638, 0.15565331, 1.        ]])

In [389]:
picture2 = np.reshape(np.array([552,59,1]),(3,1))
picture1 = np.reshape(np.array([549,56,1]),(3,1))
pic_h = np.matmul(H,picture1)
pic_h/pic_h[2]

array([[1.07087813],
       [0.87372768],
       [1.        ]])

**Testing Zone**

In [342]:
kp1 = kp_list[0]
kp2 = kp_list[1]
src_pts = np.float32([ kp1[all_matches[0][i][0]].pt for i in range(len(all_matches[0])) ]).reshape(-1,1,2)
dst_pts = np.float32([ kp2[all_matches[0][i][1]].pt for i in range(len(all_matches[0])) ]).reshape(-1,1,2)
M2, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,5.0)
np.linalg.cond(M2)

kp1 = kp_list[0]
kp2 = kp_list[1]
src_pts = np.float32([ kp1[q.queryIdx].pt for q in match[0] ]).reshape(-1,1,2)
dst_pts = np.float32([ kp2[t.trainIdx].pt for t in match[1] ]).reshape(-1,1,2)
M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,5.0)
np.linalg.cond(M)

1.1183369800502505

In [343]:
pic_h2 = np.matmul(M,picture2)
pic_h2/pic_h2[2]

array([[552.38505024],
       [ 60.09752571],
       [  1.        ]])

In [213]:
def getPerspectiveTransform(src, dst):
    if len(src) == len(dst):
        # Make homogeneous coordiates if necessary
        if src.shape[1] == 2:
            src = np.hstack((src, np.ones((len(src), 1), dtype=src.dtype)))
        if dst.shape[1] == 2:
            dst = np.hstack((dst, np.ones((len(dst), 1), dtype=dst.dtype)))

        # Solve 'Ax = 0'
        A = []
        for p, q in zip(src, dst):
            A.append([0, 0, 0, q[2]*p[0], q[2]*p[1], q[2]*p[2], -q[1]*p[0], -q[1]*p[1], -q[1]*p[2]])
            A.append([q[2]*p[0], q[2]*p[1], q[2]*p[2], 0, 0, 0, -q[0]*p[0], -q[0]*p[1], -q[0]*p[2]])

        eigenvalue,eigenvector=eig(np.matmul(np.transpose(A),A))
        #_, _, Vt = np.linalg.svd(A, full_matrices=True)
        #x = Vt[-1]
        x = 

        # Reorganize `x` as a matrix
        H = x.reshape(3, -1) / x[-1] # Normalize the last element as 1
        return H
    

H_slides = getPerspectiveTransform(np.reshape(src_pts,(np.shape(src_pts)[0],2)), np.reshape(dst_pts,(np.shape(dst_pts)[0],2)))
np.linalg.cond(H_slides)

67711165.49974936

In [231]:
picture2 = np.reshape(np.array([552,59,1]),(3,1))
picture1 = np.reshape(np.array([549,56,1]),(3,1))
pic_h = np.matmul(H2,picture1)
pic_h/pic_h[2]

array([[0.78435055],
       [0.61151436],
       [1.        ]])

In [219]:
pic_h3 = np.matmul(H_slides,picture2)
pic_h3/pic_h3[2]

array([[492.55113247],
       [293.19723376],
       [  1.        ]])

In [117]:
def select_point(event,x,y,flags,param):
    global ix,iy
    if event == cv2.EVENT_LBUTTONDBLCLK: # captures left button double-click
        print('x = %d, y = %d'%(x, y))


In [None]:
capture = cv2.VideoCapture(os.path.abspath('trymefirst_lisbon.mp4'))
framenr = 0 
list_points = []
while True:
    success, frame = capture.read()
    if success:
        print('Current Frame!')
        cv2.namedWindow('frame')
        cv2.setMouseCallback('frame', select_point)
        cv2.imshow('frame',frame)
        if cv2.waitKey(0) & 0xFF == ord('q'):  ##press q if you want the video to stop 
             break
        key = cv2.waitKey(0) & 0xFF == ord('k')
        
        print('New Frame!')

capture.release()
cv2.destroyAllWindows()