# Structure from motion example using a video

In [13]:
# Packages
import numpy as np
import cv2 as cv

## Load video and extract grey-scale images

In [191]:
video_file = r"C:\Fotos y videos\test SfM\video1.mp4"
video = cv.VideoCapture(video_file)

delta_t = 100 # ms

In [192]:
ret = True
current_time_ms = 0

images_list = []
while ret:
    video.set(cv.CAP_PROP_POS_MSEC, current_time_ms)
    ret, frame = video.read()
    key = cv.waitKey(0)
    if key == 27 or ret is False:
        break
    cv.imshow('press esc to stop', frame)
    images_list.append(cv.cvtColor(frame, cv.COLOR_BGR2GRAY))
    current_time_ms += delta_t

cv.destroyAllWindows()

### Display loaded images

In [193]:
for im in images_list:
    cv.imshow('Press Esc to stop', im)
    key = cv.waitKey(0)
    if key == 27:
        break
cv.destroyAllWindows()

## Extract image descriptors

In [194]:
sift = cv.SIFT_create()
kps_des_list = []
for im in images_list:
    kps, descriptors = sift.detectAndCompute(im, None) # No mask
    kps_des_list.append({'kps': kps,
                         'des': descriptors})
    

### Display found descriptors

In [195]:
for im, des_kps in zip(images_list, kps_des_list):
    im = cv.drawKeypoints(im, des_kps['kps'], im)
    cv.imshow('Press Esc to stop', im)
    key = cv.waitKey(0)
    if key == 27:
        break
cv.destroyAllWindows()

## Match found descriptos using k-nearest neighbours

In [200]:
# ratio of the nearest to the second nearest neighbour
ratio = 0.3

n_images = len(images_list)
matcher = cv.BFMatcher()

matches_list = []

for i in range(n_images - 1):
    good_matches = []
    matches = matcher.knnMatch(kps_des_list[i]['des'], kps_des_list[i+1]['des'], k=2)
    for m, n in matches:
        if m.distance < ratio*n.distance:
            good_matches.append(m)
    matches_list.append(good_matches)

### display good matches

In [201]:
for i, matches in enumerate(matches_list):
    im = cv.drawMatches(images_list[i], kps_des_list[i]['kps'], images_list[i+1], kps_des_list[i+1]['kps'], matches, im, 
                        flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
    cv.imshow('Press Esc to stop', im)
    key = cv.waitKey(0)
    if key == 27:
        break
cv.destroyAllWindows()

## Essential matrices estimation using RANSAC

In [202]:
# Camera matrix
camera_matrix = np.eye(3) # asume the center is in the upright corner and focal distance is 1 m
# f = 27 
# camera_matrix = np.array([[  f,   0,  images_list[0].shape[1]//2],
#                           [  0,   f,  images_list[0].shape[0]//2],
#                           [  0,   0,  1],])

reproj_E_masks_list = []
masked_matches_list = []

for i in range(n_images - 1):
    src_points = np.array([kps_des_list[i]['kps'][m.queryIdx].pt for m in matches_list[i]], dtype='float32')
    dst_points = np.array([kps_des_list[i+1]['kps'][m.trainIdx].pt for m in matches_list[i]], dtype='float32') 
    E, mask = cv.findEssentialMat(src_points, dst_points, camera_matrix, method=cv.RANSAC, threshold=.001)
    reproj_E_masks_list.append({"E": E, 
                                "mask": mask})
    # mask matches from reprojection error
    masked_matches = []
    for i_match in range(len(matches_list[i])):
        if mask[i_match] == 1:
            masked_matches.append(matches_list[i][i_match])
    masked_matches_list.append(masked_matches)


### Display matches after removing outliers

In [203]:
for i, matches in enumerate(matches_list):            
    im = cv.drawMatches(images_list[i], kps_des_list[i]['kps'], images_list[i+1], kps_des_list[i+1]['kps'], 
                        masked_matches_list[i], 
                        im, 
                        flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
    cv.imshow('Press Esc to stop', im)
    key = cv.waitKey(0)
    if key == 27:
        break
cv.destroyAllWindows()

## Camera relative poses estimation

In [204]:

R_list, t_list = [], []
for i in range(n_images - 1):
    src_points = np.array([kps_des_list[i]['kps'][m.queryIdx].pt for m in matches_list[i]], dtype='float32')
    dst_points = np.array([kps_des_list[i+1]['kps'][m.trainIdx].pt for m in matches_list[i]], dtype='float32')
    retval, R, t, mask = cv.recoverPose(
        E=reproj_E_masks_list[i]['E'],
        points1=src_points, points2=dst_points, 
        mask=reproj_E_masks_list[i]['mask'], 
        cameraMatrix=camera_matrix)
    R_list.append(R)
    t_list.append(t)


In [205]:
camera_positions = np.zeros((n_images, 3))

for i in range(n_images-1):
    camera_positions[i + 1, :] = camera_positions[i, :] + t_list[i].T

# Plots!

In [206]:
import plotly.express as px
import pandas as pd

df = pd.DataFrame(columns=['x', 'y', 'z'], data=camera_positions)
fig = px.scatter_3d(df, x='x', y='y', z='z')
fig.show()

# Triangulate points

In [207]:
points_3d_list = []
for i in range(n_images - 1):
    src_points = np.array([kps_des_list[i]['kps'][m.queryIdx].pt for m in matches_list[i]], dtype='float')
    dst_points = np.array([kps_des_list[i+1]['kps'][m.trainIdx].pt for m in matches_list[i]], dtype='float')
    
    projection_matrix1 = np.hstack((R_list[i-1], t_list[i-1])) if i > 0 else np.hstack((np.eye(3), np.zeros((3, 1))))
    projection_matrix2 = np.hstack((R_list[i], t_list[i]))
    
    points_4d_homogeneous = cv.triangulatePoints(projection_matrix1, projection_matrix2, src_points.T, dst_points.T)

    points_3d = cv.convertPointsFromHomogeneous(points_4d_homogeneous.T).reshape((-1,3))
    points_3d_list.append(points_3d)

points_3d_arr = np.vstack(points_3d_list)



In [208]:
points_3d_arr.shape

(6604, 3)

In [209]:
import plotly.express as px
import pandas as pd

df = pd.DataFrame(columns=['x', 'y', 'z'], data=points_3d_arr)
fig = px.scatter_3d(df, x='x', y='y', z='z')
fig.show()