In [1]:
import numpy as np
from numpy import linalg as LA
from scipy.io import loadmat
from mpl_toolkits import mplot3d
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.pyplot import cm
import matplotlib as mpl
import cv2
import computer_vision as cv
from icecream import ic
from tqdm import trange
import time
from get_dataset_info import *

# %load_ext snakeviz
# %matplotlib inline
%matplotlib qt
%config InlineBackend.figure_format = 'retina'
from matplotlib import rc
rc('font', **{'family': 'serif', 'serif': ['Computer Modern']})
rc('text', usetex=True)

In [2]:
def estimate_E_robust_old(K, x1_norm, x2_norm, n_its, n_samples, err_threshold_px, alpha):
    """Legacy fixed-iteration RANSAC search for an essential matrix.

    Every iteration draws `n_samples` random correspondences, fits an essential
    matrix with `cv.estimate_E_DLT`, and scores it on all correspondences using
    the mean of the two squared epipolar errors from
    `cv.compute_epipolar_errors`. The hypothesis with the most inliers wins.

    Parameters
    ----------
    K : array
        Calibration matrix; only `K[0,0]` is read, to convert the pixel
        threshold into normalized image units.
    x1_norm, x2_norm : array
        Corresponding points, one correspondence per column.
    n_its : int
        Number of RANSAC iterations (always run in full).
    n_samples : int
        Number of correspondences drawn per hypothesis.
    err_threshold_px : float
        Inlier threshold in pixels.
    alpha : float
        Unused here; the adaptive stopping rule it belonged to is disabled.

    Returns
    -------
    (best_E, best_inliers)
        Best essential matrix and its boolean inlier mask. Both are None when
        no hypothesis produced a single inlier.
    """
    threshold = err_threshold_px / K[0,0]
    n_candidates = np.size(x1_norm, 1)

    best_E, best_inliers = None, None
    best_count = 0

    for _ in trange(n_its):

        sample = np.random.choice(n_candidates, n_samples, replace=False)
        E_hyp = cv.estimate_E_DLT(x1_norm[:,sample], x2_norm[:,sample], enforce=True, verbose=False)

        d1, d2 = cv.compute_epipolar_errors(E_hyp, x1_norm, x2_norm)
        mask = ((d1**2 + d2**2) / 2) < threshold**2
        count = np.sum(mask)

        # Only strictly better hypotheses replace the current best.
        if count <= best_count:
            continue

        best_E = np.copy(E_hyp)
        best_inliers = np.copy(mask)
        best_count = count
        print('No. inliers:', count, end='\r')

    return best_E, best_inliers

In [3]:
def compute_valid_inliers(P1, P2, X, inliers):
    """Restrict an inlier mask to points with positive last coordinate in both views.

    `P1 @ X` and `P2 @ X` are evaluated and a point is kept only when the last
    row of both products is strictly positive and it is already flagged in
    `inliers`.

    Returns the element-wise product of `inliers` with the two positivity masks.
    """
    def last_row_positive(P):
        # Sign test on the last row of the projection of every column of X.
        return (P @ X)[-1,:] > 0

    positive_in_both = last_row_positive(P1) * last_row_positive(P2)
    return inliers * positive_in_both

In [4]:
def estimate_H_DLT(img1_pts, img2_pts, verbose=False):
    """Estimate a homography H with `img2 ~ H @ img1` using the DLT.

    Parameters
    ----------
    img1_pts, img2_pts : array, shape (>=2, N)
        Corresponding points, one per column. Only rows 0 and 1 are read, so
        the points are treated as inhomogeneous (x, y) / (u, v) coordinates.
    verbose : bool
        When True, print diagnostics of the SVD solution.

    Returns
    -------
    H : ndarray, shape (3, 3)
        Homography defined up to scale (unit Frobenius norm).

    Raises
    ------
    ValueError
        If no correspondences are given.

    Notes
    -----
    The solution is the right singular vector of the (2N x 9) system matrix
    belonging to its smallest singular value. A full SVD is required: for the
    minimal case N = 4 the matrix is 8 x 9, and a reduced SVD only returns
    eight right singular vectors, none of which is the null vector.
    """
    n = np.size(img1_pts, 1)
    if n == 0:
        raise ValueError('need at least one point correspondence')

    x = np.asarray(img1_pts[0], dtype=float)
    y = np.asarray(img1_pts[1], dtype=float)
    u = np.asarray(img2_pts[0], dtype=float)
    v = np.asarray(img2_pts[1], dtype=float)
    zeros = np.zeros(n)
    ones = np.ones(n)

    # Two equations per correspondence, interleaved as (u-row, v-row).
    M = np.empty((2 * n, 9))
    M[0::2] = np.stack([x, y, ones, zeros, zeros, zeros, -u*x, -u*y, -u], axis=1)
    M[1::2] = np.stack([zeros, zeros, zeros, x, y, ones, -v*x, -v*y, -v], axis=1)

    # full_matrices=True guarantees VT is 9 x 9, so its last row spans the
    # (approximate) null space even when M has fewer than nine rows.
    U, S, VT = LA.svd(M, full_matrices=True)
    h = VT[-1, :]
    H = h.reshape(3, 3).copy()

    if verbose:
        r = S.size
        M_approx = U[:, :r] @ np.diag(S) @ VT[:r, :]
        Mh = M @ h
        print('\n||Mv||:', (Mh @ Mh)**0.5)
        print('||v||^2:', h @ h)
        print('max{||M - M_approx||}:', np.max(np.abs(M - M_approx)))
        print('S:', S)

    return H

In [5]:
def homography_to_RT(H, x1, x2):
    """Decompose a calibrated homography into its two (R, t) candidates.

    The sign of H is first chosen from the determinant and then from the
    majority sign of `x2 . (H @ x1)`. The candidates are built from the SVD of
    H and converted to the convention `H = R - t n'` (see the inline note).

    Parameters
    ----------
    H : array, shape (3, 3)
        Homography between normalized image coordinates. It is not modified;
        the function works on a private copy.
    x1, x2 : array, shape (2, N) or (3, N)
        Corresponding points. A row of ones is appended when a point set does
        not already have three rows.

    Returns
    -------
    R1, t1, R2, t2
        The two rotation / translation-direction candidates. When the first
        and third singular values coincide (no translation component can be
        recovered), both candidates are `U @ VT` with a zero translation
        instead of NaNs.
    """
    def unitize(a, b):
        denom = 1.0 / np.sqrt(a**2 + b**2)
        ra = a * denom
        rb = b * denom
        return ra, rb

    # Work on a copy so the sign flips below never leak into the caller's array.
    H = np.array(H, dtype=float)

    # Check the right sign for H
    if LA.det(H) < 0:
        H = -H

    N = x1.shape[1]
    if x1.shape[0] != 3:
        x1 = np.vstack([x1, np.ones((1, N))])
    if x2.shape[0] != 3:
        x2 = np.vstack([x2, np.ones((1, N))])

    positives = np.sum(np.sum(x2 * (H @ x1), axis=0) > 0)
    if positives < (N / 2):
        H = -H

    U, S, VT = np.linalg.svd(H, full_matrices=False)
    V = VT.T
    s1 = S[0] / S[1]
    s3 = S[2] / S[1]
    zeta = s1 - s3

    # Degenerate case: all singular values (numerically) equal, so a1 = b1 = 0
    # and unitize(a1, b1) would divide by zero.
    if zeta < 1e-12:
        R = U @ VT
        t = np.zeros(3)
        return R, t, R.copy(), t.copy()

    a1 = np.sqrt(1 - s3**2)
    b1 = np.sqrt(s1**2 - 1)
    a, b = unitize(a1, b1)
    c, d = unitize(1+s1*s3, a1*b1)
    e, f = unitize(-b/s1, -a/s3)
    v1, v3 = V[:, 0], V[:, 2]
    n1 = b * v1 - a * v3
    n2 = b * v1 + a * v3
    R1 = U @ np.array([[c, 0, d], [0, 1, 0], [-d, 0, c]]) @ VT
    R2 = U @ np.array([[c, 0, -d], [0, 1, 0], [d, 0, c]]) @ VT
    t1 = e * v1 + f * v3
    t2 = e * v1 - f * v3
    # Make the third component of each plane normal non-negative, flipping the
    # matching translation along with it.
    if n1[2] < 0:
        t1 = -t1
        n1 = -n1
    if n2[2] < 0:
        t2 = -t2
        n2 = -n2

    # Move from Triggs' convention H = R*(I - t*n') to H&Z notation H = R - t*n'
    t1 = R1 @ t1
    t2 = R2 @ t2

    # Sanity check (disabled): with H normalized by S[1],
    # R - zeta * outer(t, n) should reproduce H for both candidates.

    return R1, t1, R2, t2

# Example usage:
# H is the homography matrix, x1 and x2 are the corresponding 2D points
# R1, t1, R2, t2 = homography_to_RT(H, x1, x2)

In [6]:
def compute_point_point_distance(x_proj, x_img):
    """Return the Euclidean norm of `x_proj - x_img` taken along axis 0.

    With one point per column this is the per-point distance between the two
    point sets.
    """
    residual = x_proj - x_img
    return LA.norm(residual, axis=0)

In [7]:
# P1 = cv.get_canonical_camera()
# best_P2 = None
# best_X = None
# max_inliers = 0
# if epsilon > epsilon_E:

#     # P2_arr = cv.extract_P_from_E(E)
#     # X_arr = cv.compute_triangulated_X_from_extracted_P2_solutions(P1, P2_arr, x1_norm, x2_norm)
#     # P2_valid, X_valid = cv.extract_valid_camera_and_points(P1, P2_arr, X_arr)
#     # valid_inliers = compute_valid_inliers(P1, P2_valid, X_valid, inliers)
#     # n_valid_inliers = np.sum(valid_inliers)
#     # print(n_valid_inliers, n_inliers)

#     # best_P2 = np.copy(P2_valid)
#     # best_X = np.copy(X_valid)
#     best_E = np.copy(E)
#     best_inliers = np.copy(inliers)
#     epsilon_E = epsilon
#     print('No. inliers:', n_inliers, end='\r')

#     # if n_valid_inliers > max_inliers:
#     #     best_inliers = np.copy(valid_inliers)
#     #     best_P2 = np.copy(P2_valid)
#     #     max_inliers = n_valid_inliers
#     #     print('No. valid inliers:', n_valid_inliers, end='\r')

#         # epsilon = max_inliers / n_points
#         # T = cv.compute_ransac_iterations(alpha, epsilon, n_samples)
#         # print('New T:', T, 'New epsilon:', epsilon)
#         # if t >= 4*T-1:
#         #     print('Bailout at iteration:', t, T)
#         #     break

# valid_inliers = compute_valid_inliers(P1, best_P2, best_X, best_inliers)
# n_valid_inliers = np.sum(valid_inliers)
# print('No. valid inliers:', n_valid_inliers, 'No. inliers:', np.sum(best_inliers))


In [8]:
def compute_E_validity(E):
    """Return True when the numerical rank of `E` is exactly 2, else False."""
    return bool(LA.matrix_rank(E) == 2)

In [9]:
def compute_E_inliers(E, x1_norm, x2_norm, err_threshold):
    """Score an essential matrix against all correspondences.

    A correspondence is an inlier when the mean of its two squared epipolar
    errors (from `cv.compute_epipolar_errors`) is below `err_threshold**2`.

    Returns
    -------
    (epsilon_E, inliers)
        Inlier ratio (inlier count divided by the number of columns of
        `x1_norm`) and the boolean inlier mask.
    """
    d1, d2 = cv.compute_epipolar_errors(E, x1_norm, x2_norm)
    mean_sq_error = (d1**2 + d2**2) / 2
    inliers = mean_sq_error < err_threshold**2

    inlier_ratio = np.sum(inliers) / x1_norm.shape[1]
    return inlier_ratio, inliers

In [10]:
def verbose_E_robust(t, T_E, T_H, epsilon_E, epsilon_H, inliers, method):
    """Print a one-line progress report for the robust E estimation loop.

    The inlier ratios are rounded to two decimals and the inlier count is the
    sum of the `inliers` mask.
    """
    report = (
        'Iteration:', t,
        'T_E:', T_E,
        'T_H:', T_H,
        'epsilon_E:', np.round(epsilon_E, 2),
        'epsilon_H:', np.round(epsilon_H, 2),
        'No. inliers:', np.sum(inliers),
        'From:', method,
    )
    print(*report)

In [16]:
def estimate_E_robust(K, x1_norm, x2_norm, min_its, max_its, scale_its, alpha, err_threshold_px, verbose=False):
    """RANSAC estimation of an essential matrix using two hypothesis generators.

    Each iteration produces
      1. an essential matrix from 8 random correspondences (`cv.estimate_E_DLT`), and
      2. a homography from 4 random correspondences (`estimate_H_DLT`). When
         its inlier ratio beats the best recorded homography ratio, it is
         decomposed with `homography_to_RT` and both resulting (R, T) pairs are
         converted to essential matrices via `cv.compute_E_from_R_and_T`.
    Every rank-2 candidate is scored with `compute_E_inliers`; the candidate
    with the highest inlier ratio is kept. The iteration budgets `T_E` / `T_H`
    are refreshed through `cv.compute_ransac_iterations` whenever the best
    essential matrix improves, and the loop stops once the iteration counter
    reaches either budget.

    Parameters
    ----------
    K : array
        Calibration matrix; only `K[0,0]` is read to scale the pixel threshold.
    x1_norm, x2_norm : array
        Corresponding points, one correspondence per column.
    min_its, max_its, scale_its, alpha :
        Forwarded to `cv.compute_ransac_iterations`; `max_its` is also the
        initial iteration budget.
    err_threshold_px : float
        Inlier threshold in pixels.
    verbose : bool
        Print a progress line on every improvement.

    Returns
    -------
    (best_E, best_inliers)
        Best essential matrix and its inlier mask, or (None, None) when no
        valid candidate ever achieved a positive inlier ratio.
    """
    err_threshold = err_threshold_px / K[0,0]
    n_points = x1_norm.shape[1]
    n_E_samples, n_H_samples = 8, 4

    best_E, best_inliers = None, None
    best_epsilon_E, best_epsilon_H = 0, 0
    T_E = T_H = max_its

    t = 0
    while t < T_E and t < T_H:
        t += 1

        # --- Hypothesis 1: essential matrix straight from 8 correspondences ---
        sample_E = np.random.choice(n_points, n_E_samples, replace=False)
        E = cv.estimate_E_DLT(x1_norm[:,sample_E], x2_norm[:,sample_E], enforce=True, verbose=False)

        if compute_E_validity(E):
            epsilon_E, inliers = compute_E_inliers(E, x1_norm, x2_norm, err_threshold)

            if epsilon_E > best_epsilon_E:
                best_E, best_inliers = np.copy(E), np.copy(inliers)
                best_epsilon_E = epsilon_E
                T_E = cv.compute_ransac_iterations(alpha, best_epsilon_E, n_E_samples, min_its, max_its, scale_its)

                if verbose:
                    verbose_E_robust(t, T_E, T_H, best_epsilon_E, best_epsilon_H, best_inliers, method='E 8-point alg.')

        # --- Hypothesis 2: homography from 4 correspondences ---
        sample_H = np.random.choice(n_points, n_H_samples, replace=False)
        H = estimate_H_DLT(x1_norm[:,sample_H], x2_norm[:,sample_H], verbose=False)
        transfer_dist = compute_point_point_distance(cv.dehomogenize(H @ x1_norm), x2_norm)
        epsilon_H = np.sum(transfer_dist**2 < err_threshold**2) / n_points

        # Only homographies beating the recorded H ratio are decomposed.
        if not (epsilon_H > best_epsilon_H):
            continue

        R1, T1, R2, T2 = homography_to_RT(H, x1_norm, x2_norm)
        candidates = (cv.compute_E_from_R_and_T(R1, T1), cv.compute_E_from_R_and_T(R2, T2))
        candidate_ok = [compute_E_validity(E_cand) for E_cand in candidates]

        for E_cand, ok in zip(candidates, candidate_ok):
            if not ok:
                continue

            epsilon_E, inliers = compute_E_inliers(E_cand, x1_norm, x2_norm, err_threshold)
            if not (epsilon_E > best_epsilon_E):
                continue

            # The H ratio is recorded only when it also yields a better E.
            best_E, best_inliers = np.copy(E_cand), np.copy(inliers)
            best_epsilon_E = epsilon_E
            best_epsilon_H = epsilon_H
            T_E = cv.compute_ransac_iterations(alpha, best_epsilon_E, n_E_samples, min_its, max_its, scale_its)
            T_H = cv.compute_ransac_iterations(alpha, best_epsilon_H, n_H_samples, min_its, max_its, scale_its)

            if verbose:
                verbose_E_robust(t, T_E, T_H, best_epsilon_E, best_epsilon_H, best_inliers, method='H 4-point alg.')

    print('Bailout at iteration:', t)
    return best_E, best_inliers

In [12]:
# Select the dataset and load everything that depends on it.
data_set = 0
# get_dataset_info comes from the star-import of get_dataset_info at the top.
K, img_names, init_pair, pixel_threshold = get_dataset_info(data_set)
# Inverse calibration, used below to normalize pixel coordinates.
K_inv = LA.inv(K)
imgs = cv.load_image(img_names, multi=True)
n_imgs = imgs.shape[0]
# Consecutive images (i, i+1) form the camera pairs.
n_camera_pairs = n_imgs - 1
# The two images of the initial pair selected by init_pair.
img1_init = imgs[init_pair[0]]
img2_init = imgs[init_pair[1]]

In [13]:
# Compute and save SIFT points
# Set `sift` to True to (re)compute the matches and overwrite the .npy files in data/;
# with False the whole cell is a no-op and later cells rely on files already on disk.
sift = False
# Passed to the cv.compute_sift_points* helpers (presumably the ratio-test margin -- confirm in computer_vision).
marg = 0.7

if sift:
    
    # SIFT points for rotation averaging
    # One match set per consecutive image pair (i, i+1), saved as dataset_{data_set}_RA_x{1,2}_{i}.npy.
    for i in range(n_camera_pairs):
        print("\nCamera pair:", i+1, "/", n_camera_pairs)
        img1 = imgs[i]
        img2 = imgs[i+1]
        x1, x2, _, _, _, _, _ = cv.compute_sift_points(img1, img2, marg, verbose=True)
        np.save('data/dataset_{}_RA_x1_{}.npy'.format(data_set, i), x1)
        np.save('data/dataset_{}_RA_x2_{}.npy'.format(data_set, i), x2)
        

    # SIFT points for translation registration
    # The initial pair is matched first; kp1/des1 from it are reused for every other image below.
    x1, x2, kp1, kp2, des1, des2, _ = cv.compute_sift_points(img1_init, img2_init, marg, verbose=True)
    np.save('data/dataset_{}_TR_x1_{}.npy'.format(data_set, init_pair[1]), x1)
    np.save('data/dataset_{}_TR_x2_{}.npy'.format(data_set, init_pair[1]), x2)

    for i in range(n_imgs):

        # Both images of the initial pair are skipped (the pair itself was saved just above).
        if i != init_pair[0] and i != init_pair[1]:
            
            print("\nImage:", i+1, "/", n_imgs)
            img2 = imgs[i]
            x1, x2 = cv.compute_sift_points_sequential(kp1, des1, img2, marg, verbose=True)
            np.save('data/dataset_{}_TR_x1_{}.npy'.format(data_set, i), x1)
            np.save('data/dataset_{}_TR_x2_{}.npy'.format(data_set, i), x2)

In [37]:
def compute_sift_points(img1, img2, marg, verbose=False):
    """Detect SIFT features in two images and match them with a ratio test.

    Parameters
    ----------
    img1, img2 : array
        Colour images; both are converted with `cv2.COLOR_BGR2GRAY`.
    marg : float
        Ratio-test threshold: a match is kept when the best distance is
        smaller than `marg` times the second-best distance.
    verbose : bool
        Print the number of raw and accepted matches.

    Returns
    -------
    x1, x2 :
        Matched keypoint locations of image 1 / image 2, one match per column,
        passed through `cv.homogenize(..., multi=True)`.
    des1, des2 : ndarray
        Descriptors of the matched keypoints, one row per match, in the same
        order as the columns of `x1` / `x2`.
    img_match :
        Visualization from `cv2.drawMatchesKnn` (drawn on the grayscale images).

    Raises
    ------
    ValueError
        If no match passes the ratio test.
    """
    img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)

    sift = cv2.SIFT_create()
    kp1, des1 = sift.detectAndCompute(img1, None)
    kp2, des2 = sift.detectAndCompute(img2, None)

    FLANN_INDEX_KDTREE = 1
    index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
    search_params = dict(checks=50)   # or pass empty dictionary

    flann = cv2.FlannBasedMatcher(index_params, search_params)
    matches = flann.knnMatch(des1, des2, k=2)

    good_matches = []
    for neighbours in matches:
        # knnMatch can return fewer than k neighbours for a query descriptor;
        # the ratio test is undefined for those, so skip them instead of
        # failing on tuple unpacking.
        if len(neighbours) < 2:
            continue
        m, n = neighbours
        if m.distance < marg*n.distance:
            good_matches.append([m])

    if not good_matches:
        raise ValueError('No SIFT matches passed the ratio test (marg={}).'.format(marg))

    draw_params = dict(matchColor=(255,0,255), singlePointColor=(0,255,0), matchesMask=None, flags=cv2.DrawMatchesFlags_DEFAULT)
    img_match = cv2.drawMatchesKnn(img1, kp1, img2, kp2, good_matches, None, **draw_params)

    # queryIdx indexes image-1 keypoints, trainIdx image-2 keypoints.
    x1 = np.stack([kp1[match[0].queryIdx].pt for match in good_matches],1)
    x2 = np.stack([kp2[match[0].trainIdx].pt for match in good_matches],1)
    x1 = cv.homogenize(x1, multi=True)
    x2 = cv.homogenize(x2, multi=True)

    # Keep only the descriptors of accepted matches, aligned with x1 / x2.
    des1 = np.stack([des1[match[0].queryIdx] for match in good_matches],0)
    des2 = np.stack([des2[match[0].trainIdx] for match in good_matches],0)

    if verbose:
        print('Number of matches:', len(matches))
        print('Number of good matches:', np.size(x1,1))

    return x1, x2,  des1, des2, img_match


# Match the initial pair with the notebook-local compute_sift_points (ratio 0.6 here,
# while the loop below passes `marg`).
x1, x2, des1, des2, _ = compute_sift_points(img1_init, img2_init, 0.6, verbose=True)

np.save('data/dataset_{}_TR_x1_{}.npy'.format(data_set, init_pair[1]), x1)
np.save('data/dataset_{}_TR_x2_{}.npy'.format(data_set, init_pair[1]), x2)

# Match every remaining image against the first image of the initial pair.
# NOTE(review): `kp1` is not produced by the local compute_sift_points call above (it
# returns no keypoints); the only visible assignment is inside the `if sift:` branch
# of the earlier cell, so on a fresh kernel with sift = False this loop raises NameError.
# NOTE(review): `des1` here holds only the descriptors of accepted matches -- confirm
# that cv.compute_sift_points_sequential expects kp1 and des1 in that form.
for i in range(n_imgs):

    if i != init_pair[0] and i != init_pair[1]:
        
        print("\nImage:", i+1, "/", n_imgs)
        img2 = imgs[i]
        x1, x2 = cv.compute_sift_points_sequential(kp1, des1, img2, marg, verbose=True)
        np.save('data/dataset_{}_TR_x1_{}.npy'.format(data_set, i), x1)
        np.save('data/dataset_{}_TR_x2_{}.npy'.format(data_set, i), x2)

<class 'numpy.ndarray'>
(44542, 128) (47937, 128) 44542 47937
(14019, 128) (14019, 128)
[ 16.   5.   0.   1.  97. 102.   5.  10. 104.  14.   0.   0.   8.   9.
   4. 103.  49.  14.   0.   1.  32.  24.   8.  62.  11.   9.  13.   4.
  44.  34.  10.  25.  40.  24.   5.  43. 120.  10.   1.   4. 120.  32.
   6.   7.  20.   2.   2.  74.  44.  15.  31.  38.  29.  32.   9.  45.
   4.   8.  16.  10.  67.  79.   3.   5.  13.  11.   2.  25.  95. 106.
  16.   6.  90. 120.  57.  10.  18.  13.   0.   2.  63.  70. 114.  57.
   2.   1.   0.  40.  43.  17.  10.   7.  53.  44.   2.  30.   0.   0.
   0.   0.  84. 120.  13.   1.  37.  30.  11.   2.  59. 102.   1.   1.
 118.  86.  20.   0.   0.   2.   1.  38.  15.  52.  20.   1.  31.  16.
   0.  18.]
Number of matches: 44542
Number of good matches: 14019


In [14]:
# Load SIFT points
#
# The per-pair / per-image point sets are kept in plain Python lists. Each entry
# generally has a different number of matches, so wrapping the lists in np.array
# would build a ragged array, which NumPy >= 1.24 rejects with a ValueError.
# Downstream cells only index these containers (e.g. x1s_norm_RA[i]), which
# works identically on lists.

# SIFT points for rotation averaging (one entry per consecutive camera pair)
x1s_norm_RA = []
x2s_norm_RA = []

for i in range(n_camera_pairs):

    x1 = np.load('data/dataset_{}_RA_x1_{}.npy'.format(data_set, i))
    x2 = np.load('data/dataset_{}_RA_x2_{}.npy'.format(data_set, i))
    # Normalize pixel coordinates with the inverse calibration matrix.
    x1_norm = cv.dehomogenize(K_inv @ x1)
    x2_norm = cv.dehomogenize(K_inv @ x2)
    x1s_norm_RA.append(x1_norm)
    x2s_norm_RA.append(x2_norm)


# SIFT points for translation registration
# One entry per image except init_pair[0]; entries for images after
# init_pair[0] therefore sit at list index (image index - 1).
x1s_norm_TR = []
x2s_norm_TR = []

for i in range(n_imgs):

    if i != init_pair[0]:

        x1 = np.load('data/dataset_{}_TR_x1_{}.npy'.format(data_set, i))
        x2 = np.load('data/dataset_{}_TR_x2_{}.npy'.format(data_set, i))
        x1_norm = cv.dehomogenize(K_inv @ x1)
        x2_norm = cv.dehomogenize(K_inv @ x2)
        x1s_norm_TR.append(x1_norm)
        x2s_norm_TR.append(x2_norm)

In [17]:
# Compute rotation averaging
# Chains the relative pose of every consecutive camera pair into absolute
# rotations/translations, starting from the canonical first camera.

# RANSAC settings forwarded to estimate_E_robust.
min_its = 0
max_its = 10000
scale_its = 1
alpha = 0.95
P1 = cv.get_canonical_camera()
# Absolute pose of camera 0: rotation = all but the last column of P1, translation = last column.
abs_rots = [P1[:,:-1]]
abs_trans = [P1[:,-1]]

for i in range(n_camera_pairs):    
    
    x1_norm = x1s_norm_RA[i]
    x2_norm = x2s_norm_RA[i]
    # NOTE(review): estimate_E_robust can return (None, None); that case is not handled here.
    E, inliers = estimate_E_robust(K, x1_norm, x2_norm, min_its, max_its, scale_its, alpha, pixel_threshold, verbose=True)
    x1_norm_inliers = x1_norm[:,inliers]
    x2_norm_inliers = x2_norm[:,inliers]

    # Candidate second cameras from E, triangulated against the canonical P1;
    # extract_valid_camera_and_points picks one of them (presumably by cheirality -- see computer_vision).
    P2_arr = cv.extract_P_from_E(E)
    X_arr = cv.compute_triangulated_X_from_extracted_P2_solutions(P1, P2_arr, x1_norm_inliers, x2_norm_inliers)
    P2, _ = cv.extract_valid_camera_and_points(P1, P2_arr, X_arr, verbose=True)

    # Compose the pair's relative pose (P2) with the absolute pose of camera i.
    R1 = abs_rots[i]
    T1 = abs_trans[i]
    R2 = P2[:,:-1] @ R1
    # R2 @ R1.T equals the relative rotation P2[:,:-1], applied here to the previous translation.
    T2 = P2[:,-1] + (R2 @ R1.T @ T1)

    abs_rots.append(R2)
    abs_trans.append(T2)


Iteration: 1 T_E: 10000 T_H: 10000 epsilon_E: 0.05 epsilon_H: 0 No. inliers: 792 From: E 8-point alg.
Iteration: 9 T_E: 10000 T_H: 10000 epsilon_E: 0.06 epsilon_H: 0.01 No. inliers: 970 From: H 4-point alg.
Iteration: 9 T_E: 10000 T_H: 10000 epsilon_E: 0.11 epsilon_H: 0.01 No. inliers: 1828 From: H 4-point alg.
Iteration: 449 T_E: 10000 T_H: 10000 epsilon_E: 0.14 epsilon_H: 0.02 No. inliers: 2312 From: H 4-point alg.
Iteration: 534 T_E: 10000 T_H: 10000 epsilon_E: 0.19 epsilon_H: 0.02 No. inliers: 3134 From: E 8-point alg.
Iteration: 3959 T_E: 10000 T_H: 10000 epsilon_E: 0.21 epsilon_H: 0.02 No. inliers: 3444 From: E 8-point alg.
Iteration: 4734 T_E: 10000 T_H: 10000 epsilon_E: 0.31 epsilon_H: 0.02 No. inliers: 5187 From: E 8-point alg.
Iteration: 8681 T_E: 2110.0 T_H: 10000 epsilon_E: 0.44 epsilon_H: 0.02 No. inliers: 7273 From: E 8-point alg.
Bailout at iteration: 8681
No. valid coords for each camera pair: [    0 14546  7273  7273]
Argmax(P2_arr): 1


In [20]:
# Reconstruct initial 3D points
# Robustly estimate E for the initial image pair and cache its inlier mask on disk.

# The TR lists skip init_pair[0], hence the -1 offset (this assumes init_pair[0] < init_pair[1]).
x1_init_norm = x1s_norm_TR[init_pair[1]-1]
x2_init_norm = x2s_norm_TR[init_pair[1]-1]
E, inliers = estimate_E_robust(K, x1_init_norm, x2_init_norm, min_its, max_its, scale_its, alpha, pixel_threshold, verbose=True)
# Only the inlier mask is saved; E itself stays in kernel memory.
np.save('data/dataset_{}_TR_inliers_{}.npy'.format(data_set, init_pair[1]), inliers)

Iteration: 1 T_E: 10000 T_H: 10000 epsilon_E: 0.02 epsilon_H: 0.0 No. inliers: 407 From: H 4-point alg.
Iteration: 1 T_E: 10000 T_H: 10000 epsilon_E: 0.03 epsilon_H: 0.0 No. inliers: 416 From: H 4-point alg.
Iteration: 4 T_E: 10000 T_H: 10000 epsilon_E: 0.07 epsilon_H: 0.0 No. inliers: 1076 From: E 8-point alg.


Iteration: 47 T_E: 10000 T_H: 10000 epsilon_E: 0.07 epsilon_H: 0.0 No. inliers: 1231 From: H 4-point alg.
Iteration: 128 T_E: 10000 T_H: 10000 epsilon_E: 0.13 epsilon_H: 0.0 No. inliers: 2085 From: H 4-point alg.
Iteration: 132 T_E: 10000 T_H: 10000 epsilon_E: 0.13 epsilon_H: 0.01 No. inliers: 2106 From: H 4-point alg.
Iteration: 452 T_E: 10000 T_H: 10000 epsilon_E: 0.16 epsilon_H: 0.01 No. inliers: 2582 From: E 8-point alg.
Iteration: 1245 T_E: 10000 T_H: 10000 epsilon_E: 0.2 epsilon_H: 0.01 No. inliers: 3251 From: E 8-point alg.
Iteration: 1438 T_E: 10000 T_H: 10000 epsilon_E: 0.21 epsilon_H: 0.01 No. inliers: 3492 From: E 8-point alg.
Iteration: 2202 T_E: 10000 T_H: 10000 epsilon_E: 0.21 epsilon_H: 0.05 No. inliers: 3551 From: H 4-point alg.
Iteration: 2230 T_E: 10000 T_H: 10000 epsilon_E: 0.34 epsilon_H: 0.06 No. inliers: 5628 From: H 4-point alg.
Iteration: 4499 T_E: 10000 T_H: 10000 epsilon_E: 0.36 epsilon_H: 0.06 No. inliers: 5922 From: E 8-point alg.
Iteration: 7688 T_E: 7682.0

In [21]:
# Triangulate the initial pair using the cached inlier mask.
# NOTE(review): `inliers` is reloaded from disk but `E` is whatever the kernel currently
# holds; if the previous cell was re-run or skipped, the two may not belong together.
inliers = np.load('data/dataset_{}_TR_inliers_{}.npy'.format(data_set, init_pair[1]))
x1_init_norm_inliers = x1_init_norm[:,inliers]
x2_init_norm_inliers = x2_init_norm[:,inliers]

P2_arr = cv.extract_P_from_E(E)
X_arr = cv.compute_triangulated_X_from_extracted_P2_solutions(P1, P2_arr, x1_init_norm_inliers, x2_init_norm_inliers)
P2, X = cv.extract_valid_camera_and_points(P1, P2_arr, X_arr, verbose=True)

# Rotate the points by the transpose of the first init camera's absolute rotation.
# X[:-1,:] drops the last row (presumably the homogeneous coordinate -- confirm it is normalized to 1).
R1_init = abs_rots[init_pair[0]]
X = R1_init.T @ X[:-1,:]

No. valid coords for each camera pair: [12402     0  6201  6201]
Argmax(P2_arr): 0


In [22]:
def plot_cameras_and_3D_points(X, C_arr, axis_arr, s, path, save=False):
    """Show a 3D scatter of the points `X` together with the cameras.

    Parameters
    ----------
    X : array
        Point coordinates; rows 0, 1 and 2 are plotted as x, y and z.
    C_arr, axis_arr, s :
        Forwarded unchanged to `cv.plot_cameras_and_axes`.
    path : str or None
        Output file for the figure; only used when `save` is True.
    save : bool
        Write the figure to `path` at 300 dpi before showing it.
    """
    fig = plt.figure()
    ax = plt.axes(projection='3d')

    xs, ys, zs = X[0], X[1], X[2]
    ax.plot(xs, ys, zs, '.', ms=1, color='magenta', label='Est. X')
    cv.plot_cameras_and_axes(ax, C_arr, axis_arr, s)

    for set_label, text in ((ax.set_xlabel, '$x$'), (ax.set_ylabel, '$y$'), (ax.set_zlabel, '$z$')):
        set_label(text)

    ax.set_aspect('equal')
    ax.view_init(elev=-50, azim=-104, roll=20)
    ax.legend(loc="lower right")
    fig.tight_layout()

    if save:
        fig.savefig(path, dpi=300)

    plt.show()

In [23]:
# Plot the initial reconstruction with the two cameras of the initial pair.
# Camera matrices are assembled as [R | T] from the absolute poses computed above.
# Note that this rebinds the global P1 (previously the canonical camera).
P1 = np.column_stack((abs_rots[init_pair[0]], abs_trans[init_pair[0]]))
P2 = np.column_stack((abs_rots[init_pair[1]], abs_trans[init_pair[1]]))
P_arr = np.array([P1, P2])
C_arr, axis_arr = cv.compute_camera_center_and_normalized_principal_axis(P_arr, multi=True)
# Forwarded to cv.plot_cameras_and_axes (presumably a drawing scale -- confirm in computer_vision).
s = 2
plot_cameras_and_3D_points(X, C_arr, axis_arr, s, None, save=False)

In [None]:
def estimate_T_DLT_1(img_pts, verbose=False):
    """Solve the homogeneous system built from rows [1, 0, -x] and [0, 1, -y].

    For every column (x, y, ...) of `img_pts` two rows are stacked into M, and
    the unit vector minimizing ||M v|| is returned.

    Parameters
    ----------
    img_pts : array, shape (>=2, N)
        Image points, one per column; only rows 0 and 1 are read.
    verbose : bool
        Print diagnostics of the SVD solution.

    Returns
    -------
    T : ndarray, shape (3,)
        Right singular vector of M for its smallest singular value
        (defined up to sign).

    Raises
    ------
    ValueError
        If `img_pts` has no columns.

    Notes
    -----
    A full SVD is used so that VT is always 3 x 3; with a reduced SVD and a
    single point (M is 2 x 3) the null vector would be missing from VT.
    NOTE(review): neither a rotation nor 3D points enter this system -- confirm
    this is the intended translation estimator.
    """
    n = img_pts.shape[1]
    if n == 0:
        raise ValueError('need at least one image point')

    # Interleave the two equations of every point: rows 2i and 2i+1.
    M = np.zeros((2 * n, 3))
    M[0::2, 0] = 1
    M[0::2, 2] = -np.asarray(img_pts[0], dtype=float)
    M[1::2, 1] = 1
    M[1::2, 2] = -np.asarray(img_pts[1], dtype=float)

    U, S, VT = LA.svd(M, full_matrices=True)
    v = VT[-1, :]
    T = v[:3].copy()

    if verbose:
        print('vt last row shape', v.shape)
        r = S.size
        M_approx = U[:, :r] @ np.diag(S) @ VT[:r, :]
        Mv = M @ v
        print('\n||Mv||:', (Mv @ Mv)**0.5)
        print('||v||^2:', v @ v)
        print('max{||M - M_approx||}:', np.max(np.abs(M - M_approx)))
        print('S:', S)

    return T

In [None]:
def estimate_T_DLT_2(R, img_pts, verbose=False):
    """Solve the homogeneous system built from blocks [[x]_x, [x]_x @ R].

    For every column x of `img_pts`, `cv.create_skew_symmetric_matrix(x)` is
    stacked next to its product with `R`, and the unit vector minimizing
    ||M v|| is computed. The first three components of that vector are
    returned.

    Parameters
    ----------
    R : array, shape (3, 3)
        Rotation multiplied onto the skew-symmetric block.
    img_pts : array, shape (3, N)
        Image points, one per column (passed whole to the skew-matrix helper).
    verbose : bool
        Print diagnostics of the SVD solution.

    Returns
    -------
    T : ndarray, shape (3,)
        First three entries of the right singular vector of M for its smallest
        singular value.

    Notes
    -----
    A full SVD is used so that VT is always square; with a reduced SVD and a
    single point (M is 3 x 6) the last row of VT would not be a null vector.
    NOTE(review): no 3D points enter this system -- confirm the formulation.
    """
    n = img_pts.shape[1]
    M = []

    for i in range(n):

        xx = cv.create_skew_symmetric_matrix(img_pts[:,i])
        m = np.column_stack((xx, xx @ R))
        M.append(m)

    M = np.concatenate(M, 0)
    U, S, VT = LA.svd(M, full_matrices=True)
    v = VT[-1, :]
    T = v[:3].copy()

    if verbose:
        print('vt last row shape', v.shape)
        r = S.size
        M_approx = U[:, :r] @ np.diag(S) @ VT[:r, :]
        Mv = M @ v
        print('\n||Mv||:', (Mv @ Mv)**0.5)
        print('||v||^2:', v @ v)
        print('max{||M - M_approx||}:', np.max(np.abs(M - M_approx)))
        print('S:', S)

    return T

In [None]:
def estimate_T_robust(K, R, X, x_norm, min_its, max_its, scale_its, alpha, err_threshold_px, verbose=False):
    """RANSAC estimation of a camera translation for a known rotation.

    Each iteration draws two image points, computes a translation hypothesis
    with `estimate_T_DLT_1`, projects `X` with `[R | T]` and counts the points
    whose reprojection distance to `x_norm` is below the threshold. The
    iteration budget is refreshed with `cv.compute_ransac_iterations` whenever
    the best inlier ratio improves.

    Parameters
    ----------
    K : array
        Calibration matrix; only `K[0,0]` is read to scale the pixel threshold.
    R : array, shape (3, 3)
        Known camera rotation.
    X : array, shape (3, N)
        3D points, one per column, paired column-wise with `x_norm`.
    x_norm : array
        Image points, one per column.
    min_its, max_its, scale_its, alpha :
        Forwarded to `cv.compute_ransac_iterations`; `max_its` is also the
        initial iteration budget.
    err_threshold_px : float
        Inlier threshold in pixels.
    verbose : bool
        Print a progress line on every improvement.

    Returns
    -------
    (best_T, best_inliers)
        Best translation and its boolean inlier mask, or (None, None) when no
        hypothesis produced an inlier.

    Notes
    -----
    NOTE(review): `estimate_T_DLT_1` does not use `R` or `X`, so the hypothesis
    is independent of the 3D points -- confirm the intended minimal solver
    (`estimate_T_DLT_2` is the alternative defined next to it).
    """
    err_threshold = err_threshold_px / K[0,0]
    best_T = None
    best_inliers = None
    best_epsilon = 0
    # Use the function's own input, not a notebook-global point set.
    n_points = x_norm.shape[1]
    n_samples = 2
    ransac_its = max_its

    t = 0
    while t < ransac_its:
        t += 1

        rand_mask = np.random.choice(n_points, n_samples, replace=False)
        T = estimate_T_DLT_1(x_norm[:,rand_mask], verbose=False)

        # T is a length-3 vector; make it a column so it is added to every
        # projected point instead of being broadcast along the point axis.
        x_norm_proj = cv.dehomogenize(R @ X + np.reshape(T, (3, 1)))
        distance_arr = compute_point_point_distance(x_norm_proj, x_norm)
        inliers = distance_arr**2 < err_threshold**2
        n_inliers = np.sum(inliers)
        epsilon = n_inliers / n_points

        if epsilon > best_epsilon:
            best_T = np.copy(T)
            best_inliers = np.copy(inliers)
            best_epsilon = epsilon
            # The iteration bound depends on the inlier ratio, not the mask.
            ransac_its = cv.compute_ransac_iterations(alpha, best_epsilon, n_samples, min_its, max_its, scale_its)
            if verbose:
                print('Iteration:', t, 'T:', ransac_its, 'epsilon:', np.round(best_epsilon, 2), 'No. inliers:', np.sum(inliers))

    print('Bailout at iteration:', t)
    return best_T, best_inliers

In [None]:
# Compute translation registration

# Is the constraint img < init_pair[0] necessary?
# How to pair X with x? x views only some of the 3D points.

min_its = 0
max_its = 10000
scale_its = 1
alpha = 0.95
P1 = cv.get_canonical_camera()

for i in range(n_imgs):

    if i == init_pair[0]:
        continue

    # x1s_norm_TR / x2s_norm_TR were filled while skipping init_pair[0], so
    # every image after it is stored one position earlier than its image index.
    tr_idx = i if i < init_pair[0] else i - 1
    x1_norm = x1s_norm_TR[tr_idx]
    x2_norm = x2s_norm_TR[tr_idx]
    R = abs_rots[i]

    # estimate_T_robust returns (translation, inlier mask).
    T, T_inliers = estimate_T_robust(K, R, X, x2_norm, min_its, max_its, scale_its, alpha, 3*pixel_threshold)