In [1]:
import numpy as np
from numpy import linalg as LA
from scipy.io import loadmat
from mpl_toolkits import mplot3d
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.pyplot import cm
import matplotlib as mpl
import cv2
import computer_vision as cv
from icecream import ic
from tqdm import trange
import time
from get_dataset_info import *

# %load_ext snakeviz
# %matplotlib inline
%matplotlib qt
%config InlineBackend.figure_format = 'retina'
from matplotlib import rc
rc('font', **{'family': 'serif', 'serif': ['Computer Modern']})
rc('text', usetex=True)

In [2]:
def estimate_E_robust(K, x1_norm, x2_norm, n_its, n_samples, err_threshold_px, alpha):
    
    err_threshold = err_threshold_px / K[0,0]
    best_inliers = None
    best_E = None
    max_inliers = 0
    epsilon = 0
    T = n_its
    n_points = x1_norm.shape[1]

    for t in trange(n_its):

        rand_mask = np.random.choice(np.size(x1_norm,1), n_samples, replace=False)
        E = cv.estimate_E_DLT(x1_norm[:,rand_mask], x2_norm[:,rand_mask], enforce=True, verbose=False)

        D1, D2 = cv.compute_epipolar_errors(E, x1_norm, x2_norm)
        inliers = ((D1**2 + D2**2) / 2) < err_threshold**2

        n_inliers = np.sum(inliers)

        if n_inliers > max_inliers:
            best_inliers = np.copy(inliers)
            best_E = np.copy(E)
            max_inliers = n_inliers
            print('No. inliers:', np.sum(inliers), end='\r')

            # Extract R and T

            # epsilon = max_inliers / n_points
            # T = cv.compute_ransac_iterations(alpha, epsilon, n_samples)
            # print('New T:', T, 'New epsilon:', epsilon)
            # if t >= 4*T-1:
            #     print('Bailout at iteration:', t, T)
            #     break
        
    return best_E, best_inliers

In [3]:
data_set = 0
K, img_names, init_pair, pixel_threshold = get_dataset_info(data_set)
K_inv = LA.inv(K)
imgs = cv.load_image(img_names, multi=True)
n_imgs = imgs.shape[0]
n_camera_pairs = n_imgs - 1
img1_init = imgs[init_pair[0]]
img2_init = imgs[init_pair[1]]

In [13]:
# Compute and save SIFT points
marg = 0.7


# SIFT points for relative orientation
for i in range(n_camera_pairs):
    print("\nCamera pair:", i+1, "/", n_camera_pairs)
    img1 = imgs[i]
    img2 = imgs[i+1]
    x1, x2, _, _, _, _, _ = cv.compute_sift_points(img1, img2, marg)
    np.save('data/dataset_{}_RO_x1_{}.npy'.format(data_set, i), x1)
    np.save('data/dataset_{}_RO_x2_{}.npy'.format(data_set, i), x2)
    

# SIFT points for camera resection
x1, x2, kp1, kp2, des1, des2, _ = cv.compute_sift_points(img1_init, img2_init, marg)
np.save('data/dataset_{}_CR_x1_{}.npy'.format(data_set, init_pair[1]), x1)
np.save('data/dataset_{}_CR_x2_{}.npy'.format(data_set, init_pair[1]), x2)

for i in range(n_imgs):

    if i != init_pair[0] and i != init_pair[1]:
        
        print("\nImage:", i+1, "/", n_imgs)
        img2 = imgs[i]
        x1, x2 = cv.compute_sift_points_sequential(kp1, des1, img2, marg)
        np.save('data/dataset_{}_CR_x1_{}.npy'.format(data_set, i), x1)
        np.save('data/dataset_{}_CR_x2_{}.npy'.format(data_set, i), x2)


Camera pair: 1 / 1
Number of matches: 44542
Number of good matches: 16509
Number of matches: 44542
Number of good matches: 16542


In [4]:
# Load SIFT points

# SIFT points for relative orientation
x1s_norm_RO = []
x2s_norm_RO = []

for i in range(n_camera_pairs):

    x1 = np.load('data/dataset_{}_RO_x1_{}.npy'.format(data_set, i))
    x2 = np.load('data/dataset_{}_RO_x2_{}.npy'.format(data_set, i))
    x1_norm = cv.dehomogenize(K_inv @ x1)
    x2_norm = cv.dehomogenize(K_inv @ x2)
    x1s_norm_RO.append(x1_norm)
    x2s_norm_RO.append(x2_norm)

x1s_norm_RO = np.array(x1s_norm_RO)
x2s_norm_RO = np.array(x2s_norm_RO)


# SIFT points for camera resectioning
x1s_norm_CR = []
x2s_norm_CR = []

for i in range(n_imgs):

    if i != init_pair[0]:

        x1 = np.load('data/dataset_{}_CR_x1_{}.npy'.format(data_set, i))
        x2 = np.load('data/dataset_{}_CR_x2_{}.npy'.format(data_set, i))
        x1_norm = cv.dehomogenize(K_inv @ x1)
        x2_norm = cv.dehomogenize(K_inv @ x2)
        x1s_norm_CR.append(x1_norm)
        x2s_norm_CR.append(x2_norm)

x1s_norm_CR = np.array(x1s_norm_CR)
x2s_norm_CR = np.array(x2s_norm_CR)

In [32]:
# Compute absolute rotations

n_its = 10000
n_samples = 8
alpha = 0.95
P1 = cv.get_canonical_camera()
abs_rots = [P1[:,:-1]]
abs_trans = [P1[:,-1]]

for i in range(n_camera_pairs):    
    
    x1_norm = x1s_norm_RO[i]
    x2_norm = x2s_norm_RO[i]
    E, inliers = estimate_E_robust(K, x1_norm, x2_norm, n_its, n_samples, pixel_threshold, alpha)
    x1_norm_inliers = x1_norm[:,inliers]
    x2_norm_inliers = x2_norm[:,inliers]

    P2_arr = cv.extract_P_from_E(E)
    X_arr = cv.get_triangulated_X_from_extracted_P2_solutions(P1, P2_arr, x1_norm_inliers, x2_norm_inliers)
    P2, _ = cv.extract_valid_camera_and_points(P1, P2_arr, X_arr)

    R1 = abs_rots[i]
    T1 = abs_trans[i]
    R2 = P2[:,:-1] @ R1
    T2 = P2[:,-1] + (R2 @ R1.T @ T1)

    abs_rots.append(R2)
    abs_trans.append(T2)


  0%|          | 17/10000 [00:00<01:01, 162.58it/s]

No. inliers: 583

  0%|          | 34/10000 [00:00<01:26, 115.52it/s]

  1%|          | 116/10000 [00:00<01:05, 150.65it/s]

No. inliers: 1663

  5%|▌         | 537/10000 [00:03<01:09, 135.41it/s]

No. inliers: 8763

 12%|█▏        | 1236/10000 [00:07<00:32, 271.99it/s]

No. inliers: 12095

100%|██████████| 10000/10000 [00:47<00:00, 208.63it/s]


[5.85172192e+02 5.85172192e+02 1.29096182e-13]
No. valid coords for each camera pair: [24190     0 12095 12095]
Argmax(P2_arr): 0


In [6]:
def plot_cameras_and_3D_points(X, C_arr, axis_arr, s, path, save=False):
    
    fig = plt.figure()
    ax = plt.axes(projection='3d')

    ax.plot(X[0], X[1], X[2], '.', ms=1, color='magenta', label='Est. X')
    cv.plot_cameras_and_axes(ax, C_arr, axis_arr, s)

    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')
    ax.set_zlabel('$z$')
    ax.set_aspect('equal')
    ax.view_init(elev=-50, azim=-104, roll=20)
    ax.legend(loc="lower right")
    fig.tight_layout()
    if save:
        fig.savefig(path, dpi=300)
    plt.show()

In [35]:
# Reconstruct initial 3D points

x1_init_norm = x1s_norm_CR[init_pair[1]-1]
x2_init_norm = x2s_norm_CR[init_pair[1]-1]
E, inliers = estimate_E_robust(K, x1_init_norm, x2_init_norm, n_its, n_samples, pixel_threshold, alpha)
np.save('data/dataset_{}_CR_inliers_{}.npy'.format(data_set, init_pair[1]), inliers)

  0%|          | 26/10000 [00:00<00:39, 254.88it/s]

No. inliers: 1501

  1%|          | 104/10000 [00:00<00:40, 245.38it/s]

No. inliers: 1787

  3%|▎         | 281/10000 [00:01<00:33, 288.64it/s]

No. inliers: 1858

  4%|▎         | 364/10000 [00:01<00:44, 216.96it/s]

No. inliers: 2818

  5%|▌         | 505/10000 [00:02<00:44, 211.39it/s]

No. inliers: 3270

 13%|█▎        | 1317/10000 [00:05<00:32, 266.65it/s]

No. inliers: 4804

 44%|████▎     | 4365/10000 [00:16<00:23, 242.35it/s]

No. inliers: 5537

 84%|████████▍ | 8421/10000 [00:44<00:09, 161.31it/s]

No. inliers: 5791

100%|██████████| 10000/10000 [00:53<00:00, 188.67it/s]


In [36]:
inliers = np.load('data/dataset_{}_CR_inliers_{}.npy'.format(data_set, init_pair[1]))
x1_init_norm_inliers = x1_init_norm[:,inliers]
x2_init_norm_inliers = x2_init_norm[:,inliers]

P2_arr = cv.extract_P_from_E(E)
X_arr = cv.get_triangulated_X_from_extracted_P2_solutions(P1, P2_arr, x1_norm_inliers, x2_norm_inliers)
P2, X = cv.extract_valid_camera_and_points(P1, P2_arr, X_arr)

R1_init = abs_rots[init_pair[0]]
T1_init = abs_trans[init_pair[0]]
X = R1_init.T @ X[:-1,:] - T1_init[:,np.newaxis] # Which way of rotating is correct? What about translation?

[1.19004459e+03 1.19004459e+03 6.84284213e-14]
No. valid coords for each camera pair: [12095 12095 24190     0]
Argmax(P2_arr): 2


In [37]:
P1 = np.column_stack((abs_rots[init_pair[0]], abs_trans[init_pair[0]]))
P2 = np.column_stack((abs_rots[init_pair[1]], abs_trans[init_pair[1]]))
P_arr = np.array([P1, P2])
C_arr, axis_arr = cv.compute_camera_center_and_normalized_principal_axis(P_arr, multi=True)
s = 2
plot_cameras_and_3D_points(X, C_arr, axis_arr, s, None, save=False)

In [16]:
alpha = 0.5
epsilon = 0.2
s = 8
np.ceil(np.log(1-alpha) / np.log(1-epsilon**s))

270761.0

In [12]:
plt_3D = True
save = False

x = np.linspace(0,10,11)
y = np.random.rand(11)
fig = plt.figure()
plt.plot(x,y)
fig.savefig('report-images/test.png', bbox_inches='tight')
img = cv.load_image('report-images/test.png')
plt.imshow(img)

<matplotlib.image.AxesImage at 0x25c6cbab9d0>