In [1]:
#%matplotlib tk
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.cm as cm
import numpy as np
import math
import importlib
np.random.seed(0)

In [2]:
from collections import Counter

In [3]:
import helper_utils.dlt_related as dlt_related
importlib.reload(dlt_related)
import helper_utils.annotate_images as img_helper
importlib.reload(img_helper)
import helper_utils.estimation_related as est_related
importlib.reload(est_related)

<module 'helper_utils.estimation_related' from '/home/anmolagarwal/Desktop/mr_assignment_3/helper_utils/estimation_related.py'>

## A lot of helper functions were overlapping across ipynb files and hence, I have implemented them in py files. Please find them in `helper_utils.estimation_related.py`

## --------------------------------------------------------


5. **Triangulation:** Given four configurations of relative camera pose, you will find the best camera pose by verifying through 3D point triangulation. Follow the below steps for the same:


### TODO: 5
1. Linear triangulation: Write a code that computes the 3D point given the correspondence, u ↔ v, and two camera projection matrices i.e. `[X] = LinearTriangulation(P1,u,P2,v)` Input: `P1, P2 ∈ R3×4` are two camera projection matrices, and `u ↔ v ∈ R2` are their 2D correspondence. Output: `X ∈ R3` is the triangulated 3D point. 


#### My notes:
Triangulation allows us to disambiguate four camera pose configuration obtained from the essential matrix.

##### Goal:
Given two camera poses, (C1, R1) and (C2, R2), and correspondences x1 ↔ x2, triangulate 3D
points using linear least squares.


### Design a custom experiment and dataset to check correctness
* I first generate world points ($X\_world$) at random,
* THen, I generate two valid extrinsic matrices for the 2 cameras.
* We already know the K matrix.
* Using K and the extrinsic matrices, I am able to find the $P$ matrix ie camera matrix for both the cameras ie $P\_mat\_1$ and $P\_mat\_1$
* Using the camera matrices, I am able to find the corresponding images for both the cameras ie $X_img_1$ and $X_img_2$
* Now, I use only $X\_img\_1$, $X\_img\_2$, $P\_mat\_1$ and $P\_mat\_2$ for triangulation and to get $X\_world\_predicted$.
* I show that my code is correct by verifying that $X\_world$ is almost same as $X\_world\_predicted$

<img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/8debf2c38beb4c25685cd503a90d1a4edfb10a14" alt="image 1" width="400"/>

#### Let K matrix of both cameras be the same

In [4]:
f_x = 382.1996765136719
f_y = 381.83956909
c_x = 312.7102355957031
c_y = 247.72047424316406
scaling_fac = 1000.0

In [5]:
K_mat = np.array([
    [f_x, scaling_fac, c_x,0],
    [0, f_y, c_y,0],
    [0, 0, 1,0]
    
])

#### Generate 3d coordinates

In [6]:
from scipy.stats import ortho_group

#### For cameras 1 and 2, initialize their respective projection matrices

In [7]:
def fetch_random_projection_matrix(intrinsic_mat):
    R_cam =  np.array(ortho_group.rvs(dim=3))
    P_cam=np.zeros((4,4))
    P_cam[0:3, 3]=np.random.rand(3)
    P_cam[0:3, 0:3]= R_cam
    P_cam[-1, -1] = 1
    P_mat = intrinsic_mat@P_cam
    return R_cam, P_mat

In [8]:
R_cam_1, P_mat_1 = fetch_random_projection_matrix(K_mat)
R_cam_2, P_mat_2 = fetch_random_projection_matrix(K_mat)

In [9]:
with np.printoptions(suppress=True):
    print(P_mat_1)

[[ 671.19752298 -710.88868964  536.65206629 1360.36590024]
 [ 192.17827046 -404.94282779   79.09303679  579.23321439]
 [   0.47588238   -0.52388181   -0.70645864    0.96366276]]


In [10]:
with np.printoptions(suppress=True):
    print(P_mat_2)

[[ 268.04283581 -949.77707154 -519.55830926 1561.04176675]
 [ 151.46372412 -428.63419206  -22.32396809  571.64314462]
 [   0.88148876   -0.28601249    0.37573186    0.79915856]]


#### Generate random points in 3D world and project them in the images of the 2 cameras

In [11]:
NUM_POINTS = 120
X_world = []
X_img_1  = []
X_img_2  = []
for i in range(NUM_POINTS):
    
    # random point in the world
    sample_world_point=np.random.rand(4)
    sample_world_point/=sample_world_point[-1]
    
    # image for camera 1
    image_point_1=P_mat_1 @ sample_world_point
    
    # image for camera 2
    image_point_2=P_mat_2 @ sample_world_point
    
    
    X_world.append(sample_world_point)
    X_img_1.append(image_point_1)
    X_img_2.append(image_point_2)
    
    
X_world = np.array(X_world)
X_img_1 = np.array(X_img_1)
X_img_2 = np.array(X_img_2)

In [12]:
for a in X_img_1:
    a/=a[-1]
for a in X_img_2:
    a/=a[-1]

In [13]:
print("Shapes X_world are: ", X_world.shape)
print("Shapes X_img_1 are: ", X_img_1.shape)
print("Shapes X_img_2 are: ", X_img_2.shape)

Shapes X_world are:  (120, 4)
Shapes X_img_1 are:  (120, 3)
Shapes X_img_2 are:  (120, 3)


#### Fetch world point

In [14]:
idx_test = 0

In [15]:
X_world[idx_test]

array([0.7211505 , 1.21972736, 0.1848266 , 1.        ])

In [16]:
px1 = X_img_1[idx_test]

In [17]:
px2 = X_img_2[idx_test]

In [18]:
best_ptx  = est_related.fetch_triangulate_point(px1, px2, P_mat_1, P_mat_2)

In [19]:
best_ptx

array([0.41347533, 0.69933693, 0.10597128, 0.57335512])

In [20]:
X_world[idx_test]/best_ptx

array([1.74411976, 1.74411976, 1.74411976, 1.74411976])

#### Implement LinearTriangulation and check it's result
Solved using least squares to recovery the triangulated world points.
Some of the helper functions I have used are in: `helper_utils/estimated_related.py`

In [21]:
def LinearTriangulation(P_mat_1,x_img_1,P_mat_2,x_img_2):
    assert(P_mat_1.shape==(3,4))
    assert(P_mat_2.shape==(3,4))
    assert(x_img_1.shape == x_img_2.shape)
    
    num_points = x_img_1.shape[0]
    
    X_world_predicted = []
    
    for px1, px2 in zip(x_img_1, x_img_2):
        best_world_ptx = est_related.fetch_triangulate_point(px1, px2, P_mat_1, P_mat_2)
        best_world_ptx/=best_world_ptx[-1]
        X_world_predicted.append(best_world_ptx)
    return np.array(X_world_predicted)

In [22]:
X_img_1.shape

(120, 3)

In [23]:
X_world_predicted = LinearTriangulation(P_mat_1,X_img_1,P_mat_2,X_img_2)

In [24]:
X_world_predicted.shape

(120, 4)

In [25]:
with np.printoptions(suppress=True):
    print(X_world)

[[ 0.7211505   1.21972736  0.1848266   1.        ]
 [ 0.34571123  2.27816644  1.25849101  1.        ]
 [ 0.46541135  1.36204689  0.80246849  1.        ]
 [ 0.03045674  1.00113707  0.99215755  1.        ]
 [ 2.15944869  1.56011544  0.82261239  1.        ]
 [ 1.04025023  0.08980327  0.99422765  1.        ]
 [ 0.57843368  0.35447479  0.86725051  1.        ]
 [ 5.58770962  4.29812658  9.68568446  1.        ]
 [ 0.82464935  0.636853    2.57848392  1.        ]
 [ 4.22478076  2.21449857  1.44026619  1.        ]
 [ 1.77999671  0.37475866  0.53314061  1.        ]
 [ 8.54325527  1.01043584  8.71965442  1.        ]
 [ 1.61439481  0.77482793  1.61489348  1.        ]
 [ 6.15045532  0.32603089  2.35287066  1.        ]
 [ 0.71486037  0.28659987  0.7675877   1.        ]
 [ 0.24171076  2.6092673   2.13498075  1.        ]
 [ 0.56305843  0.1010878   0.61976633  1.        ]
 [ 0.44472547  0.93171162  0.18399114  1.        ]
 [14.39290952  9.11057771 29.16879709  1.        ]
 [ 3.0700576   0.01739014  2.51

#### Are the projected triangulated points close to the ground truth ?

In [26]:
np.isclose(X_world, X_world_predicted)

array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
 

#### Yes, it is close to the ground truth

In [27]:
Counter(np.isclose(X_world, X_world_predicted).flatten().tolist())

Counter({True: 480})