# Exercise 1 - 3D - 2D

In [3]:
import numpy as np
import cv2 as cv2
from numpy.linalg import inv, pinv
import matplotlib.pyplot as plt

Recall from the slides the steps from Algorithm 3:

![title](algorithm_3.png)

![title](PnP.png)

# Exercise 1a)
The steps 1)-2.1) has already been done, and is saved in corresponding files. The exercise is to implement step 2.2) by filling in the missing code below

In [7]:
def featureTracking(prev_img, next_img, prev_points, world_points):
    """
    Use OpenCV to find the prev_points from the prev_img in the next_img
    Remember to remove points that could not be found from prev_points, next_points, and world_points
    hint: status == 1
    """
    params = dict(winSize=(21, 21), # Window size of LK
                 maxLevel=3,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 30, 0.01))
    print(prev_img.shape)
    print(type(prev_img))
    print(next_img.shape)
    print(type(next_img))
    print(prev_points.shape)
    print(type(prev_points))
    print("---------------------------------------------")
    
    # 1 Sparse optical flow: using LK (Lucas Kanada)
    next_points, status, _ = cv2.calcOpticalFlowPyrLK(prev_img, next_img, prev_points, None, **params)
    
    # 2 Remove points that was not detected by both images, from 2D and 3D!
    next_points = next_points[np.where(np.any(status==1, axis=1))[0], :]
    world_points = world_points[np.where(np.any(status==1, axis=1))[0], :]

    return world_points, prev_points, next_points

Hint: Exercise 4 in week 2

# Exercise 1b)
Continue the algorithm by implementing step 2.3)

In [8]:
K = np.array([[7.188560e+02, 0.000000e+00, 6.071928e+02], # camera matrix
              [0, 7.188560e+02, 1.852157e+02],
              [0, 0, 1]])

reference_img = np.load("img_" + str(0) + ".npy")

for t in range(1, 6):

    # the image at current time=t
    curImage = np.load("img_" + str(t) + ".npy")
    # the 3D landmarks in the world coordinates which have been computed in time=t-1
    landmark_3D = np.load("landmark_" + str(t-1) + ".npy")
    # the 2D coordinates of the 3D points in the previous frame at time=t-1
    reference_2D = np.load("reference_2D_" + str(t-1) + ".npy")
    
    # the 2D landmarks at the current time = t
    # (2.2)
    landmark_3D, reference_2D, tracked_2Dpoints = featureTracking(reference_img, 
                                                                  curImage, 
                                                                  reference_2D,
                                                                  landmark_3D)
    
    """
    Using OpenCV, implement PnP using Ransac
    """
    # (2.3) Get rotation and transcaltion from PnP. 
    # Rotation you get out from the solve pnp ransac is in camera frame you need to transform it into world frame
    _, rvec, tvec, inliers = cv2.solvePnPRansac(landmark_3D, tracked_2Dpoints, K, distCoeffs=None)

    """
    Transform the translation and rotation into the world frame
   
        - R_camera @ t_camera = t_world / t_world = inv(R_camera)@t_camera
        - What we get is a rotation vector not matrix. Use Rodrigues to transform.
            - We get exponential axis-rotations in rvec!
     """
    
    R_world = rvec
    t_camera = tvec
    R_matrix, _ = cv2.Rodrigues(rvec)
    t_world = np.linalg.inv(R_matrix)@t_camera # R_world is (3,1) matrix. 
    print(t_world[0], t_world[1], t_world[2], R_world[0], R_world[1], R_world[2])
    # update for next timestep
    reference_img = curImage

(376, 1241)
<class 'numpy.ndarray'>
(376, 1241)
<class 'numpy.ndarray'>
(631, 2)
<class 'numpy.ndarray'>
---------------------------------------------
[0.00110282] [0.00067164] [0.00078343] [-7.40069212e-05] [-7.35119065e-05] [9.84544279e-05]
(376, 1241)
<class 'numpy.ndarray'>
(376, 1241)
<class 'numpy.ndarray'>
(621, 2)
<class 'numpy.ndarray'>
---------------------------------------------
[0.00363949] [0.00875088] [-0.67580836] [-0.00216658] [0.00325854] [-0.00244333]
(376, 1241)
<class 'numpy.ndarray'>
(376, 1241)
<class 'numpy.ndarray'>
(602, 2)
<class 'numpy.ndarray'>
---------------------------------------------
[0.01096317] [0.01635688] [-1.37740874] [-0.00364614] [0.00751509] [-0.00099692]
(376, 1241)
<class 'numpy.ndarray'>
(376, 1241)
<class 'numpy.ndarray'>
(625, 2)
<class 'numpy.ndarray'>
---------------------------------------------
[0.03156638] [0.02560108] [-2.09967983] [-0.00509583] [0.01121646] [-0.00082978]
(376, 1241)
<class 'numpy.ndarray'>
(376, 1241)
<class 'numpy

Hint: The output should look similar to:

[-0.00110282] [-0.00067164] [-0.00078343] [-7.40069212e-05] [-7.35119065e-05] [9.84544279e-05]

[-0.00363946] [-0.00875075] [0.67580842] [-0.0021666] [0.00325853] [-0.00244333]

[-0.01096271] [-0.01635663] [1.3774094] [-0.00364615] [0.0075151] [-0.00099691]

[-0.0315663] [-0.02560111] [2.0996797] [-0.00509583] [0.01121646] [-0.00082978]

[-0.04971858] [-0.03532535] [2.8330071] [-0.00561424] [0.0161333] [0.00041981]

# Exercise 1c)
What approximate direction did the camera move in?

In [6]:
v1 = np.array([-0.00110282, -0.00067164, -0.00078343])
v2 = np.array([-0.00363946, -0.00875075, 0.67580842])
v3 = np.array([-0.01096271, -0.01635663, 1.3774094])
v4 = np.array([-0.0315663, -0.02560111, 2.0996797])
v5 = np.array([-0.04971858, -0.03532535, 2.8330071])

print("Total translation:", v1+v2+v3+v4+v5)

Total translation: [-0.09698987 -0.08670548  6.98512119]


**=>** Pretty much moves along the z-axis