# <center> Pose estimation of camera networks</center>

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import torch

from cam import *
from pgo import *
from plot import *
from linalg import *
from dataset import Dataset

# Folder holding the rendered scene images (edit to match your setup).
RENDER_PATH = "./small_room_render"
# Folder holding the cube calibration images (edit to match your setup).
OBJ_RENDER_PATH = "./cube_calib_render/"
# Marker size in meters; cross-check the value against the render script.
MARKER_SIZE = 0.47 * 0.575
# Marker IDs in use, as strings: '0' through '23'. Verify which IDs the renders contain.
MARKER_IDS = [str(marker_id) for marker_id in range(24)]

# Build one Dataset per render folder: `dataset` for the scene renders and
# `obj_dataset` for the cube calibration renders. Later cells read their
# `im_data['cam']`, `im_data['filename']` and (for `dataset`) `cams` members.
dataset     = Dataset(root=RENDER_PATH)
obj_dataset = Dataset(root=OBJ_RENDER_PATH)

# 1. Calibrate cube

In [2]:
# Edit noise model here. Both weights decay exponentially with the Euclidean
# norm of the translation vector they receive.
def k_r(t):
    """Rotation edge weight: exp(-||t||_2)."""
    return np.exp(-1.0 * np.linalg.norm(t, ord=2))


def k_t(t):
    """Translation edge weight: exp(-||t||_2)."""
    return np.exp(-1.0 * np.linalg.norm(t, ord=2))


# Detect the markers in the cube calibration images and estimate one pose per detection.
aux = estimate_pose_mp(
    cams=obj_dataset.im_data['cam'],
    im_filenames=obj_dataset.im_data['filename'],
    aruco='DICT_4X4_1000',
    marker_size=MARKER_SIZE,
    corner_refine='CORNER_REFINE_APRILTAG',
    marker_ids=MARKER_IDS,
    flags='SOLVEPNP_IPPE_SQUARE',
    brightness=-120,
    contrast=120,
)

# Keep only the detections whose reprojection error is below 0.1.
aux = {edge: info for edge, info in aux.items() if info['reprojected_err'] < 0.1}

# Attach the noise weights to every surviving edge and re-key it as
# ('c' + <first node>, <text after the last '_' of the second node>).
cam_cube_edges = {}
for edge, info in aux.items():
    info['k_r'] = k_r(info['pose'].t())
    info['k_t'] = k_t(info['pose'].t())
    cam_node     = 'c' + edge[0]
    marker_label = edge[1].rsplit('_', 1)[-1]
    cam_cube_edges[cam_node, marker_label] = info

# Run pose-graph optimization, then discard every estimate whose key starts
# with 'c' (the prefix given to the camera-side nodes above), so that only the
# remaining nodes are kept -- presumably the cube markers; verify against PGO.
pgo = PGO(edges=cam_cube_edges, dtype=np.float32)
cube_pose_est = pgo.optimize()
cube_pose_est = {node: pose for node, pose in cube_pose_est.items() if node[0][0] != 'c'}


Marker detection
Received 777 images.
Started pool of 96 workers.
Merging dictionaries...
Found markers in 777 images
Finished: 6234 markers detected.

----------------* PGO *----------------
Received 5390 edges.
Total of 799 nodes.
Final graph is connected.
	799 nodes
	5390 edges
Building SO(3) sparse block-matrix...


  0%|          | 0/5390 [00:00<?, ?it/s]

	SO(3) Eigenvalues: [6.1845071e-06 6.8076492e-06 8.2665711e-06 1.2621890e-01 1.2621886e-01]
	SO(3) Eigengap:    1.527e+04
Building SO(3) sparse block-matrix...


  0%|          | 0/5390 [00:00<?, ?it/s]

Done!


# 2. Detect markers and estimate camera-marker poses through P4P
**cam_marker_edges** is a dictionary whose keys are tuples (str: node, str: node) and whose values are dictionaries with the keys _'pose'_, _'corners'_, _'reprojected_err'_ and _'im_filename'_. Alternatively, load a previously saved file to avoid recomputing the detections.

In [3]:
# Detector and PnP settings forwarded unchanged to estimate_pose_mp.
detection_kwargs = dict(
    aruco='DICT_4X4_1000',
    marker_size=MARKER_SIZE,
    corner_refine='CORNER_REFINE_APRILTAG',
    marker_ids=MARKER_IDS,
    flags='SOLVEPNP_IPPE_SQUARE',
    brightness=-120,
    contrast=120,
)

# Detect markers in every scene image and estimate one camera-marker pose per detection.
cam_marker_edges = estimate_pose_mp(
    cams=dataset.im_data['cam'],
    im_filenames=dataset.im_data['filename'],
    **detection_kwargs
)

# Store the edges next to the renders so that later sessions can reload them.
edges_file = os.path.join(RENDER_PATH, './cam_marker_edges.pt')
torch.save(cam_marker_edges, edges_file)
# To skip the detection step, comment out the code above and load the saved edges instead:
#cam_marker_edges = torch.load(os.path.join(RENDER_PATH, './cam_marker_edges.pt'))



Marker detection
Received 77307 images.
Started pool of 96 workers.
Merging dictionaries...
Found markers in 77130 images
Finished: 473214 markers detected.


## 2.3 Optimization

In [4]:
def _timestep_of(edge_key):
    """Integer parsed from the text before the first '_' of the edge's second node."""
    return int(edge_key[1].split('_')[0])


def _rotation_noise(pose):
    """Rotation weight: exp(-1.5 * ||t||_2) of the pose translation."""
    return np.exp(-1.5 * np.linalg.norm(pose.t(), ord=2))


def _translation_noise(pose):
    """Translation weight: exp(-1.0 * ||t||_2) of the pose translation."""
    return np.exp(-1.0 * np.linalg.norm(pose.t(), ord=2))  # use -1.0 for small room


def _has_low_reprojection_error(edge):
    """Accept an edge only when its reprojection error is below 0.05."""
    return edge['reprojected_err'] < 0.05


# Select a subset of timesteps: keep the edges whose timestep index is below 5000.
edges = {
    edge_key: edge_data
    for edge_key, edge_data in cam_marker_edges.items()
    if _timestep_of(edge_key) < 5000
}

# Synchronize the camera / timestep graph, using the calibrated cube poses as constraints.
pose_est = bipartite_se3sync(
    edges,
    constraints=cube_pose_est,
    noise_model_r=_rotation_noise,
    noise_model_t=_translation_noise,
    edge_filter=_has_low_reprojection_error,
    maxiter=3,
    lsqr_solver="conjugate_gradient",
    dtype=np.float32,
)

Received graph with 91017 nodes 356735 edges
Applying constraints (5.017s).
Bipartite graph: 25 cameras, 4999 timesteps, 49958 edges.
Building 25x4999 adjacency and 75x14997 SO(3) sparse matrices (0.665s).
Building power graph (0.092s).


Optimizing:   0%|          | 0/3 [00:00<?, ?it/s]

Building sparse 409866x15072 incidence matrix (14.923s).
Solving sparse linear system (1.166s).
Done!


## 2.4 Numeric results: comparison with ground-truth

In [5]:
def _summary(values):
    """Return (min, mean, median, max) of a sequence of numbers."""
    return np.min(values), np.mean(values), np.median(values), np.max(values)


# Split the dataset cameras by whether the optimization produced an estimate for them.
estimated_ids   = set(pose_est.keys())
valid_cam_ids   = [cam_id for cam_id in dataset.cams.keys() if cam_id in estimated_ids]
missing_cam_ids = [cam_id for cam_id in dataset.cams.keys() if cam_id not in estimated_ids]

# Find the transform G that aligns the estimates with the ground truth
# (inverted camera extrinsics) before measuring errors.
gt_poses  = [dataset.cams[cam_id].extrinsics.inv() for cam_id in valid_cam_ids]
est_poses = [pose_est[cam_id] for cam_id in valid_cam_ids]
G = optimize_gauge_SE3(gt_poses, est_poses)

# Per-camera errors after alignment.
r_err, t_err = [], []
for cam_id in valid_cam_ids:
    gt_pose      = dataset.cams[cam_id].extrinsics.inv()
    aligned_pose = pose_est[cam_id] @ G
    # Scaled by 100: printed as cm below (assumes the poses are expressed in meters).
    t_err.append(np.linalg.norm(gt_pose.t() - aligned_pose.t(), ord=2) * 100)
    r_err.append(distance_SO3(gt_pose.R(), aligned_pose.R()))

print("SO(3)\t min: {:.3f}deg  |  avg: {:.3f}deg  |  median: {:.3f}deg  |  max: {:.3f}deg".format(*_summary(r_err)))
print("E(3) \t min: {:.3f}cm   |  avg: {:.3f}cm   |  median: {:.3f}cm   |  max: {:.3f}cm".format(*_summary(t_err)))
print("Missing cam IDs: {}".format(missing_cam_ids))

SO(3)	 min: 0.000deg  |  avg: 0.051deg  |  median: 0.056deg  |  max: 0.093deg
E(3) 	 min: 3.235cm   |  avg: 24.376cm   |  median: 32.062cm   |  max: 44.623cm
Missing cam IDs: []


## 2.5 Top-down XY view plot

In [None]:
import json

# Top-down (XY) comparison of estimated cameras, ground-truth cameras and the
# calibration object positions. Relies on `pose_est`, `G` and `valid_cam_ids`
# from the previous cells.
fig, ax = plt.subplots(figsize=(15, 15))

# Estimated cameras: align each estimate with G, invert it and keep the first
# two translation components so that it is comparable with the raw extrinsics
# translation used for the ground truth below.
xy_est = np.stack([(pose_est[n] @ G).inv().t()[:2] for n in valid_cam_ids], axis=0)
ax.scatter(xy_est[:, 0], xy_est[:, 1], s=10, marker='x', c='blue',
           label="Estimated cameras ({})".format(len(valid_cam_ids)))

# Ground-truth cameras: every camera in the dataset, including any without an estimate.
xy_gt = np.stack([dataset.cams[n].extrinsics.t()[:2] for n in dataset.cams.keys()], axis=0)
# The original label used an undefined name (`cams`); count the plotted points instead.
ax.scatter(xy_gt[:, 0], xy_gt[:, 1], s=10, marker='^', c='green',
           label="Ground-truth cameras ({})".format(len(xy_gt)))

# Object markers: read the 't' entry of every marker in the 8 cube pose files.
xy_t = []
for i in range(8):
    # Format the file name before joining, so braces in RENDER_PATH cannot break it.
    pose_file = os.path.join(RENDER_PATH, 'aruco_cube_pose_{}.json'.format(i))
    with open(pose_file) as f:
        markers = json.load(f)
    xy_t.extend(v['t'][:2] for v in markers.values())
xy_t = np.stack(xy_t, axis=0)
# A single-row 2D array is always read as one RGBA colour, even if exactly 4 points are plotted.
ax.scatter(xy_t[:, 0], xy_t[:, 1], s=10, marker='s', c=[[1, 0, 0, 0.2]],
           label="Object ({})".format(len(xy_t)))

ax.axis('equal')
ax.set_title('Top-down XY view')
ax.set_xlabel('x (m)')
ax.set_ylabel('y (m)')
ax.legend()