In [1]:
import sys
sys.executable

'/Library/Frameworks/Python.framework/Versions/3.8/bin/python3.8'

In [9]:
import os
import argparse
import statistics
import numpy as np

from tqdm import tqdm
from pytorch3d.transforms import matrix_to_quaternion

import poselib

from solver import Up2P
from SolverPipeline import P3PBindingWrapperPipeline
from SolverPipeline import SolverPipeline as SP

In [10]:
VSP = "dataset/StMarysChurch_matches"
seq = [3, 5, 13]
SHOW_SINGLE = False

In [11]:
class Config:
    ransac_thresh = 13.
    max_side_length = 320
    max_ransac_iters = 10000
    
conf = Config()

In [12]:
p3pwrapper = P3PBindingWrapperPipeline(
    ransac_conf = {
       # 'max_reproj_error': args.ransac_thresh
       'min_iterations': min(100, conf.max_ransac_iters),
       'max_iterations': conf.max_ransac_iters,
       'progressive_sampling': True,
       'max_prosac_iterations': conf.max_ransac_iters
    },
    
    bundle_adj_conf = {
        'loss_scale' : 1.0,
    }                                              
)

In [13]:
camera = {'model': 'SIMPLE_RADIAL', 'width': 320, 'height': 180, 'params': [277.4716064453125, 160.0, 90.0, 0.0]}
pts2D = [
    np.array([192.12533569,  19.14378548]),
    np.array([91.60398102, 26.73556519]),
    np.array([180.32232666,  33.99654388]),
    np.array([192.33743286,  37.74715424]),
    np.array([188.43441772,  41.1788559 ])
]

pts3D = [
    np.array([ 11.86180782, -14.56327057,  -0.92378181]),
    np.array([ 6.79015875, -9.56949902, -1.78533459]),
    np.array([11.95058823, -0.89410073, -0.36948705]), 
    np.array([ 12.17275715, -13.31939125,  -0.34633577]),
    np.array([ 7.56372643, -2.60536647, -2.24980545])
]

In [52]:
class UP2PSolverPipeline(SP):
    
    def __init__(self):
        self.solver = Up2P()
    
    def __call__(self, pts2D, pts3D, camera_dict):
        
        w, h = camera_dict["width"], camera_dict["height"]
        params = camera_dict["params"]
        f, cx, cy, _ = params
        
        pts2D = np.concatenate([pts2D, np.ones(pts2D.shape[0])[:, np.newaxis]], axis=1)
        pts2D[:, 0] -= cx
        pts2D[:, 1] -= cy
        pts2D[:2] /= f
        pts2D /= np.linalg.norm(pts2D)
        
        solution, sol_err = None, float("+inf")
        
        for i in range(2, len(pts2D)):
            try:
                solv_res = self.solver(pts2D[i - 2 : i], pts3D[i - 2 : i])
                best_sol, err = None, float("+inf")

                for sol in solv_res:
                    R, t = sol
                    R, t = R.detach().cpu().numpy(), t.detach().cpu().numpy()
                    translated = R.T @ (pts3D[i] - t)
                    translated[:2] /= translated[2]
                    translated[:2] *= f
                    translated[0] += cx
                    translated[1] += cy
                    if np.linalg.norm(translated - pts2D[i]) < err:
                        err = np.linalg.norm(translated - pts2D[i])
                        best_sol = sol

                if err < sol_err:
                    sol_err = err
                    solution = best_sol    
            except Exception as ex:
                # print(ex)
                continue
        
        pose = poselib.CameraPose()
        pose.q = matrix_to_quaternion(solution[0])
        pose.t = solution[1]
        
        return pose
        
solv_pipe = UP2PSolverPipeline()

In [54]:
res = solv_pipe(
    np.array(pts2D),
    np.array(pts3D),
    camera
)
res, type(res)

([q: 0.876781        0 0.480889        0, t:  18.1798 -30.5427 -9.15747],
 poselib.CameraPose)

In [55]:
pose = p3pwrapper(
    np.array(pts2D),
    np.array(pts3D),
    camera
)
pose.t = - pose.R.T @ pose.t
pose

[q:  0.770395 -0.620412  0.145098 -0.022967, t:   12.4091   4.84072 -0.453523]

In [18]:
def process_file(executor: SP, path: str, conf, camera_dict, gts):
    data = np.load(path)
    pts2D = list(data[:, :2])
    pts3D = list(data[:, 2:])
    pp = "/".join(path.split("/")[-2:])
    pp = pp.replace("_matches", "")
    pp = pp.replace(".npy", ".png")
    camera_dict = camera_dict[pp]
    gt = gts[pp]
    c, r = gt[:3], gt[3:]

    pose = executor(np.array(pts2D), np.array(pts3D), camera_dict)

    pose.t = - pose.R.T @ pose.t          

    gt_pose = poselib.CameraPose()
    gt_pose.q = r / np.linalg.norm(r)

    rot_error = np.arccos((np.trace(np.matmul(gt_pose.R.transpose(), pose.R)) - 1.0) / 2.0) * 180.0 / np.pi

    if SHOW_SINGLE:
        print(np.trace(np.matmul(gt_pose.R.transpose(), pose.R)))
        print(" Position error: " + str(np.linalg.norm(c - pose.t)) + " orientation error: " + str(rot_error))
    if np.isnan(rot_error):
        return 1000000.0, 180.0
    else:
        return np.linalg.norm(c - pose.t), rot_error

In [19]:
def prepare_camera_dict(path: str, args):
    with open(path) as file:
        data = file.readlines()

    camera_dict = {}
    for _, line in enumerate(data):
        # image width, image height, focal length, x of pp, y of pp, radial distortion factor 
        path, cam_type, w, h, f, x, y, rd = line.split()
        scaling_factor = 320 / max(np.float32(w), np.float32(h))
  
        # camera = {'model': 'SIMPLE_PINHOLE', 'width': 1200, 'height': 800, 'params': [960, 600, 400]}
        camera_dict[path] = {
          'model': cam_type,
          'width': int(np.float32(w) * scaling_factor),
          'height': int(np.float32(h) * scaling_factor),
          'params': list(map(float, [np.float32(f) * scaling_factor,
                                 np.float32(x) * scaling_factor,
                                 np.float32(y) * scaling_factor,
                                 np.float32(rd)])),
        }
  
    return camera_dict

In [20]:
def prepare_gts(path: str):
    # ImageFile, Camera Position [X Y Z W P Q R]

    with open(path) as file:
        data = file.readlines()

    gts = {}
    for _, line in enumerate(data):
        try:
          # seq13/frame00158.png 25.317314 -0.228082 54.493720 0.374564 0.002123 0.915022 -0.149782
          path, x, y, z, w, p, q, r = line.split()
          rest = [x, y, z, w, p, q, r]
          rest = list(map(float, rest))
        except Exception as ex:
          # print(ex)
          continue
        gts[path] = rest

    return gts

In [21]:
camera_dict = prepare_camera_dict(
    "dataset/StMarysChurch_matches/st_marys_church_list_queries_with_intrinsics_simple_radial_sorted.txt",
    conf
)

gt_dict = prepare_gts(
    "dataset/StMarysChurch_matches/dataset_test.txt"
)

In [22]:
orientation_errors, pose_errors = [], []
for s in seq:
    p = f"{VSP}/seq{s}"
    for f in tqdm(os.listdir(p)):
        if f.split(".")[1] != "npy":
            continue
        pe, oe = process_file(p3pwrapper, f"{p}/{f}", conf, camera_dict, gt_dict)
        pose_errors.append(pe)
        orientation_errors.append(oe)

100%|██████████| 99/99 [00:00<00:00, 225.70it/s]
100%|██████████| 83/83 [00:00<00:00, 188.78it/s]
100%|██████████| 351/351 [00:02<00:00, 129.42it/s]


In [23]:
pos_errors = pose_errors
orient_errors = orientation_errors
print(" Couldn't localize " + str(orientation_errors.count(180.0)) + " out of " + str(len(orientation_errors)) + " images") 
print(" Median position error: " +  str(round(statistics.median(pos_errors),3)) + ", median orientation errors: " + str(round(statistics.median(orient_errors),2)))

med_pos = statistics.median(pos_errors)
med_orient = statistics.median(orient_errors)
counter = 0
for i in range(0, len(pose_errors)):
    if pose_errors[i] <= med_pos and orientation_errors[i] <= med_orient:
        counter += 1
print(" Percentage of poses within the median: " + str(100.0 * float(counter) / float(len(pose_errors))) + " % ")

 Couldn't localize 0 out of 530 images
 Median position error: 0.086, median orientation errors: 0.29
 Percentage of poses within the median: 41.132075471698116 % 


In [56]:
orientation_errors, pose_errors = [], []
for s in seq:
    p = f"{VSP}/seq{s}"
    for f in tqdm(os.listdir(p)):
        if f.split(".")[1] != "npy":
            continue
        pe, oe = process_file(solv_pipe, f"{p}/{f}", conf, camera_dict, gt_dict)
        pose_errors.append(pe)
        orientation_errors.append(oe)

100%|██████████| 99/99 [00:37<00:00,  2.64it/s]
100%|██████████| 83/83 [00:49<00:00,  1.66it/s]
100%|██████████| 351/351 [01:52<00:00,  3.13it/s]


In [57]:
pos_errors = pose_errors
orient_errors = orientation_errors
print(" Couldn't localize " + str(orientation_errors.count(180.0)) + " out of " + str(len(orientation_errors)) + " images") 
print(" Median position error: " +  str(round(statistics.median(pos_errors),3)) + ", median orientation errors: " + str(round(statistics.median(orient_errors),2)))

med_pos = statistics.median(pos_errors)
med_orient = statistics.median(orient_errors)
counter = 0
for i in range(0, len(pose_errors)):
    if pose_errors[i] <= med_pos and orientation_errors[i] <= med_orient:
        counter += 1
print(" Percentage of poses within the median: " + str(100.0 * float(counter) / float(len(pose_errors))) + " % ")

 Couldn't localize 0 out of 530 images
 Median position error: 30.337, median orientation errors: 103.41
 Percentage of poses within the median: 25.09433962264151 % 
