# Metrics for FoundPose results

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import os
import sys
# Make the parent of the current working directory importable, adding it to
# sys.path only when it is not already listed there.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [None]:
import json
import math
import os
from pathlib import Path
from typing import List
import sys

import cv2
import dataclass_array as dca
import jax.numpy as jnp
import matplotlib.pyplot as plt
import mitsuba as mi
import numpy as np
from PIL import Image
import pycolmap
import pyrender
import trimesh
import visu3d as v3d

sys.path.append(os.path.abspath(os.path.join("..", "bop_toolkit")))
from bop_toolkit.bop_toolkit_lib.pose_error import vsd, mssd, mspd
from bop_toolkit.bop_toolkit_lib.misc import get_symmetry_transformations
from bop_toolkit.bop_toolkit_lib.renderer import create_renderer

import burybarrel.colmap_util as cutil
from burybarrel.image import render_v3d

In [None]:
# Create a depth-mode renderer from the BOP toolkit using the "vispy" backend.
# presumably 1920 x 875 is the image width x height in pixels — TODO confirm
# against the camera the intrinsics in camera.json belong to.
# NOTE: the setup cell further down rebinds `renderer` with an identical call
# before registering the barrel model on it.
renderer = create_renderer(1920, 875, renderer_type="vispy", mode="depth")

In [None]:
# Evaluation configuration: hypothesis selection mode, model files, the dataset
# being evaluated, and the estimated-pose files to score.
#
# If estimations have multiple hypotheses per image, setting this to True chooses
# the best hypothesis using ground-truth knowledge (this obviously skews results
# towards better performance). Otherwise only hypothesis 0 is evaluated.
rankbest_hyp = False
# 3D model of the object and the JSON file with its per-model info
# (the latter is later passed to get_symmetry_transformations).
objpath = Path("/scratch/jeyan/barreldata/models3d/barrelsingle-scaled.ply")
objinfopath = Path("/scratch/jeyan/barreldata/models3d/model_info.json")

# Dataset selection: exactly one of the presets below should be uncommented.
# Each preset sets the camera intrinsics file, the ground-truth pose file, an
# optional mask directory (None disables masking) and the output path.
# barrelddt1
# caminfopath = Path("/scratch/jeyan/barreldata/divedata/dive8/barrelddt1/camera.json")
# gtposepath = Path("/scratch/jeyan/barreldata/divedata/dive8/barrelddt1/gt_obj2cam.json")
# masksdir = Path("/scratch/jeyan/barreldata/divedata/dive8/barrelddt1/mask")
# savepath = Path("/scratch/jeyan/barreldata/results/barrelddt1/metricresults.json")
# barrel4
# caminfopath = Path("/scratch/jeyan/barreldata/divedata/dive8/barrel4/camera.json")
# gtposepath = Path("/scratch/jeyan/barreldata/divedata/dive8/barrel4/gt_obj2cam.json")
# masksdir = None
# savepath = Path("/scratch/jeyan/barreldata/results/barrel4/metricresults.json")
# barrel2
caminfopath = Path("/scratch/jeyan/barreldata/divedata/dive8/barrel2/camera.json")
gtposepath = Path("/scratch/jeyan/barreldata/divedata/dive8/barrel2/gt_obj2cam.json")
masksdir = None
savepath = Path("/scratch/jeyan/barreldata/results/barrel2/metricresults.json")
# dive3-depthcharge-03-04
# NOTE(review): the paths in this preset still point at dive8/barrel4 — looks
# like an unfinished copy of the barrel4 preset; fix before enabling.
# caminfopath = Path("/scratch/jeyan/barreldata/divedata/dive8/barrel4/camera.json")
# gtposepath = Path("/scratch/jeyan/barreldata/divedata/dive8/barrel4/gt_obj2cam.json")
# masksdir = None
# savepath = Path("/scratch/jeyan/barreldata/results/barrel4/metricresults.json")

# Estimated-pose files to evaluate, keyed by the run name used in the results
# JSON. All enabled entries are scored against the dataset selected above, so
# keep them in sync with that preset.
estimatedpaths = {
    # barrelddt1
    # "barrelddt1_renders_layer9": Path("/scratch/jeyan/foundpose/output_barrelddt1_renders_layer9/inference/estimated-poses.json"),
    # "barrelddt1_raw_layer9": Path("/scratch/jeyan/foundpose/output_barrelddt1_raw_layer9/inference/estimated-poses.json"),
    # "barrelddt1_raw_vitl_layer18": Path("/scratch/jeyan/foundpose/output_barrelddt1_raw_vitl_layer18/inference/estimated-poses.json"),
    # "barrelddt1_raw_vitl_layer18_fit": Path("/scratch/jeyan/barreldata/results/barrelddt1/estimated-poses.json"),
    # barrel4
    # "barrel4_renders_layer9": Path("/scratch/jeyan/foundpose/output_barrel4_renders_layer9/inference/estimated-poses.json"),
    # "barrel4_raw_layer9": Path("/scratch/jeyan/foundpose/output_barrel4_raw_layer9/inference/estimated-poses.json"),
    # "barrel4_raw_vitl_layer18": Path("/scratch/jeyan/foundpose/output_barrel4_raw_vitl_layer18/inference/estimated-poses.json"),
    # "barrel4_raw_vitl_layer18_fit": Path("/scratch/jeyan/barreldata/results/barrel4/estimated-poses.json"),
    # barrel2
    "barrel2": Path("/scratch/jeyan/foundpose/output_barrel2/inference/estimated-poses.json"),
    "barrel2_fit": Path("/scratch/jeyan/barreldata/results/barrel2/estimated-poses.json"),
}

# Load the ground-truth poses, the object model info and the camera intrinsics.
with open(gtposepath, "rt") as f:
    gtposes = json.load(f)
with open(objinfopath, "rt") as f:
    objinfo = json.load(f)
with open(caminfopath, "rt") as f:
    caminfo = json.load(f)

# Optional masks: every *.png in masksdir, read as grayscale and divided by 255.
# Masks are ordered by file name and later indexed in the same order as the
# sorted ground-truth poses — presumably the two orderings agree; verify.
maskpaths = None
masks = None
if masksdir is not None:
    maskpaths = sorted(masksdir.glob("*.png"))
    masks = []
    for mpath in maskpaths:
        # str(): older OpenCV builds do not accept pathlib.Path file names.
        maskimg = cv2.imread(str(mpath), cv2.IMREAD_GRAYSCALE)
        if maskimg is None:
            # cv2.imread reports failure by returning None instead of raising;
            # fail here with the offending path rather than on `None / 255`.
            raise OSError(f"could not read mask image: {mpath}")
        masks.append(maskimg / 255)

# Depth renderer with the barrel model registered under the id "barrel".
renderer = create_renderer(1920, 875, renderer_type="vispy", mode="depth")
renderer.add_object("barrel", objpath)

# Ground truth ordered by numeric image id; rotations are stacked as-is and
# translations get a trailing axis of length 1.
gtposes = sorted(gtposes, key=lambda entry: int(entry["img_id"]))
gt_Rs = np.array([entry["R"] for entry in gtposes])
gt_ts = np.expand_dims(np.array([entry["t"] for entry in gtposes]), axis=-1)
imgnames = [Path(entry["img_path"]).name for entry in gtposes]
imgids = [entry["img_id"] for entry in gtposes]

# Pinhole intrinsics matrix assembled from the camera info file.
K = np.eye(3, dtype=float)
K[0, 0], K[0, 2] = caminfo["fx"], caminfo["cx"]
K[1, 1], K[1, 2] = caminfo["fy"], caminfo["cy"]

# Symmetry transformations of the model (second argument is 0.01, as before).
symTs = get_symmetry_transformations(
    objinfo["barrelsingle-scaled.ply"],
    0.01,
)

# Model vertices as a plain array for the MSSD / MSPD computations.
mesh = trimesh.load(objpath)
vtxs = np.array(mesh.vertices)

In [None]:
# Pull the focal lengths and principal point out of K; these stay global
# because the metrics cell reuses them for every render.
fx = K[0, 0]
fy = K[1, 1]
cx = K[0, 2]
cy = K[1, 2]

# Quick check of the renderer: depth of the barrel at the first ground-truth pose.
R_gt, t_gt = gt_Rs[0], gt_ts[0]
depth_test = renderer.render_object(
    "barrel", R_gt, t_gt, fx, fy, cx, cy
)["depth"]
# plt.imshow(depth_test)

In [None]:
# Score every estimated-pose file against the ground truth with the BOP
# pose-error functions (VSD, MSSD, MSPD), then write the per-image errors and
# their averages for all runs to `savepath` as JSON.
allestmetrics = {}
for estname, estpath in estimatedpaths.items():
    with open(estpath, "rt") as f:
        ests = json.load(f)
    # Group the estimates by image file name once, instead of rescanning the
    # whole list for every ground-truth image. Order within a group is kept.
    ests_by_img = {}
    for est in ests:
        ests_by_img.setdefault(Path(est["img_path"]).name, []).append(est)
    allvsd = []
    allmssd = []
    allmspd = []
    for i, imgname in enumerate(imgnames):
        R_gt = gt_Rs[i]
        t_gt = gt_ts[i]
        # no test depth, just use ground truth with ground truth mask
        depth_test = renderer.render_object("barrel", R_gt, t_gt, fx, fy, cx, cy)["depth"]
        if masks is not None:
            depth_test = masks[i] * depth_test
        imgmatches = ests_by_img.get(imgname, [])
        imgvsd = []
        imgmssd = []
        imgmspd = []
        for imgmatch in imgmatches:
            # Without ground-truth ranking only hypothesis 0 is scored.
            # str(): also accept ids stored as JSON numbers rather than strings.
            if not rankbest_hyp and str(imgmatch["hypothesis_id"]) != "0":
                continue
            R_est = np.array(imgmatch["R"])
            t_est = np.array(imgmatch["t"])
            # Positional arguments after K are presumably delta=0.2, taus=[0.2],
            # normalized_by_diameter=False, diameter=None — confirm against the
            # bop_toolkit vsd signature and the units of the model.
            vsdres = vsd(R_est, t_est, R_gt, t_gt, depth_test, K, 0.2, [0.2], False, None, renderer, "barrel", "step")
            mssdres = mssd(R_est, t_est, R_gt, t_gt, vtxs, symTs)
            mspdres = mspd(R_est, t_est, R_gt, t_gt, K, vtxs, symTs)
            # vsd is indexed at 0: a single tau was passed, so one value is used.
            imgvsd.append(vsdres[0])
            imgmssd.append(mssdres)
            imgmspd.append(mspdres)
        if not imgvsd:
            # Fail with the image and run name instead of numpy's
            # "attempt to get argmin of an empty sequence".
            raise ValueError(
                f"no usable pose estimate for image {imgname!r} in run {estname!r} "
                f"({estpath}); rankbest_hyp={rankbest_hyp}"
            )
        # choose hypothesis with majority best metric between vsd, mssd, mspd;
        # np.argmax returns the first maximum, so a three-way split resolves to
        # the lowest-index hypothesis among the three winners.
        winnings = np.zeros(len(imgvsd), dtype=int)
        winnings[np.argmin(imgvsd)] += 1
        winnings[np.argmin(imgmssd)] += 1
        winnings[np.argmin(imgmspd)] += 1
        probablybest = np.argmax(winnings)
        allvsd.append(imgvsd[probablybest])
        allmssd.append(imgmssd[probablybest])
        allmspd.append(imgmspd[probablybest])
    allestmetrics[estname] = {
        "avg_vsd": float(np.mean(allvsd)),
        "avg_mssd": float(np.mean(allmssd)),
        "avg_mspd": float(np.mean(allmspd)),
        "all_vsd": np.array(allvsd).tolist(),
        "all_mssd": np.array(allmssd).tolist(),
        "all_mspd": np.array(allmspd).tolist(),
    }
# Create the results directory if needed so the write cannot fail on a fresh tree.
savepath.parent.mkdir(parents=True, exist_ok=True)
with open(savepath, "wt") as f:
    json.dump(allestmetrics, f)