In [9]:
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List

import numpy as np
import gtsam

import gtsfm.utils.io as io_utils
import gtsfm.utils.logger as logger_utils
from gtsfm.utils import align, transform

import visu3d as v3d

# Notebook-wide logger; load_poses() uses it to report how many poses were read.
logger = logger_utils.get_logger()


In [10]:
# All input directories are resolved against the repository root, taken to be
# the parent of the directory the notebook kernel was started in.
REPO_ROOT = Path.cwd().parent.resolve()
GT_MODEL_DIR = REPO_ROOT / "tests/data/set1_lund_door/colmap_ground_truth"
ANY_SPLAT_MODEL_DIR = REPO_ROOT / "results/anysplat/door/with_parent_and_leaf_BA/results/merged"
VGGT_MODEL_DIR = REPO_ROOT / "results/vggt/door/with_parent_BA/results/merged"
MVO_SPLAT_MODEL_DIR = REPO_ROOT / "mvo_door_results/results/ba_output"

# Echo the resolved locations, one per line, so a wrong working directory is
# obvious before any data is loaded.
print(
    "\n".join(
        f"{label}: {location}"
        for label, location in (
            ("Using repo root", REPO_ROOT),
            ("Ground truth dir", GT_MODEL_DIR),
            ("AnySplat dir", ANY_SPLAT_MODEL_DIR),
            ("VGGT dir", VGGT_MODEL_DIR),
            ("MVO dir", MVO_SPLAT_MODEL_DIR),
        )
    )
)

Using repo root: /home/hkhanuja3/testing_gtsfm/gtsfm
Ground truth dir: /home/hkhanuja3/testing_gtsfm/gtsfm/tests/data/set1_lund_door/colmap_ground_truth
AnySplat dir: /home/hkhanuja3/testing_gtsfm/gtsfm/results/anysplat/door/with_parent_and_leaf_BA/results/merged
VGGT dir: /home/hkhanuja3/testing_gtsfm/gtsfm/results/vggt/door/with_parent_BA/results/merged
MVO dir: /home/hkhanuja3/testing_gtsfm/gtsfm/mvo_door_results/results/ba_output


In [11]:
@dataclass
class PoseRecord:
    """Plain-data view of a single camera pose.

    Instances are built by ``pose_records_from_map`` from the entries of a pose map.
    """

    # Key of the pose in the source pose map (load_poses keys its maps by image file name).
    name: str
    # Pose translation copied into a length-3 float64 array.
    center: np.ndarray
    # Rotation matrix obtained from pose.rotation().matrix().
    rotation: np.ndarray
    # The rotation applied to the [0, 0, 1] axis, then passed through _normalize.
    direction: np.ndarray


def _normalize(vec: np.ndarray) -> np.ndarray:
    norm = np.linalg.norm(vec)
    if norm < 1e-9:
        return vec
    return vec / norm

def calibration_to_spec(calib: gtsam.Cal3, hw: tuple[int, int]) -> v3d.CameraSpec:
    """Convert a GTSAM calibration object into a visu3d pinhole camera spec.

    Only the focal lengths and the principal point are carried over; the
    resulting intrinsics matrix has zero skew and no distortion terms.

    Args:
        calib: A ``Cal3_S2``, ``Cal3Bundler``, ``Cal3DS2`` or ``Cal3Unified`` instance.
        hw: Image size as ``(height, width)``; forwarded as the spec resolution.

    Returns:
        A ``v3d.PinholeCamera`` built from the 3x3 float32 intrinsics matrix.

    Raises:
        ValueError: If ``calib`` is not one of the supported calibration types.
    """
    h, w = hw
    if isinstance(calib, gtsam.Cal3Bundler):
        # Cal3Bundler models a single focal length, shared by both axes.
        fx = fy = calib.fx()
    elif isinstance(calib, (gtsam.Cal3_S2, gtsam.Cal3DS2, gtsam.Cal3Unified)):
        # These models keep independent fx/fy. Cal3Unified was previously
        # grouped with Cal3Bundler, which discarded its fy.
        fx, fy = calib.fx(), calib.fy()
    else:
        raise ValueError(f"Unsupported calibration type: {type(calib)}")

    # The principal point accessors are the same for every supported model.
    cx, cy = calib.px(), calib.py()

    K = np.array([[fx, 0, cx],
                  [0, fy, cy],
                  [0,  0,  1]], dtype=np.float32)
    return v3d.PinholeCamera(K=K, resolution=(h, w))

def load_poses(colmap_dirpath):
    """Read a COLMAP-format model and index its cameras by image file name.

    Args:
        colmap_dirpath: Directory passed to ``io_utils.read_scene_data_from_colmap_format``.

    Returns:
        A ``(pose_map, spec_map)`` pair. Both dicts are keyed by the basename of
        each image path; ``pose_map`` holds the loaded poses and ``spec_map`` the
        camera specs produced by ``calibration_to_spec``.
    """
    scene = io_utils.read_scene_data_from_colmap_format(colmap_dirpath)
    wTi_list, img_fnames, calibrations, _, _, img_dims = scene

    pose_map = {}
    spec_map = {}
    for fname, wTi, intrinsics, dims in zip(img_fnames, wTi_list, calibrations, img_dims):
        # Drop any directory prefix so models stored with different image roots share keys.
        basename = Path(fname).name
        pose_map[basename] = wTi
        spec_map[basename] = calibration_to_spec(intrinsics, dims)

    logger.info("Loaded %d poses from %s", len(pose_map), colmap_dirpath)
    return pose_map, spec_map
    

def pose_records_from_map(pose_map: Dict[str, gtsam.Pose3]) -> List[PoseRecord]:
    """Flatten a pose map into ``PoseRecord`` entries ordered by key.

    Each record carries the pose translation, its rotation matrix, and the
    normalized result of rotating the [0, 0, 1] axis by that matrix.
    """
    optical_axis = np.array([0.0, 0.0, 1.0], dtype=np.float64)

    def _to_record(key: str) -> PoseRecord:
        wTc = pose_map[key]
        xyz = wTc.translation()
        position = np.array([xyz[0], xyz[1], xyz[2]], dtype=np.float64)
        rot = wTc.rotation().matrix()
        return PoseRecord(
            name=key,
            center=position,
            rotation=rot,
            direction=_normalize(rot @ optical_axis),
        )

    return [_to_record(key) for key in sorted(pose_map.keys())]


In [12]:
def align_pose_maps_to_reference(
    reference_pose_map: Dict[str, gtsam.Pose3], estimate_pose_map: Dict[str, gtsam.Pose3]
) -> tuple[Dict[str, gtsam.Pose3], dict]:
    """Align an estimated pose map to a reference pose map with a Sim(3) transform.

    The transform is estimated only from cameras whose names appear in both
    maps, then applied to every pose in ``estimate_pose_map`` (including those
    without a reference counterpart).

    Args:
        reference_pose_map: Poses defining the target frame, keyed by name.
        estimate_pose_map: Poses to be aligned, keyed by name.

    Returns:
        ``(aligned_map, summary)`` where ``aligned_map`` has the same keys as
        ``estimate_pose_map`` and ``summary`` holds ``"scale"``,
        ``"translation"``, ``"rotation"`` and ``"num_common"``.

    Raises:
        ValueError: If fewer than two camera names are shared by the two maps.
    """
    common_names = sorted(set(reference_pose_map) & set(estimate_pose_map))
    if len(common_names) < 2:
        raise ValueError("Need at least 2 overlapping cameras to estimate Sim(3) alignment")

    ref_list = [reference_pose_map[name] for name in common_names]
    est_list = [estimate_pose_map[name] for name in common_names]
    aSb = align.sim3_from_Pose3s(ref_list, est_list)

    # The overlap check above guarantees estimate_pose_map is non-empty, so the
    # former empty-map fallbacks were unreachable and have been removed.
    names = list(estimate_pose_map)
    aligned_list = transform.Pose3s_with_sim3(aSb, [estimate_pose_map[name] for name in names])
    aligned_map = dict(zip(names, aligned_list))

    summary = {
        "scale": float(aSb.scale()),
        "translation": np.array(aSb.translation()),
        "rotation": aSb.rotation().matrix(),
        "num_common": len(common_names),
    }
    return aligned_map, summary


In [13]:
import dataclass_array as dca

def make_camera_batch(pose_map, spec_map) -> v3d.Camera:
    """Stack one ``v3d.Camera`` per entry of ``pose_map``, ordered by key.

    ``spec_map`` must provide a camera spec under every key of ``pose_map``.
    """

    def _to_camera(key):
        wTc = pose_map[key]
        intrinsics = spec_map[key]
        origin = wTc.translation()
        extrinsics = v3d.Transform(
            R=wTc.rotation().matrix(),
            t=np.array([origin[0], origin[1], origin[2]], dtype=np.float32),
        )
        return v3d.Camera(spec=intrinsics, world_from_cam=extrinsics)

    return dca.stack([_to_camera(key) for key in sorted(pose_map)])


In [14]:
def _report_alignment(label: str, num_gt_poses: int, aligned_records: list, summary: dict) -> None:
    """Print pose counts and Sim(3) alignment statistics for one reconstruction.

    Args:
        label: Method name inserted into the printed messages.
        num_gt_poses: Number of ground-truth pose records.
        aligned_records: Pose records of the aligned reconstruction.
        summary: Alignment summary; its ``"num_common"`` and ``"scale"`` entries are printed.
    """
    print(f"Loaded {num_gt_poses} GT poses and {len(aligned_records)} {label} poses.")
    print(f"Alignment used {summary['num_common']} shared image names between {label} and gt.")
    print(f"Estimated scale: {summary['scale']:.4f} for {label}")


# Load the ground-truth model and each reconstruction (AnySplat is currently disabled).
gt_pose_map, gt_spec_map = load_poses(str(GT_MODEL_DIR))
# any_pose_map, any_spec_map = load_poses(str(ANY_SPLAT_MODEL_DIR))
vggt_pose_map, vggt_spec_map = load_poses(str(VGGT_MODEL_DIR))
mvo_pose_map, mvo_spec_map = load_poses(str(MVO_SPLAT_MODEL_DIR))

# Bring every reconstruction into the ground-truth frame via a Sim(3) alignment.
# aligned_any_pose_map, sim3_summary_any = align_pose_maps_to_reference(gt_pose_map, any_pose_map)
aligned_vggt_pose_map, sim3_summary_vggt = align_pose_maps_to_reference(gt_pose_map, vggt_pose_map)
aligned_mvo_pose_map, sim3_summary_mvo = align_pose_maps_to_reference(gt_pose_map, mvo_pose_map)

# Flatten the pose maps into per-camera records.
gt_records = pose_records_from_map(gt_pose_map)
# aligned_any_records = pose_records_from_map(aligned_any_pose_map)
aligned_vggt_records = pose_records_from_map(aligned_vggt_pose_map)
aligned_mvo_records = pose_records_from_map(aligned_mvo_pose_map)

# One report per method; the helper replaces three copy-pasted print triples.
# _report_alignment("AnySplat", len(gt_records), aligned_any_records, sim3_summary_any)
_report_alignment("VGGT", len(gt_records), aligned_vggt_records, sim3_summary_vggt)
_report_alignment("mvo", len(gt_records), aligned_mvo_records, sim3_summary_mvo)


2025-12-08 04:06:21 [tricorder.cc.gatech.edu-main] [737560790.py] INFO: Loaded 12 poses from /home/hkhanuja3/testing_gtsfm/gtsfm/tests/data/set1_lund_door/colmap_ground_truth
2025-12-08 04:06:21 [tricorder.cc.gatech.edu-main] [737560790.py] INFO: Loaded 12 poses from /home/hkhanuja3/testing_gtsfm/gtsfm/results/vggt/door/with_parent_BA/results/merged
2025-12-08 04:06:21 [tricorder.cc.gatech.edu-main] [737560790.py] INFO: Loaded 12 poses from /home/hkhanuja3/testing_gtsfm/gtsfm/mvo_door_results/results/ba_output
Loaded 12 GT poses and 12 VGGT poses.
Alignment used 12 shared image names between VGGT and gt.
Estimated scale: 20.4107 for VGGT
Loaded 12 GT poses and 12 mvo poses.
Alignment used 12 shared image names between mvo and gt.
Estimated scale: 1.5403 for mvo


In [15]:
# Build one stacked camera batch per model; the estimates use their aligned poses.
gt_cams = make_camera_batch(gt_pose_map, gt_spec_map)
# any_cams = make_camera_batch(aligned_any_pose_map, any_spec_map)
vggt_cams = make_camera_batch(aligned_vggt_pose_map, vggt_spec_map)
mvo_cams = make_camera_batch(aligned_mvo_pose_map, mvo_spec_map)

# Scale passed to replace_fig_config for every plotted camera batch.
CAMERA_DISPLAY_SCALE = 0.3

# NOTE(review): mvo_cams is built above but is not among the traces plotted here.
camera_traces = (
    gt_cams.replace_fig_config(name="Ground truth", scale=CAMERA_DISPLAY_SCALE),
    vggt_cams.replace_fig_config(name="VGGT (aligned)", scale=CAMERA_DISPLAY_SCALE),
)

fig = v3d.make_fig(*camera_traces, show_zero=True)
fig.update_layout(width=800, height=600)
fig.show()


In [16]:
# def rays_from_records(records: List[PoseRecord]) -> v3d.Ray:
#     centers = np.stack([rec.center for rec in records], axis=0).astype(np.float32)
#     directions = np.stack([rec.direction for rec in records], axis=0).astype(np.float32)
#     return v3d.Ray(pos=centers, dir=directions).normalize()


# gt_rays = rays_from_records(gt_records)
# any_aligned_rays = rays_from_records(aligned_any_records)
# mvo_aligned_rays = rays_from_records(aligned_mvo_records)

# gt_rays.pos.shape, any_aligned_rays.pos.shape, mvo_aligned_rays.pos.shape
# gt_rays_named = gt_rays.replace(
#     fig_config=gt_rays.fig_config.replace(name="Ground truth")
# )
# any_rays_named = any_aligned_rays.replace(
#     fig_config=any_aligned_rays.fig_config.replace(name="AnySplat (aligned)")
# )
# mvo_rays_named = mvo_aligned_rays.replace(
#     fig_config=mvo_aligned_rays.fig_config.replace(name="MVO (aligned)")
# )

# fig = v3d.make_fig(
#     gt_rays_named,
#     # any_rays_named,
#     mvo_rays_named,
#     show_zero=True,
#     cam_scale=0.5,
# )
# fig.show() 