In [None]:
%load_ext autoreload
%autoreload 2

from mast3r.model import AsymmetricMASt3R
from mast3r.fast_nn import fast_reciprocal_NNs
import os
import numpy as np
import trimesh
import copy
from scipy.spatial.transform import Rotation
import tempfile
import pandas as pd
import dataclasses
from pathlib import Path
import shutil
from copy import deepcopy
import torch
import glob
from mast3r.cloud_opt.sparse_ga import sparse_global_alignment
from mast3r.cloud_opt.tsdf_optimizer import TSDFPostProcess
from mast3r.image_pairs import make_pairs
from mast3r.retrieval.processor import Retriever
from mast3r.utils.misc import mkdir_for
from cust3r.utils.image import load_images
from dust3r.dust3r.utils.device import to_numpy
from dust3r.dust3r.viz import add_scene_cam, CAM_COLORS, OPENGL, pts3d_to_trimesh, cat_meshes
from dust3r.dust3r.demo import get_args_parser as dust3r_get_args_parser
import matplotlib.pyplot as pl
import imageio.v2 as iio
from cust3r.utils.camera import pose_encoding_to_camera
from cust3r.post_process import estimate_focal_knowing_depth
from cust3r.utils.geometry import geotrf
from cust3r.model import ARCroco3DStereo
from cust3r.inference import inference as inference_cust3r
import time
from boq.boq_infer import get_trained_boq, boq_sort_topk
import json

In [2]:
def _convert_scene_output_to_glb(imgs, pts3d, mask, focals, cams2world, cam_size=0.05,
                                 cam_color=None, as_pointcloud=False,
                                 transparent_cams=False, silent=False):
    """Assemble a ``trimesh.Scene`` from per-view colors, 3D points and cameras.

    Args:
        imgs: per-view color images; ``imgs[i].shape`` is used as the target shape
            for ``pts3d[i]``, so each image is presumably (H, W, 3) -- TODO confirm.
        pts3d: per-view 3D points, one entry per mask entry.
        mask: per-view boolean masks selecting the points/pixels to keep.
        focals: one focal length per camera, forwarded to ``add_scene_cam``.
        cams2world: one camera-to-world pose per camera (4x4, since the first pose
            is multiplied with 4x4 matrices below).
        cam_size: forwarded to ``add_scene_cam`` as ``screen_width``.
        cam_color: a list gives one color per camera; any other truthy value is used
            for every camera; a falsy value falls back to cycling through ``CAM_COLORS``.
        as_pointcloud: if True add a single colored point cloud, otherwise add one
            mesh built from the per-view meshes.
        transparent_cams: if True the cameras are added without their image.
        silent: accepted for interface compatibility; not used in this function.

    Returns:
        The populated ``trimesh.Scene``, transformed by the inverse of
        ``cams2world[0] @ OPENGL @ rot`` (``rot`` being a 180 degree turn about y).
    """
    assert len(pts3d) == len(mask) <= len(imgs) <= len(cams2world) == len(focals)
    pts3d = to_numpy(pts3d)
    imgs = to_numpy(imgs)
    focals = to_numpy(focals)
    cams2world = to_numpy(cams2world)
    scene = trimesh.Scene()
    # full pointcloud
    if as_pointcloud:
        pts = np.concatenate([p[m.ravel()] for p, m in zip(pts3d, mask)]).reshape(-1, 3)
        col = np.concatenate([p[m] for p, m in zip(imgs, mask)]).reshape(-1, 3)
        # drop points with NaN/inf coordinates
        valid_msk = np.isfinite(pts.sum(axis=1))
        pct = trimesh.PointCloud(pts[valid_msk], colors=col[valid_msk])
        scene.add_geometry(pct)
    else:
        meshes = []
        # The assert above allows more images than point maps. Iterating with zip
        # (as the point-cloud branch does) stops at the shortest sequence instead of
        # indexing past the end of pts3d/mask.
        for img_i, pts_i, mask_i in zip(imgs, pts3d, mask):
            pts3d_i = pts_i.reshape(img_i.shape)
            msk_i = mask_i & np.isfinite(pts3d_i.sum(axis=-1))
            meshes.append(pts3d_to_trimesh(img_i, pts3d_i, msk_i))
        mesh = trimesh.Trimesh(**cat_meshes(meshes))
        scene.add_geometry(mesh)
    # add each camera
    for i, pose_c2w in enumerate(cams2world):
        if isinstance(cam_color, list):
            camera_edge_color = cam_color[i]
        else:
            camera_edge_color = cam_color or CAM_COLORS[i % len(CAM_COLORS)]
        add_scene_cam(scene, pose_c2w, camera_edge_color,
                      None if transparent_cams else imgs[i], focals[i],
                      imsize=imgs[i].shape[1::-1], screen_width=cam_size)
    # Re-express the whole scene relative to the first camera (with the OPENGL
    # matrix and a 180 degree rotation about the y axis applied).
    rot = np.eye(4)
    rot[:3, :3] = Rotation.from_euler('y', np.deg2rad(180)).as_matrix()
    scene.apply_transform(np.linalg.inv(cams2world[0] @ OPENGL @ rot))
    return scene

def get_3D_model_from_scene(silent, scene, min_conf_thr=2, as_pointcloud=False, mask_sky=False,
                            clean_depth=False, transparent_cams=False, cam_size=0.05, TSDF_thresh=0):
    """
    Turn a reconstructed scene into a 3D model via ``_convert_scene_output_to_glb``.

    Images, focals and camera poses are read from ``scene``. Dense 3D points and
    confidences come from ``scene`` itself, or from a ``TSDFPostProcess`` wrapper
    around it when ``TSDF_thresh > 0``. Points are kept where the confidence is
    strictly greater than ``min_conf_thr``.

    ``mask_sky`` is accepted but not used in this function.
    """
    # optimized values stored on the scene
    rgbimg = scene.imgs
    focals = scene.get_focals().cpu()
    cams2world = scene.get_im_poses().cpu()

    # dense points come either straight from the scene or through the TSDF post-process
    dense_source = TSDFPostProcess(scene, TSDF_thresh=TSDF_thresh) if TSDF_thresh > 0 else scene
    pts3d, _, confs = to_numpy(dense_source.get_dense_pts3d(clean_depth=clean_depth))

    # per-view confidence mask
    msk = to_numpy([c > min_conf_thr for c in confs])

    return _convert_scene_output_to_glb(
        imgs=rgbimg,
        pts3d=pts3d,
        mask=msk,
        focals=focals,
        cams2world=cams2world,
        as_pointcloud=as_pointcloud,
        transparent_cams=transparent_cams,
        cam_size=cam_size,
        silent=silent,
    )
    
    

def get_reconstructed_scene(model, device, filelist,
                            cache_path,
                            retrieval_model = None,
                            silent = False,
                            optim_level = "refine+depth",
                            lr1 = 0.07, niter1 = 200, lr2 = 0.01, niter2 = 200,
                            min_conf_thr = 1.5,
                            matching_conf_thr = 0.0,
                            as_pointcloud = True, mask_sky = False, clean_depth =True, transparent_cams = False, cam_size = 0.2,
                            scenegraph_type = "complete", winsize=1, win_cyclic=False, refid=0,
                            TSDF_thresh=0.0, shared_intrinsics= False,
                            trimesh_scenes=False,
                            **kw):
    """
    Load the images in ``filelist``, build image pairs, and run sparse global alignment.

    Steps:
      1. ``load_images`` (size 512); a single image is duplicated so there are two views.
      2. The scene-graph string is assembled from ``scenegraph_type`` plus
         ``winsize`` / ``refid`` / ``'noncyclic'`` depending on the type.
      3. If the type contains ``'retrieval'`` a similarity matrix is computed with
         ``Retriever`` (``retrieval_model`` must be given). If it contains ``'boq'``
         the top-k dictionary is read from ``<cache_path>/boq_topk.json``.
      4. ``make_pairs`` and ``sparse_global_alignment`` are called; extra ``**kw`` are
         forwarded to the latter. ``optim_level == 'coarse'`` forces ``niter2 = 0`` and
         depth optimization is enabled when ``'depth'`` appears in ``optim_level``.

    Returns:
        ``(scenes, outlier_imgs)`` as returned by ``sparse_global_alignment``; when
        ``trimesh_scenes`` is truthy, each scene is first converted with
        ``get_3D_model_from_scene``.
    """
    imgs, imgs_id_dict = load_images(filelist, size=512, verbose=not silent)
    if len(imgs) == 1:
        # only one image: pair it with a copy of itself
        duplicate = copy.deepcopy(imgs[0])
        duplicate['idx'] = 1
        imgs = [imgs[0], duplicate]
        filelist = [filelist[0], filelist[0] + '_2']

    # scene-graph descriptor: tokens joined by '-'
    graph_tokens = [scenegraph_type]
    if scenegraph_type in ("swin", "logwin"):
        graph_tokens.append(str(winsize))
        if not win_cyclic:
            graph_tokens.append('noncyclic')
    elif scenegraph_type == "oneref":
        graph_tokens.append(str(refid))
    elif scenegraph_type == "retrieval":
        graph_tokens += [str(winsize), str(refid)]  # Na, k
    scene_graph = '-'.join(graph_tokens)

    # optional similarity matrix from the retrieval model
    sim_matrix = None
    if 'retrieval' in scenegraph_type:
        assert retrieval_model is not None
        retriever = Retriever(retrieval_model, backbone=model, device=device)
        with torch.no_grad():
            sim_matrix = retriever(filelist)
        # release the retriever before the heavy alignment step
        del retriever
        torch.cuda.empty_cache()

    # optional BoQ top-k neighbours, read from the cache directory
    boq_topks = None
    if 'boq' in scenegraph_type:
        boq_json = os.path.join(cache_path, "boq_topk.json")
        with open(boq_json, "r", encoding="utf-8") as f:
            boq_topks = json.load(f)

    pairs = make_pairs(imgs, scene_graph=scene_graph, prefilter=None,
                       symmetrize=False, sim_mat=sim_matrix, boq_topk_dict=boq_topks,
                       imgs_id_dict=imgs_id_dict)

    if optim_level == 'coarse':
        niter2 = 0

    # Sparse GA (forward mast3r -> matching -> 3D optim -> 2D refinement -> triangulation)
    scenes, outlier_imgs = sparse_global_alignment(filelist, imgs, imgs_id_dict, pairs, cache_path,
                                    model, lr1=lr1, niter1=niter1, lr2=lr2, niter2=niter2, device=device,
                                    opt_depth='depth' in optim_level, shared_intrinsics=shared_intrinsics,
                                    matching_conf_thr=matching_conf_thr, **kw)

    if not trimesh_scenes:
        return scenes, outlier_imgs

    converted = [
        get_3D_model_from_scene(silent, scene, min_conf_thr, as_pointcloud, mask_sky,
                                clean_depth, transparent_cams, cam_size, TSDF_thresh)
        for scene in scenes
    ]
    return converted, outlier_imgs


@dataclasses.dataclass
class Prediction:
    image_id: str | None  # A unique identifier for the row -- unused otherwise. Used only on the hidden test set.
    dataset: str
    filename: str
    cluster_index: int | None = None
    rotation: np.ndarray | None = None
    translation: np.ndarray | None = None



# Both models are placed on this device.
device = 'cuda:0'

# MASt3R network, restored from a local checkpoint.
model = AsymmetricMASt3R.from_pretrained(
    "ckpts/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric.pth"
).to(device)

# BoQ model, restored from a local checkpoint and switched to eval mode.
boq_model = get_trained_boq(
    backbone_name="dinov2",
    output_dim=12288,
    ckpt='ckpts/dinov2_12288.pth',
).to(device)
boq_model.eval()

print("Loaded boq model and MASt3R model successfully.")

... loading model from ckpts/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric.pth
instantiating : AsymmetricMASt3R(enc_depth=24, dec_depth=12, enc_embed_dim=1024, dec_embed_dim=768, enc_num_heads=16, dec_num_heads=12, pos_embed='RoPE100',img_size=(512, 512), head_type='catmlp+dpt', output_mode='pts3d+desc24', depth_mode=('exp', -inf, inf), conf_mode=('exp', 1, inf), patch_embed_cls='PatchEmbedDust3R', two_confs=True, desc_conf_mode=('exp', 0, inf), landscape_only=False)
_IncompatibleKeys(missing_keys=[], unexpected_keys=['mask_token'])




Loaded boq model and MASt3R model successfully.


In [13]:
# --- Run configuration ---------------------------------------------------------
# is_train = True : rows are read from train_labels.csv, images from <data_dir>/train.
# is_train = False: rows are read from sample_submission.csv, images from <data_dir>/test.
#                   Use this when submitting an entry to the competition (the real test
#                   data is hidden, and different from what is in the local "test" folder).
is_train = False
data_dir = 'data/image-matching-challenge-2025'
workdir = 'result/'
os.makedirs(workdir, exist_ok=True)
workdir = Path(workdir)

sample_submission_csv = os.path.join(
    data_dir, 'train_labels.csv' if is_train else 'sample_submission.csv'
)

# --- One Prediction per CSV row, grouped by dataset ------------------------------
# Note: For the test data, the "scene" column has no meaning, and the rotation_matrix
# and translation_vector columns are random.
samples = {}
competition_data = pd.read_csv(sample_submission_csv)
for _, row in competition_data.iterrows():
    samples.setdefault(row.dataset, []).append(
        Prediction(
            image_id=None if is_train else row.image_id,
            dataset=row.dataset,
            filename=row.image,
        )
    )

for dataset in samples:
    print(f'Dataset "{dataset}" -> num_images={len(samples[dataset])}')

# --- Debug filters ---------------------------------------------------------------
max_images = None  # Debugging only: cap on images per dataset. None disables the cap.
datasets_to_process = None  # None (or an empty list) means all datasets.

if is_train:
    # To debug on a handful of images, set e.g. max_images = 5 here.

    # Note: When running on the training dataset, the notebook will hit the time limit
    # and die. Use this filter to run on a few specific datasets.
    datasets_to_process = [
        # New data.
        # 'amy_gardens',
        'ETs',
        # 'fbk_vineyard',
        'stairs',
        # Data from IMC 2023 and 2024.
        # 'imc2024_dioscuri_baalshamin',
        # 'imc2023_theather_imc2024_church',
        # 'imc2023_heritage',
        # 'imc2023_haiper',
        # 'imc2024_lizard_pond',
        # Crowdsourced PhotoTourism data.
        # 'pt_stpeters_stpauls',
        # 'pt_brandenburg_british_buckingham',
        # 'pt_piazzasanmarco_grandplace',
        # 'pt_sacrecoeur_trevi_tajmahal',
    ]

# --- Per-dataset reconstruction --------------------------------------------------
for dataset, predictions in samples.items():
    if datasets_to_process and dataset not in datasets_to_process:
        print(f'Skipping "{dataset}"')
        continue

    images_dir = os.path.join(data_dir, 'train' if is_train else 'test', dataset)
    if not os.path.exists(images_dir):
        print(f'Images dir "{images_dir}" does not exist. Skipping "{dataset}"')
        continue

    images_dir = Path(images_dir)

    print(f'Images dir: {images_dir}')

    # slicing with None keeps the full list
    image_names = [p.filename for p in predictions][:max_images]

    image_list = [os.path.join(images_dir, name) for name in image_names]

    print(f'\nProcessing dataset "{dataset}": {len(image_names)} images')

    # BoQ top-k results are written to the dataset's work directory, where
    # get_reconstructed_scene reads them back (scenegraph_type "boq").
    dataset_dir = os.path.join(workdir, dataset)
    boq_topks = boq_sort_topk(image_list, boq_model, device, vis=False, topk=32)
    os.makedirs(dataset_dir, exist_ok=True)
    with open(os.path.join(dataset_dir, "boq_topk.json"), "w", encoding="utf-8") as f:
        json.dump(boq_topks, f, ensure_ascii=False, indent=4)

    scenes, outlier_imgs = get_reconstructed_scene(
        model, device, image_list, dataset_dir, scenegraph_type="boq"
    )

    filename_to_index = {p.filename: idx for idx, p in enumerate(predictions)}

    # Copy each reconstructed camera pose (inverted to world->camera) into the
    # matching Prediction; the cluster index is the position of the scene in `scenes`.
    registered = 0
    for map_index, cur_map in enumerate(scenes):
        cams2world = cur_map.get_im_poses().cpu()
        for image_index, image_path in enumerate(cur_map.img_paths):
            image_name = os.path.basename(image_path)
            prediction_index = filename_to_index[image_name]
            prediction = predictions[prediction_index]
            prediction.cluster_index = map_index
            world2cam = np.linalg.inv(cams2world[image_index])
            prediction.rotation = world2cam[:3, :3]
            prediction.translation = world2cam[:3, 3]
            registered += 1
    mapping_result_str = f'Dataset "{dataset}" -> Registered {registered} / {len(image_names)} images with {len(scenes)} clusters'
    print(mapping_result_str)


Dataset "ETs" -> num_images=22
Dataset "amy_gardens" -> num_images=200
Dataset "fbk_vineyard" -> num_images=163
Dataset "imc2023_haiper" -> num_images=54
Dataset "imc2023_heritage" -> num_images=209
Dataset "imc2023_theather_imc2024_church" -> num_images=76
Dataset "imc2024_dioscuri_baalshamin" -> num_images=138
Dataset "imc2024_lizard_pond" -> num_images=214
Dataset "pt_brandenburg_british_buckingham" -> num_images=225
Dataset "pt_piazzasanmarco_grandplace" -> num_images=168
Dataset "pt_sacrecoeur_trevi_tajmahal" -> num_images=225
Dataset "pt_stpeters_stpauls" -> num_images=200
Dataset "stairs" -> num_images=51
Images dir: data/image-matching-challenge-2025/test/ETs

Processing dataset "ETs": 22 images


100%|██████████| 22/22 [00:00<00:00, 49.76it/s]


find topk time: 0.00017690658569335938 s
>> Loading a list of 22 images
 - adding data/image-matching-challenge-2025/test/ETs/another_et_another_et001.png with resolution 360x640 --> 288x512
 - adding data/image-matching-challenge-2025/test/ETs/another_et_another_et002.png with resolution 360x640 --> 288x512
 - adding data/image-matching-challenge-2025/test/ETs/another_et_another_et003.png with resolution 360x640 --> 288x512
 - adding data/image-matching-challenge-2025/test/ETs/another_et_another_et004.png with resolution 360x640 --> 288x512
 - adding data/image-matching-challenge-2025/test/ETs/another_et_another_et005.png with resolution 360x640 --> 288x512
 - adding data/image-matching-challenge-2025/test/ETs/another_et_another_et006.png with resolution 360x640 --> 288x512
 - adding data/image-matching-challenge-2025/test/ETs/another_et_another_et007.png with resolution 360x640 --> 288x512
 - adding data/image-matching-challenge-2025/test/ETs/another_et_another_et008.png with resolut

100%|██████████| 231/231 [00:00<00:00, 20041.87it/s]
100%|██████████| 22/22 [00:01<00:00, 14.38it/s]


clusters = [3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 1 2 5]
cluster 3:
-- another_et_another_et001
-- another_et_another_et002
-- another_et_another_et003
-- another_et_another_et004
-- another_et_another_et005
-- another_et_another_et006
-- another_et_another_et007
-- another_et_another_et008
-- another_et_another_et009
-- another_et_another_et010


100%|██████████| 45/45 [00:00<00:00, 16415.35it/s]
100%|██████████| 10/10 [00:00<00:00, 38.82it/s]


init focals = [428.2006  465.46454 480.01993 477.07062 463.4202  448.05655 451.65335
 458.37122 445.2563  426.69232]


100%|██████████| 200/200 [00:06<00:00, 32.45it/s, lr=0.0000, loss=0.177]


>> final loss = 0.17658554017543793


100%|██████████| 200/200 [00:08<00:00, 24.13it/s, lr=0.0000, loss=0.824]


>> final loss = 0.8242846131324768
Final focals = [468.37222 465.94443 462.9056  462.36932 464.8395  466.79614 466.09976
 464.78543 460.99387 453.83362]
cluster 4:
-- et_et000
-- et_et001
-- et_et002
-- et_et003
-- et_et004
-- et_et005
-- et_et006
-- et_et007
-- et_et008


100%|██████████| 36/36 [00:00<00:00, 17331.83it/s]
100%|██████████| 9/9 [00:00<00:00, 36.58it/s]


init focals = [498.44272 500.10925 497.4111  507.90295 481.6423  475.06934 465.79468
 469.42758 466.40543]


100%|██████████| 200/200 [00:04<00:00, 40.06it/s, lr=0.0000, loss=0.113]


>> final loss = 0.11304111033678055


100%|██████████| 200/200 [00:07<00:00, 26.93it/s, lr=0.0000, loss=0.693]


>> final loss = 0.6930036544799805
Final focals = [522.40857 520.1605  519.3459  523.5777  521.3308  516.6578  516.94684
 515.25867 513.94855]
cluster 1:
-- outliers_out_et001
cluster 2:
-- outliers_out_et002
cluster 5:
-- outliers_out_et003
Dataset "ETs" -> Registered 19 / 22 images with 2 clusters
Images dir "data/image-matching-challenge-2025/test/amy_gardens" does not exist. Skipping "amy_gardens"
Images dir "data/image-matching-challenge-2025/test/fbk_vineyard" does not exist. Skipping "fbk_vineyard"
Images dir "data/image-matching-challenge-2025/test/imc2023_haiper" does not exist. Skipping "imc2023_haiper"
Images dir "data/image-matching-challenge-2025/test/imc2023_heritage" does not exist. Skipping "imc2023_heritage"
Images dir "data/image-matching-challenge-2025/test/imc2023_theather_imc2024_church" does not exist. Skipping "imc2023_theather_imc2024_church"
Images dir "data/image-matching-challenge-2025/test/imc2024_dioscuri_baalshamin" does not exist. Skipping "imc2024_dioscu

100%|██████████| 51/51 [00:02<00:00, 18.49it/s]


find topk time: 0.00016808509826660156 s
>> Loading a list of 51 images
 - adding data/image-matching-challenge-2025/test/stairs/stairs_split_1_1710453576271.png with resolution 1280x1024 --> 512x400
 - adding data/image-matching-challenge-2025/test/stairs/stairs_split_1_1710453601885.png with resolution 1280x1024 --> 512x400
 - adding data/image-matching-challenge-2025/test/stairs/stairs_split_1_1710453606287.png with resolution 1280x1024 --> 512x400
 - adding data/image-matching-challenge-2025/test/stairs/stairs_split_1_1710453612890.png with resolution 1280x1024 --> 512x400
 - adding data/image-matching-challenge-2025/test/stairs/stairs_split_1_1710453616892.png with resolution 1280x1024 --> 512x400
 - adding data/image-matching-challenge-2025/test/stairs/stairs_split_1_1710453620694.png with resolution 1280x1024 --> 512x400
 - adding data/image-matching-challenge-2025/test/stairs/stairs_split_1_1710453626698.png with resolution 1280x1024 --> 512x400
 - adding data/image-matching-ch

100%|██████████| 1001/1001 [00:00<00:00, 21156.88it/s]
100%|██████████| 51/51 [00:19<00:00,  2.67it/s]


clusters = [2 2 6 6 5 5 7 4 1 6 6 1 1 8 8 4 1 1 2 2 2 4 8 2 1 7 6 2 1 2 3 3 1 1 1 2 2
 1 1 2 3 1 1 1 2 2 2 1 1 2 1]
cluster 2:
-- stairs_split_1_1710453576271
-- stairs_split_1_1710453601885
-- stairs_split_1_1710453697531
-- stairs_split_1_1710453704934
-- stairs_split_1_1710453901046
-- stairs_split_1_1710453947066
-- stairs_split_1_1710453990286
-- stairs_split_2_1710453725143
-- stairs_split_2_1710453745156
-- stairs_split_2_1710453753160
-- stairs_split_2_1710453765165
-- stairs_split_2_1710453790978
-- stairs_split_2_1710453793579
-- stairs_split_2_1710453798181
-- stairs_split_2_1710453862225


100%|██████████| 105/105 [00:00<00:00, 19500.62it/s]
100%|██████████| 15/15 [00:00<00:00, 23.57it/s]


init focals = [327.44305 333.62738 303.10373 316.44193 329.03528 310.73373 319.1424
 322.82477 318.91376 330.83618 324.36996 310.00577 329.86963 325.5856
 333.39645]


100%|██████████| 200/200 [00:07<00:00, 27.56it/s, lr=0.0000, loss=0.354]


>> final loss = 0.35404425859451294


100%|██████████| 200/200 [00:11<00:00, 16.94it/s, lr=0.0000, loss=2.506]


>> final loss = 2.5055806636810303
Final focals = [321.0583  333.2399  319.69608 334.6209  335.2054  328.35208 332.79703
 338.4959  325.9628  229.46794 332.8075  319.63095 319.39435 328.09744
 610.03516]
cluster 6:
-- stairs_split_1_1710453606287
-- stairs_split_1_1710453612890
-- stairs_split_1_1710453659313
-- stairs_split_1_1710453663515
-- stairs_split_1_1710453985484


100%|██████████| 10/10 [00:00<00:00, 12365.28it/s]
100%|██████████| 5/5 [00:00<00:00, 67.04it/s]


init focals = [325.5159  319.04257 324.7632  338.32568 323.60678]


100%|██████████| 200/200 [00:03<00:00, 53.79it/s, lr=0.0000, loss=0.268]


>> final loss = 0.26760053634643555


100%|██████████| 200/200 [00:06<00:00, 33.30it/s, lr=0.0000, loss=0.936]


>> final loss = 0.9360532760620117
Final focals = [345.88885 338.91278 341.82263 356.78943 346.14313]
cluster 5:
-- stairs_split_1_1710453616892
-- stairs_split_1_1710453620694
cluster 7:
-- stairs_split_1_1710453626698
-- stairs_split_1_1710453963274
cluster 4:
-- stairs_split_1_1710453643106
-- stairs_split_1_1710453683725
-- stairs_split_1_1710453912451
cluster 1:
-- stairs_split_1_1710453651110
-- stairs_split_1_1710453667117
-- stairs_split_1_1710453668718
-- stairs_split_1_1710453689727
-- stairs_split_1_1710453693529
-- stairs_split_1_1710453955270
-- stairs_split_2_1710453720741
-- stairs_split_2_1710453736752
-- stairs_split_2_1710453739354
-- stairs_split_2_1710453740954
-- stairs_split_2_1710453756762
-- stairs_split_2_1710453759963
-- stairs_split_2_1710453779372
-- stairs_split_2_1710453783374
-- stairs_split_2_1710453786375
-- stairs_split_2_1710453801783
-- stairs_split_2_1710453805788
-- stairs_split_2_1710453871430


100%|██████████| 153/153 [00:00<00:00, 11239.27it/s]
100%|██████████| 18/18 [00:00<00:00, 18.22it/s]


init focals = [299.93994 334.3772  299.1542  321.7056  298.5823  330.25333 305.85776
 301.8666  297.1641  309.96954 293.34232 287.30527 333.4496  293.1615
 304.14297 310.9521  293.14822 309.2224 ]


100%|██████████| 200/200 [00:08<00:00, 24.99it/s, lr=0.0000, loss=0.211]


>> final loss = 0.21108736097812653


100%|██████████| 200/200 [00:13<00:00, 14.49it/s, lr=0.0000, loss=3.328]


>> final loss = 3.3279833793640137
Final focals = [317.39804 333.0115  317.8615  313.25854 314.6121  465.81888 323.7293
 312.07205 312.3381  318.2786  303.41306 309.4354  331.745   305.86972
 314.29315 322.0563  307.77563 307.8875 ]
cluster 8:
-- stairs_split_1_1710453675921
-- stairs_split_1_1710453678922
-- stairs_split_1_1710453930259
cluster 3:
-- stairs_split_2_1710453728949
-- stairs_split_2_1710453733751
-- stairs_split_2_1710453774370
Dataset "stairs" -> Registered 38 / 51 images with 3 clusters


In [16]:
# Must Create a submission file.

array_to_str = lambda array: ';'.join([f"{x:.09f}" for x in array])
none_to_str = lambda n: ';'.join(['nan'] * n)

submission_file = 'result/submission.csv'
with open(submission_file, 'w') as f:
    if is_train:
        f.write('dataset,scene,image,rotation_matrix,translation_vector\n')
        for dataset in samples:
            for prediction in samples[dataset]:
                cluster_name = 'outliers' if prediction.cluster_index is None else f'cluster{prediction.cluster_index}'
                rotation = none_to_str(9) if prediction.rotation is None else array_to_str(prediction.rotation.flatten())
                translation = none_to_str(3) if prediction.translation is None else array_to_str(prediction.translation)
                f.write(f'{prediction.dataset},{cluster_name},{prediction.filename},{rotation},{translation}\n')
    else:
        f.write('image_id,dataset,scene,image,rotation_matrix,translation_vector\n')
        for dataset in samples:
            for prediction in samples[dataset]:
                cluster_name = 'outliers' if prediction.cluster_index is None else f'cluster{prediction.cluster_index}'
                rotation = none_to_str(9) if prediction.rotation is None else array_to_str(prediction.rotation.flatten())
                translation = none_to_str(3) if prediction.translation is None else array_to_str(prediction.translation)
                f.write(f'{prediction.image_id},{prediction.dataset},{cluster_name},{prediction.filename},{rotation},{translation}\n')

!head {submission_file}

2227.66s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


dataset,scene,image,rotation_matrix,translation_vector
ETs,cluster0,another_et_another_et001.png,0.829320967;-0.296454519;0.473535150;0.407318354;0.901143014;-0.149280593;-0.382423401;0.316625416;0.868176877,-0.721401930;0.208028257;2.042875290
ETs,cluster0,another_et_another_et002.png,0.836093187;-0.299573362;0.459783375;0.415738553;0.892667055;-0.174442753;-0.358136207;0.336775690;0.870745301,-0.723151922;0.274954557;1.812339783
ETs,cluster0,another_et_another_et003.png,0.800178468;-0.317904115;0.508577645;0.473317087;0.855510652;-0.209934086;-0.368354708;0.408703238;0.835030735,-0.688192189;0.369250059;1.581452131
ETs,cluster0,another_et_another_et004.png,0.818103492;-0.295061767;0.493105322;0.367004961;0.928617418;-0.053438287;-0.442248493;0.224728599;0.868099332,-0.681945086;0.449573457;1.548249960
ETs,cluster0,another_et_another_et005.png,0.798109055;-0.260332584;0.543678641;0.378932685;0.917749643;-0.116935670;-0.468644619;0.299516171;0.831437707,-0.647862315;0.257036418;1.81939

In [17]:
# Score the submission file against the training labels.
# Do not do this when submitting a notebook for scoring. All you have to do is save your
# submission to /kaggle/working/submission.csv.
#
# NOTE(review): the assignment below overwrites the `is_train` flag chosen in the
# configuration cell. Any cell re-run after this one (for example the submission
# writer) will then see is_train == True -- confirm this is intended.
is_train = True
if is_train:
    import metric

    score_kwargs = dict(
        gt_csv='data/image-matching-challenge-2025/train_labels.csv',
        user_csv=submission_file,
        thresholds_csv='data/image-matching-challenge-2025/train_thresholds.csv',
        mask_csv=None if is_train else os.path.join(data_dir, 'mask.csv'),
        inl_cf=0,
        strict_cf=-1,
        verbose=True,
    )
    final_score, dataset_scores = metric.score(**score_kwargs)

imc2023_haiper: score=0.00% (mAA=0.00%, clusterness=0.00%)
imc2023_heritage: score=0.00% (mAA=0.00%, clusterness=0.00%)
imc2023_theather_imc2024_church: score=0.00% (mAA=0.00%, clusterness=0.00%)
imc2024_dioscuri_baalshamin: score=0.00% (mAA=0.00%, clusterness=0.00%)
imc2024_lizard_pond: score=0.00% (mAA=0.00%, clusterness=0.00%)
pt_brandenburg_british_buckingham: score=0.00% (mAA=0.00%, clusterness=0.00%)
pt_piazzasanmarco_grandplace: score=0.00% (mAA=0.00%, clusterness=0.00%)
pt_sacrecoeur_trevi_tajmahal: score=0.00% (mAA=0.00%, clusterness=0.00%)
pt_stpeters_stpauls: score=0.00% (mAA=0.00%, clusterness=0.00%)
amy_gardens: score=0.00% (mAA=0.00%, clusterness=0.00%)
fbk_vineyard: score=0.00% (mAA=0.00%, clusterness=0.00%)
ETs: score=26.67% (mAA=15.38%, clusterness=100.00%)
stairs: score=2.18% (mAA=1.11%, clusterness=57.58%)
Average over all datasets: score=2.22% (mAA=1.27%, clusterness=12.12%)
