# DINO Clustering

In [None]:
# install dependencies from wheel files
%pip install --no-index /kaggle/input/imc2024-packages-lightglue-rerun-kornia/* --no-deps

[31mERROR: Invalid requirement: '/kaggle/input/imc2024-packages-lightglue-rerun-kornia/*': Expected package name at the start of dependency specifier
    /kaggle/input/imc2024-packages-lightglue-rerun-kornia/*
    ^
Hint: It looks like a path. File '/kaggle/input/imc2024-packages-lightglue-rerun-kornia/*' does not exist.[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


In [1]:
import gc
import os
import sys
from copy import deepcopy
from time import sleep, time

import kornia as K
import numpy as np
import pycolmap
from IPython.display import clear_output
from scripts import utils, database, features

  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
DATA_DIR = "../data/image-matching-challenge-2025"
DINO_DIR = "weights/dinov2"
OUTPUT_FILE = "train_predictions.csv"

DB_IMG_EXT = ""
DB_CAMERA_MODEL = "simple-pinhole"

# Configure dataset filtering 
DATASETS_FILTER = [
    # New 2025 datasets
    "amy_gardens",
    "ETs",
    "fbk_vineyard",
    "stairs",
    # Data from IMC 2023 and 2024.
    'imc2024_dioscuri_baalshamin',
    'imc2023_theather_imc2024_church',
    'imc2023_heritage',
    'imc2023_haiper',
    'imc2024_lizard_pond',
    # Crowdsourced PhotoTourism data.
    'pt_stpeters_stpauls',
    'pt_brandenburg_british_buckingham',
    'pt_piazzasanmarco_grandplace',
    'pt_sacrecoeur_trevi_tajmahal',
]

In [3]:
# Don't forget to select an accelerator on the sidebar to the right.
device = K.utils.get_cuda_device_if_available(0)
print(f"{device=}")

device=device(type='cuda', index=0)


In [4]:
# Load the dataset
samples = utils.dataset.load_dataset(DATA_DIR)

for dataset in samples:
    print(f'Dataset "{dataset}" -> num_images={len(samples[dataset])}')

Dataset "imc2023_haiper" -> num_images=54
Dataset "imc2023_heritage" -> num_images=209
Dataset "imc2023_theather_imc2024_church" -> num_images=76
Dataset "imc2024_dioscuri_baalshamin" -> num_images=138
Dataset "imc2024_lizard_pond" -> num_images=214
Dataset "pt_brandenburg_british_buckingham" -> num_images=225
Dataset "pt_piazzasanmarco_grandplace" -> num_images=168
Dataset "pt_sacrecoeur_trevi_tajmahal" -> num_images=225
Dataset "pt_stpeters_stpauls" -> num_images=200
Dataset "amy_gardens" -> num_images=200
Dataset "fbk_vineyard" -> num_images=163
Dataset "ETs" -> num_images=22
Dataset "stairs" -> num_images=51


In [5]:
samples

{'imc2023_haiper': [Prediction(image_id=None, dataset='imc2023_haiper', filename='fountain_image_116.png', cluster_index=None, rotation=None, translation=None),
  Prediction(image_id=None, dataset='imc2023_haiper', filename='fountain_image_108.png', cluster_index=None, rotation=None, translation=None),
  Prediction(image_id=None, dataset='imc2023_haiper', filename='fountain_image_101.png', cluster_index=None, rotation=None, translation=None),
  Prediction(image_id=None, dataset='imc2023_haiper', filename='fountain_image_082.png', cluster_index=None, rotation=None, translation=None),
  Prediction(image_id=None, dataset='imc2023_haiper', filename='fountain_image_071.png', cluster_index=None, rotation=None, translation=None),
  Prediction(image_id=None, dataset='imc2023_haiper', filename='fountain_image_025.png', cluster_index=None, rotation=None, translation=None),
  Prediction(image_id=None, dataset='imc2023_haiper', filename='fountain_image_000.png', cluster_index=None, rotation=None, 

In [11]:
# Clear memory to prevent OOM errors
gc.collect()
mapping_result_strs = []  # Store results for each dataset

print(f"Extracting on device {device}")
# Process each dataset
for dataset, predictions in samples.items():
    # Skip datasets not in filter list
    if DATASETS_FILTER and dataset not in DATASETS_FILTER:
        print(f'Skipping "{dataset}"')
        continue

    # Setup paths and image lists
    images_dir = os.path.join(DATA_DIR, "train", dataset)
    images = [os.path.join(images_dir, p.filename) for p in predictions]

    print(f'\nProcessing dataset "{dataset}": {len(images)} images')

    # Map filenames to prediction indices
    filename_to_index = {p.filename: idx for idx, p in enumerate(predictions)}

    # Setup output directory for features
    feature_dir = os.path.join("featureout", dataset)
    os.makedirs(feature_dir, exist_ok=True)

    # Wrap algos in try-except blocks so we can populate a submission even if one scene crashes.
    try:
        # 1. Image pair selection using DINO features
        t = time()
        cls_descriptor_dino = features.extraction.extract_cls_descriptor_dino(
            images,
            dino_path=DINO_DIR,
            device=device,
            normalize=True
        )
        reduced_features = features.extraction.feature_reducer(
            algorithm="PCA",
            features=cls_descriptor_dino.cpu().numpy(),
            n_components=50,
            scaler=None,
            random_state=42
        )
        cluster_labels = features.clustering.dino_clusterer(
            algorithm="HDBSCAN",
            features=reduced_features,
            scaler=None,
            min_cluster_size=10,
            metric='euclidean',
            cluster_selection_method='eom',
        )
        print(
            f"Clustering. Number of clusters: {np.unique(cluster_labels)}, with {sum(cluster_labels == -1)} outliers. Done in {time() - t:.4f} sec"
        )
        gc.collect()
        images_np = np.array(images)[cluster_labels != -1]
        cluster_labels = cluster_labels[cluster_labels != -1]
        for cluster in np.unique(cluster_labels):
            cluster_images = images_np[cluster_labels == cluster]
            feature_dir_cluster = os.path.join(feature_dir, f"cluster_{cluster}")
            os.makedirs(feature_dir_cluster, exist_ok=True)

            print(f"Processing Cluster {cluster}: {len(cluster_images)} images")

            index_pairs = features.matching.get_image_pairs_shortlist_dino(
                cluster_images.tolist(),
                dino_path=DINO_DIR,
                sim_th=0.3,  # Strict similarity threshold
                min_pairs=20,  # Minimum pairs per image with biggest similarity
                exhaustive_if_less=20,
                device=device,
            )
            print(
                f"Shortlisting. Number of pairs to match: {len(index_pairs)}. Done in {time() - t:.4f} sec"
            )
            gc.collect()

            # 2. Local feature detection with ALIKED
            t = time()
            features.extraction.detect_keypoint_aliked(images, feature_dir_cluster, 4096, device=device)
            gc.collect()
            print(f"Features detected in {time() - t:.4f} sec")

            # 3. Feature matching with LightGlue
            t = time()
            features.matching.match_keypoint_lightglue(
                images, index_pairs, feature_dir=feature_dir_cluster, device=device, verbose=False
            )
            print(f"Features matched in {time() - t:.4f} sec")

            # 4. Create/reset COLMAP database
            database_path = os.path.join(feature_dir_cluster, "colmap.db")
            if os.path.isfile(database_path):
                os.remove(database_path)
            gc.collect()
            sleep(1)
            # Import features and matches into COLMAP format
            database.h5_to_db.import_into_colmap(
                images_dir,
                DB_CAMERA_MODEL,
                img_ext=DB_IMG_EXT,
                feature_dir=feature_dir_cluster,
                database_path=database_path,
            )
            output_path = f"{feature_dir_cluster}/colmap_rec_aliked"

            # 5. Geometric verification with RANSAC
            t = time()
            pycolmap.match_exhaustive(database_path)
            print(f"Ran RANSAC in {time() - t:.4f} sec")

            # 6. SfM reconstruction with COLMAP
            # Configure reconstruction parameters
            mapper_options = pycolmap.IncrementalPipelineOptions()
            mapper_options.min_model_size = 3  # Allow small reconstructions (min 3 images). Colmap by default does not generate a reconstruction if <10 images are registered.
            mapper_options.max_num_models = 25  # Limit number of separate models
            os.makedirs(output_path, exist_ok=True)
            t = time()
            maps = pycolmap.incremental_mapping(
                database_path=database_path,
                image_path=images_dir,
                output_path=output_path,
                options=mapper_options,
            )
            sleep(1)
            print(f"Reconstruction done in  {time() - t:.4f} sec")
            print(maps)

            clear_output(wait=False)

            # 7. Extract poses from reconstruction
            registered = 0
            for map_index, cur_map in maps.items():
                for _, image in cur_map.images.items():
                    prediction_index = filename_to_index[image.name]
                    # predictions[prediction_index].cluster_index = map_index
                    predictions[prediction_index].cluster_index = cluster
                    predictions[prediction_index].rotation = deepcopy(
                        image.cam_from_world.rotation.matrix()
                    )
                    predictions[prediction_index].translation = deepcopy(
                        image.cam_from_world.translation
                    )
                    registered += 1
            mapping_result_str = f'Dataset "{dataset}" -> Registered {registered} / {len(images)} images with {len(np.unique(cluster_labels))} clusters'
            mapping_result_strs.append(mapping_result_str)
            print(mapping_result_str)
            print(f"# clusters predicted by colmap: {len(maps)}")
            gc.collect()

    except Exception as e:
        print(e)
        # raise e
        mapping_result_str = f'Dataset "{dataset}" -> Failed!'
        mapping_result_strs.append(mapping_result_str)
        print(mapping_result_str)

# Print summary of results
print("\nResults")
for s in mapping_result_strs:
    print(s)

Extracting on device cuda:0

Processing dataset "imc2023_haiper": 54 images


100%|██████████| 54/54 [00:07<00:00,  7.03it/s]


Clustering. Number of clusters: [0 1], with 0 outliers. Done in 9.5183 sec
Processing Cluster 0: 23 images


100%|██████████| 23/23 [00:03<00:00,  6.79it/s]
Downloading: "https://github.com/Shiaoming/ALIKED/raw/main/models/aliked-n16.pth" to /home/farrosalferro/.cache/torch/hub/checkpoints/aliked-n16.pth


Shortlisting. Number of pairs to match: 242. Done in 13.1289 sec


100%|██████████| 2.61M/2.61M [00:00<00:00, 11.4MB/s]
100%|██████████| 54/54 [00:03<00:00, 17.51it/s]
Downloading: "https://github.com/cvg/LightGlue/releases/download/v0.1_arxiv/aliked_lightglue.pth" to /home/farrosalferro/.cache/torch/hub/checkpoints/aliked_lightglue_v0-1_arxiv-pth


Features detected in 4.3931 sec


100%|██████████| 45.4M/45.4M [00:04<00:00, 10.4MB/s]


Loaded LightGlue model


 88%|████████▊ | 214/242 [00:07<00:01, 27.85it/s]


KeyboardInterrupt: 

In [None]:
# Create a submission file.
utils.submission.create_submission_file(samples, OUTPUT_FILE)

!head {OUTPUT_FILE}

dataset,scene,image,rotation_matrix,translation_vector
imc2023_haiper,cluster1,fountain_image_116.png,0.868947109;0.236616305;-0.434676484;-0.403681366;0.846977974;-0.345933038;0.286308011;0.476068310;0.831496655,0.355170758;-0.848095109;1.844426447
imc2023_haiper,cluster1,fountain_image_108.png,0.933675517;-0.138427137;0.330284661;0.282077012;0.852489276;-0.440107479;-0.220641313;0.504083288;0.834995479,0.330889631;-0.735802568;1.627646554
imc2023_haiper,cluster1,fountain_image_101.png,0.638723005;-0.284327353;0.714976139;0.592957514;0.774056642;-0.221895697;-0.490341014;0.565680361;0.663001825,-0.087834167;-1.005600557;1.786360916
imc2023_haiper,cluster1,fountain_image_082.png,-0.981727810;-0.123444735;0.144816794;0.030411376;0.649467528;0.759780941;-0.187844762;0.750302158;-0.633846210,0.101094113;-1.707073590;4.183611565
imc2023_haiper,cluster1,fountain_image_071.png,-0.805992849;0.194048449;-0.559214384;-0.313994193;0.660700777;0.681822653;0.501780006;0.725134252;-0.471590015,0.17

In [15]:
# Compute results if running on the training set.
# Don't do this when submitting a notebook for scoring. All you have to do is save your submission to /kaggle/working/submission.csv.

t = time()
final_score, dataset_scores = utils.metric.score(
    gt_csv=os.path.join(DATA_DIR, "train_labels.csv"),
    user_csv=OUTPUT_FILE,
    thresholds_csv=os.path.join(DATA_DIR, "train_thresholds.csv"),
    mask_csv=None,
    inl_cf=0,
    strict_cf=-1,
    verbose=True,
)
print(f"Computed metric in: {time() - t:.02f} sec.")



AssertionError: 