# DINO Clustering

In [None]:
import gc
import os
import sys
from copy import deepcopy
from time import sleep, time

import kornia as K
import numpy as np
import pycolmap
from IPython.display import clear_output
from scripts import utils, database, features

In [None]:
# --- Paths ---
# Root of the challenge data; training images are read from DATA_DIR/train/<dataset>.
DATA_DIR = "../data/image-matching-challenge-2025"
# Location handed to the DINO-based helpers as `dino_path`.
DINO_DIR = "weights/dinov2"
# CSV that receives the predictions and is later scored.
OUTPUT_FILE = "train_predictions.csv"

# --- COLMAP database import settings ---
DB_IMG_EXT = ""
DB_CAMERA_MODEL = "simple-pinhole"

# --- Dataset selection ---
# Only datasets named here are processed; an empty list disables filtering.
# New 2025 datasets
_DATASETS_2025 = [
    "amy_gardens",
    "ETs",
    "fbk_vineyard",
    "stairs",
]
# Data from IMC 2023 and 2024.
_DATASETS_IMC_2023_2024 = [
    "imc2024_dioscuri_baalshamin",
    "imc2023_theather_imc2024_church",
    "imc2023_heritage",
    "imc2023_haiper",
    "imc2024_lizard_pond",
]
# Crowdsourced PhotoTourism data.
_DATASETS_PHOTOTOURISM = [
    "pt_stpeters_stpauls",
    "pt_brandenburg_british_buckingham",
    "pt_piazzasanmarco_grandplace",
    "pt_sacrecoeur_trevi_tajmahal",
]
DATASETS_FILTER = [
    *_DATASETS_2025,
    *_DATASETS_IMC_2023_2024,
    *_DATASETS_PHOTOTOURISM,
]

In [None]:
# Remember to enable an accelerator in the sidebar on the right before running.
# The helper is asked for CUDA device 0 "if available".
device = K.utils.get_cuda_device_if_available(0)
print(f"device={device!r}")

In [None]:
# Load the dataset index from DATA_DIR and report the image count per dataset.
samples = utils.dataset.load_dataset(DATA_DIR)

for dataset, entries in samples.items():
    print(f'Dataset "{dataset}" -> num_images={len(entries)}')

In [None]:
# Bare expression: in a notebook the value of the last expression of a cell is
# rendered as the cell output, so this shows the loaded `samples` for inspection.
samples

In [None]:
# Main pipeline: for every selected dataset, cluster its images with DINO
# descriptors, then run pair shortlisting, ALIKED detection, LightGlue matching
# and COLMAP reconstruction separately inside each cluster. Recovered poses are
# written back into the prediction objects held by `samples`.

# Clear memory to prevent OOM errors
gc.collect()
mapping_result_strs = []  # One summary line per processed dataset

print(f"Extracting on device {device}")
# Process each dataset
for dataset, predictions in samples.items():
    # Skip datasets not in the filter list (an empty filter processes everything)
    if DATASETS_FILTER and dataset not in DATASETS_FILTER:
        print(f'Skipping "{dataset}"')
        continue

    # Setup paths and image lists
    images_dir = os.path.join(DATA_DIR, "train", dataset)
    images = [os.path.join(images_dir, p.filename) for p in predictions]

    print(f'\nProcessing dataset "{dataset}": {len(images)} images')

    # Map filenames to prediction indices
    filename_to_index = {p.filename: idx for idx, p in enumerate(predictions)}

    # Setup output directory for features
    feature_dir = os.path.join("featureout", dataset)
    os.makedirs(feature_dir, exist_ok=True)

    # Wrap algos in try-except blocks so we can populate a submission even if one scene crashes.
    try:
        # 1. Cluster the dataset's images using DINO CLS descriptors
        t = time()
        cls_descriptor_dino = features.extraction.extract_cls_descriptor_dino(
            images,
            dino_path=DINO_DIR,
            device=device,
            normalize=True
        )
        reduced_features = features.extraction.feature_reducer(
            algorithm="PCA",
            features=cls_descriptor_dino.cpu().numpy(),
            n_components=50,
            scaler=None,
            random_state=42
        )
        cluster_labels = features.clustering.dino_clusterer(
            algorithm="HDBSCAN",
            features=reduced_features,
            scaler=None,
            min_cluster_size=10,
            metric='euclidean',
            cluster_selection_method='eom',
        )

        # Label -1 marks outliers; they are dropped and keep their initial predictions.
        is_outlier = cluster_labels == -1
        images_np = np.array(images)[~is_outlier]
        cluster_labels = cluster_labels[~is_outlier]
        cluster_ids = np.unique(cluster_labels)
        print(
            f"Clustering. Number of clusters: {len(cluster_ids)}, with {np.count_nonzero(is_outlier)} outliers. Done in {time() - t:.4f} sec"
        )
        gc.collect()

        # Prediction indices that received a pose, across all clusters of this dataset.
        # A set is used so an image present in several COLMAP models counts once.
        registered_indices = set()

        for cluster in cluster_ids:
            # Every per-cluster step below must receive this same list: the shortlist
            # is computed over it, so detecting/matching on the full dataset list would
            # pair the wrong images (assuming the pairs index into the list they were
            # computed from -- TODO confirm against the helper).
            cluster_image_list = images_np[cluster_labels == cluster].tolist()
            feature_dir_cluster = os.path.join(feature_dir, f"cluster_{cluster}")
            os.makedirs(feature_dir_cluster, exist_ok=True)

            print(f"Processing Cluster {cluster}: {len(cluster_image_list)} images")

            # 1b. Image pair selection inside the cluster using DINO features
            t = time()  # restart the timer so only shortlisting is measured
            index_pairs = features.matching.get_image_pairs_shortlist_dino(
                cluster_image_list,
                dino_path=DINO_DIR,
                sim_th=0.3,  # Strict similarity threshold
                min_pairs=20,  # Minimum pairs per image with biggest similarity
                exhaustive_if_less=20,
                device=device,
            )
            print(
                f"Shortlisting. Number of pairs to match: {len(index_pairs)}. Done in {time() - t:.4f} sec"
            )
            gc.collect()

            # 2. Local feature detection with ALIKED
            t = time()
            features.extraction.detect_keypoint_aliked(
                cluster_image_list, feature_dir_cluster, 4096, device=device
            )
            gc.collect()
            print(f"Features detected in {time() - t:.4f} sec")

            # 3. Feature matching with LightGlue
            t = time()
            features.matching.match_keypoint_lightglue(
                cluster_image_list,
                index_pairs,
                feature_dir=feature_dir_cluster,
                device=device,
                verbose=False,
            )
            print(f"Features matched in {time() - t:.4f} sec")

            # 4. Create/reset COLMAP database
            database_path = os.path.join(feature_dir_cluster, "colmap.db")
            if os.path.isfile(database_path):
                os.remove(database_path)
            gc.collect()
            sleep(1)
            # Import features and matches into COLMAP format
            database.h5_to_db.import_into_colmap(
                images_dir,
                DB_CAMERA_MODEL,
                img_ext=DB_IMG_EXT,
                feature_dir=feature_dir_cluster,
                database_path=database_path,
            )
            output_path = f"{feature_dir_cluster}/colmap_rec_aliked"

            # 5. Geometric verification with RANSAC
            t = time()
            pycolmap.match_exhaustive(database_path)
            print(f"Ran RANSAC in {time() - t:.4f} sec")

            # 6. SfM reconstruction with COLMAP
            # Configure reconstruction parameters
            mapper_options = pycolmap.IncrementalPipelineOptions()
            mapper_options.min_model_size = 3  # Allow small reconstructions (min 3 images). Colmap by default does not generate a reconstruction if <10 images are registered.
            mapper_options.max_num_models = 25  # Limit number of separate models
            os.makedirs(output_path, exist_ok=True)
            t = time()
            maps = pycolmap.incremental_mapping(
                database_path=database_path,
                image_path=images_dir,
                output_path=output_path,
                options=mapper_options,
            )
            sleep(1)
            print(f"Reconstruction done in  {time() - t:.4f} sec")
            print(maps)

            # Keep the notebook output short: drop everything printed so far.
            clear_output(wait=False)

            # 7. Extract poses from reconstruction
            cluster_registered = set()
            for _, cur_map in maps.items():
                for _, image in cur_map.images.items():
                    prediction_index = filename_to_index[image.name]
                    # The DINO cluster id is used as the cluster index, not
                    # COLMAP's model index within the cluster.
                    predictions[prediction_index].cluster_index = cluster
                    predictions[prediction_index].rotation = deepcopy(
                        image.cam_from_world.rotation.matrix()
                    )
                    predictions[prediction_index].translation = deepcopy(
                        image.cam_from_world.translation
                    )
                    cluster_registered.add(prediction_index)
            registered_indices |= cluster_registered
            print(
                f'Dataset "{dataset}", cluster {cluster} -> Registered {len(cluster_registered)} / {len(cluster_image_list)} images'
            )
            print(f"# clusters predicted by colmap: {len(maps)}")
            gc.collect()

        # One dataset-level summary, so the counts match the dataset-level denominator.
        mapping_result_str = f'Dataset "{dataset}" -> Registered {len(registered_indices)} / {len(images)} images with {len(cluster_ids)} clusters'
        mapping_result_strs.append(mapping_result_str)
        print(mapping_result_str)

    except Exception as e:
        # Deliberately best-effort: report the failure and continue with the next
        # dataset. Poses already written for earlier clusters are kept.
        print(e)
        mapping_result_str = f'Dataset "{dataset}" -> Failed!'
        mapping_result_strs.append(mapping_result_str)
        print(mapping_result_str)

# Print summary of results
print("\nResults")
for s in mapping_result_strs:
    print(s)

In [None]:
# Create a submission file.
# `samples` (whose prediction objects were updated in place by the pipeline above)
# is handed to the project helper together with the output path OUTPUT_FILE.
utils.submission.create_submission_file(samples, OUTPUT_FILE)

# IPython shell escape (valid only inside a notebook): preview the first lines
# of the file named by OUTPUT_FILE.
!head {OUTPUT_FILE}

In [None]:
# Score the predictions in OUTPUT_FILE against the training labels.
# Only do this when running on the training set. When submitting a notebook for
# scoring, skip it: all you have to do is save your submission to
# /kaggle/working/submission.csv.

t = time()
labels_csv = os.path.join(DATA_DIR, "train_labels.csv")
thresholds_csv = os.path.join(DATA_DIR, "train_thresholds.csv")
final_score, dataset_scores = utils.metric.score(
    gt_csv=labels_csv,
    user_csv=OUTPUT_FILE,
    thresholds_csv=thresholds_csv,
    mask_csv=None,
    inl_cf=0,
    strict_cf=-1,
    verbose=True,
)
elapsed = time() - t
print(f"Computed metric in: {elapsed:.02f} sec.")