In [1]:
import numpy as np
import pandas as pd
import os
from tqdm.notebook import tqdm # Progress bars
import gc # Garbage collection for memory management
from copy import deepcopy
import sys

# Import individual modules to avoid circular imports
from scripts.utils import dataset, camera, image, metric, submission
from scripts.features import extraction, clustering

  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# ======================================================================
# Paths
# ======================================================================
DATA_DIR = "../data/image-matching-challenge-2025"
TRAIN_DIR = os.path.join(DATA_DIR, "train")
OUTPUT_FILE = "output_graph.csv"  # results CSV written by the pipeline

# ======================================================================
# Feature extraction
# ======================================================================
# Supported values: 'SIFT', 'AKAZE', 'ORB'
# (DISK / ALIKED would require external setup).
FEATURE_EXTRACTOR_TYPE = 'SIFT'
SIFT_NFEATURES = 8000  # upper bound on SIFT features kept per image

# ======================================================================
# Matching
# ======================================================================
# 'BF'    -> Brute Force
# 'FLANN' -> Fast Library for Approximate Nearest Neighbors
MATCHER_TYPE = 'FLANN'
LOWE_RATIO_TEST_THRESHOLD = 0.8  # knnMatch ratio used to keep good matches
MIN_INLIER_MATCHES_INITIAL = 15  # inliers required by the initial pairwise geometry check
MIN_INLIER_MATCHES_GRAPH = 10  # inliers required to add a view-graph edge (may be lower)

# ======================================================================
# Geometric verification (RANSAC for the fundamental matrix)
# ======================================================================
RANSAC_THRESHOLD = 1.5  # reprojection threshold in pixels for findFundamentalMat

# ======================================================================
# Clustering
# ======================================================================
# Supported values: 'ConnectedComponents', 'Spectral'
CLUSTERING_ALGORITHM = 'ConnectedComponents'
MIN_CLUSTER_SIZE = 3  # fewest images that still count as a valid scene cluster

# ======================================================================
# Structure from Motion
# ======================================================================
MIN_VIEWS_FOR_TRIANGULATION = 2  # triangulation needs at least two views
PNP_RANSAC_THRESHOLD = 5.0  # reprojection threshold for solvePnPRansac
PNP_CONFIDENCE = 0.999  # confidence passed to PnPRansac
MIN_3D_POINTS_FOR_PNP = 6  # fewest 3D points required before attempting PnP

# ======================================================================
# Camera intrinsics (approximation; not submitted, but needed for E/PnP)
# ======================================================================
# A default K matrix is estimated. The real K varies per image, but this is
# a common simplification when intrinsics are neither provided nor estimated.
# The focal length is approximated from the image width; cx and cy are
# assumed to be the image centre and are computed per image later.
DEFAULT_FOCAL_LENGTH_FACTOR = 1.2

print(f"Constants defined. Using {FEATURE_EXTRACTOR_TYPE} features and {MATCHER_TYPE} matcher.")
print(f"Data Directory: {DATA_DIR}")

Constants defined. Using SIFT features and FLANN matcher.
Data Directory: ../data/image-matching-challenge-2025


In [3]:
# Read the dataset index: one entry per dataset, each holding its image records.
samples = dataset.load_dataset(DATA_DIR)

# Report how many images each dataset contains.
for dataset_name, image_records in samples.items():
    print(f'Dataset "{dataset_name}" -> num_images={len(image_records)}')

Dataset "imc2023_haiper" -> num_images=54
Dataset "imc2023_heritage" -> num_images=209
Dataset "imc2023_theather_imc2024_church" -> num_images=76
Dataset "imc2024_dioscuri_baalshamin" -> num_images=138
Dataset "imc2024_lizard_pond" -> num_images=214
Dataset "pt_brandenburg_british_buckingham" -> num_images=225
Dataset "pt_piazzasanmarco_grandplace" -> num_images=168
Dataset "pt_sacrecoeur_trevi_tajmahal" -> num_images=225
Dataset "pt_stpeters_stpauls" -> num_images=200
Dataset "amy_gardens" -> num_images=200
Dataset "fbk_vineyard" -> num_images=163
Dataset "ETs" -> num_images=22
Dataset "stairs" -> num_images=51


In [4]:
# Import matcher here to avoid circular import issues
from scripts.features import matching

def _list_image_files(directory):
    """Return the sorted names of .png/.jpg/.jpeg files in `directory` ([] if it does not exist)."""
    if not os.path.isdir(directory):
        return []
    return sorted(
        name for name in os.listdir(directory)
        if name.endswith(('.png', '.jpg', '.jpeg'))
    )


def _record_prediction(predictions, filename_to_index, img_id, cluster_index, rotation, translation):
    """Write cluster label and pose onto the prediction entry for `img_id`.

    Returns True when an entry was updated, False (after printing a warning)
    when `img_id` has no entry in `predictions`.
    """
    prediction_index = filename_to_index.get(img_id)
    if prediction_index is None:
        print(f"Warning: image {img_id} has no prediction entry; skipping.")
        return False
    entry = predictions[prediction_index]
    entry.cluster_index = cluster_index
    # Copy so the stored pose never aliases an object owned by the caller.
    entry.rotation = deepcopy(rotation)
    entry.translation = deepcopy(translation)
    return True


def process_dataset(dataset_id, test_image_dir, predictions, extractor, matcher):
    """Runs the full pipeline for a single dataset.

    Steps: feature extraction -> view graph -> clustering -> per-cluster SfM.
    Each prediction entry is updated in place with a cluster label
    ("outliers" or "cluster<N>") and the pose formatted by `camera.format_pose`.

    Args:
        dataset_id: Name of the dataset; also the sub-directory of `test_image_dir`.
        test_image_dir: Directory that contains one sub-directory per dataset.
        predictions: Sequence of objects exposing `filename` and writable
            `cluster_index`, `rotation` and `translation` attributes.
        extractor: Feature extractor forwarded to
            `extraction.load_and_extract_features_dataset`.
        matcher: Matcher forwarded to `clustering.build_view_graph` and
            `camera.estimate_poses_for_cluster`.

    Returns:
        The same `predictions` object, mutated in place.
    """
    print(f"\n--- Processing Dataset: {dataset_id} ---")

    dataset_path = os.path.join(test_image_dir, dataset_id)
    filename_to_index = {p.filename: idx for idx, p in enumerate(predictions)}

    # 1. Extract Features
    extracted_features, image_dims = extraction.load_and_extract_features_dataset(dataset_id, test_image_dir, extractor)

    if not extracted_features:
        print(f"No extracted_features extracted for dataset {dataset_id}. Marking all as outliers.")
        # Fall back to the directory listing. `dataset_path` is a plain string,
        # so the files are enumerated with `os` rather than `Path.glob`.
        for img_id in _list_image_files(dataset_path):
            r, t = camera.format_pose(None, None)
            _record_prediction(predictions, filename_to_index, img_id, "outliers", r, t)
        return predictions

    # Images that have dimensions but no features (extraction failed) still
    # take part in the graph so they can be reported as outliers.
    image_ids_in_dataset = list(extracted_features.keys())
    known_ids = set(image_ids_in_dataset)
    image_ids_in_dataset.extend(img_id for img_id in image_dims if img_id not in known_ids)

    # 2. Build View Graph
    G, pairwise_matches = clustering.build_view_graph(image_ids_in_dataset, extracted_features, matcher)

    # 3. Cluster Images
    clusters, outliers = clustering.cluster_images(G, algorithm=CLUSTERING_ALGORITHM, min_cluster_size=MIN_CLUSTER_SIZE)

    # 4. Process Outliers
    print(f"Marking {len(outliers)} images as outliers.")
    for img_id in outliers:
        r, t = camera.format_pose(None, None)
        _record_prediction(predictions, filename_to_index, img_id, "outliers", r, t)

    # 5. Run SfM per Cluster
    print(f"Running SfM for {len(clusters)} clusters...")
    for i, cluster_nodes in enumerate(clusters):
        # NOTE(review): the submission preview saved in this notebook shows scene
        # names such as "clustercluster1", so create_submission_file appears to
        # add its own "cluster" prefix - confirm whether cluster_index should be
        # a bare index instead of this label.
        cluster_label = f"cluster{i+1}"
        print(f"\nProcessing {cluster_label} ({len(cluster_nodes)} images)...")

        # Set for O(1) membership tests while filtering the pairwise matches.
        cluster_node_set = set(cluster_nodes)

        # Restrict features, dimensions and matches to the current cluster.
        cluster_features = {img_id: extracted_features[img_id] for img_id in cluster_nodes if img_id in extracted_features}
        cluster_dims = {img_id: image_dims[img_id] for img_id in cluster_nodes if img_id in image_dims}
        # A match is kept only when both of its images belong to the cluster.
        cluster_pairwise_matches = {
            (id1, id2): matches
            for (id1, id2), matches in pairwise_matches.items()
            if id1 in cluster_node_set and id2 in cluster_node_set
        }

        cluster_poses = camera.estimate_poses_for_cluster(
            cluster_nodes,
            cluster_features,
            cluster_dims,
            matcher,
            cluster_pairwise_matches  # Pass filtered matches
        )

        # Add results for this cluster; unregistered images get a (None, None) pose.
        for img_id in cluster_nodes:
            R, T = cluster_poses.get(img_id, (None, None))
            r, t = camera.format_pose(R, T)
            _record_prediction(predictions, filename_to_index, img_id, cluster_label, r, t)

        # Clean up memory
        del cluster_features, cluster_dims, cluster_poses, cluster_pairwise_matches
        gc.collect()

    print(f"--- Finished Processing Dataset: {dataset_id} ---")
    return predictions

In [None]:
# Process train datasets
# Only the smallest few datasets are run here; set to None to process all of them.
MAX_TRAIN_DATASETS = 3

if os.path.isdir(TRAIN_DIR):
    # Every sub-directory of TRAIN_DIR is treated as one dataset.
    train_datasets = [d for d in os.listdir(TRAIN_DIR) if os.path.isdir(os.path.join(TRAIN_DIR, d))]
    # Sort train datasets by their number of files, smallest first.
    train_datasets.sort(key=lambda name: len(os.listdir(os.path.join(TRAIN_DIR, name))))
    print("=== Processing Train Datasets ===")
    # Use the configured extractor/matcher so this cell agrees with the
    # settings announced by the configuration cell.
    extractor = extraction.get_feature_extractor(FEATURE_EXTRACTOR_TYPE, SIFT_NFEATURES)
    # NOTE(review): the second argument was the literal 'SIFT'; it is presumed
    # to be the feature type - confirm against matching.get_matcher.
    matcher = matching.get_matcher(MATCHER_TYPE, FEATURE_EXTRACTOR_TYPE)
    for dataset_name in train_datasets[:MAX_TRAIN_DATASETS]:
        if dataset_name not in samples:
            # A directory with no entry in the loaded samples has nothing to update.
            print(f"Skipping {dataset_name}: no entry in loaded samples.")
            continue
        samples[dataset_name] = process_dataset(dataset_name, TRAIN_DIR, samples[dataset_name], extractor, matcher)
else:
    print("Train directory not found")

=== Processing Train Datasets ===

--- Processing Dataset: ETs ---
Extracting features for 22 images in dataset ETs...


Features ETs: 100%|██████████| 22/22 [00:00<00:00, 41.05it/s]
Features ETs: 100%|██████████| 22/22 [00:00<00:00, 41.05it/s]


Building view graph for 22 images...


Matching pairs: 100%|██████████| 231/231 [00:04<00:00, 52.34it/s]



View graph built with 22 nodes and 62 edges.
Clustering graph using ConnectedComponents...
Found 1 clusters and 2 potential outliers.
Marking 2 images as outliers.
Running SfM for 1 clusters...

Processing cluster1 (20 images)...
Initializing SfM with pair (another_et_another_et002.png, another_et_another_et001.png) with 503 matches.
Essential matrix inliers: 467
Triangulated 455 initial 3D points.
Attempting to register 18 remaining images...
Initializing SfM with pair (another_et_another_et002.png, another_et_another_et001.png) with 503 matches.
Essential matrix inliers: 467
Triangulated 455 initial 3D points.
Attempting to register 18 remaining images...


Registering images: 100%|██████████| 18/18 [00:00<00:00, 1592.77it/s]



Successfully registered image another_et_another_et005.png (109 PnP inliers).
Successfully registered image another_et_another_et004.png (107 PnP inliers).
Successfully registered image another_et_another_et003.png (56 PnP inliers).
Successfully registered image another_et_another_et006.png (36 PnP inliers).
Successfully registered image another_et_another_et007.png (14 PnP inliers).
Finished SfM for cluster. Registered 7 out of 20 images.
--- Finished Processing Dataset: ETs ---

--- Processing Dataset: stairs ---
Extracting features for 51 images in dataset stairs...


Features stairs: 100%|██████████| 51/51 [00:06<00:00,  8.35it/s]
Features stairs: 100%|██████████| 51/51 [00:06<00:00,  8.35it/s]


Building view graph for 51 images...


Matching pairs: 100%|██████████| 1275/1275 [00:13<00:00, 92.12it/s] 



View graph built with 51 nodes and 212 edges.
Clustering graph using ConnectedComponents...
Found 1 clusters and 2 potential outliers.
Marking 2 images as outliers.
Running SfM for 1 clusters...

Processing cluster1 (49 images)...
Initializing SfM with pair (stairs_split_1_1710453576271.png, stairs_split_1_1710453955270.png) with 37 matches.
Essential matrix inliers: 34
Triangulated 20 initial 3D points.
Attempting to register 47 remaining images...
Initializing SfM with pair (stairs_split_1_1710453576271.png, stairs_split_1_1710453955270.png) with 37 matches.
Essential matrix inliers: 34
Triangulated 20 initial 3D points.
Attempting to register 47 remaining images...


Registering images: 100%|██████████| 47/47 [00:00<00:00, 66063.10it/s]



Successfully registered image stairs_split_2_1710453783374.png (7 PnP inliers).
Finished SfM for cluster. Registered 3 out of 49 images.
--- Finished Processing Dataset: stairs ---

--- Processing Dataset: imc2023_haiper ---
Extracting features for 54 images in dataset imc2023_haiper...


Features imc2023_haiper: 100%|██████████| 54/54 [00:14<00:00,  3.64it/s]
Features imc2023_haiper: 100%|██████████| 54/54 [00:14<00:00,  3.64it/s]


Building view graph for 54 images...


Matching pairs: 100%|██████████| 1431/1431 [02:51<00:00,  8.34it/s]



View graph built with 54 nodes and 412 edges.
Clustering graph using ConnectedComponents...
Found 1 clusters and 0 potential outliers.
Marking 0 images as outliers.
Running SfM for 1 clusters...

Processing cluster1 (54 images)...
Initializing SfM with pair (fountain_image_136.png, fountain_image_082.png) with 896 matches.
Essential matrix inliers: 634
Triangulated 634 initial 3D points.
Attempting to register 52 remaining images...
Initializing SfM with pair (fountain_image_136.png, fountain_image_082.png) with 896 matches.
Essential matrix inliers: 634
Triangulated 634 initial 3D points.
Attempting to register 52 remaining images...


Registering images: 100%|██████████| 52/52 [00:00<00:00, 932.83it/s]

Successfully registered image fountain_image_129.png (101 PnP inliers).
Successfully registered image fountain_image_000.png (44 PnP inliers).
Successfully registered image fountain_image_143.png (19 PnP inliers).
Successfully registered image fountain_image_071.png (35 PnP inliers).
Successfully registered image fountain_image_007.png (10 PnP inliers).
Successfully registered image fountain_image_012.png (7 PnP inliers).
Successfully registered image fountain_image_214.png (37 PnP inliers).
Successfully registered image fountain_image_056.png (12 PnP inliers).
Successfully registered image fountain_image_230.png (27 PnP inliers).
Successfully registered image fountain_image_041.png (12 PnP inliers).
Successfully registered image fountain_image_025.png (15 PnP inliers).
Successfully registered image fountain_image_101.png (8 PnP inliers).
Successfully registered image fountain_image_199.png (19 PnP inliers).
Successfully registered image fountain_image_116.png (18 PnP inliers).
Success




In [11]:
# Create a submission file.
submission.create_submission_file(samples, OUTPUT_FILE)

# Preview the first 10 lines in pure Python (same content as `head`), so the
# cell does not depend on an external shell command being available.
with open(OUTPUT_FILE, "r", encoding="utf-8", errors="replace") as submission_preview:
    for _, preview_line in zip(range(10), submission_preview):
        print(preview_line, end="")

dataset,scene,image,rotation_matrix,translation_vector
imc2023_haiper,clustercluster1,fountain_image_116.png,-0.904877030;-0.332588306;0.265673821;0.414172203;-0.543800618;0.729891960;-0.098279943;0.770497181;0.629821520,-0.386007028;-0.757245401;0.001618247
imc2023_haiper,clustercluster1,fountain_image_108.png,nan;nan;nan;nan;nan;nan;nan;nan;nan,nan;nan;nan
imc2023_haiper,clustercluster1,fountain_image_101.png,-0.424429027;0.859171113;-0.285805877;-0.806752358;-0.215513193;0.550186057;0.411109030;0.464089498;0.784608376,0.351741140;-0.678798907;-0.088142393
imc2023_haiper,clustercluster1,fountain_image_082.png,0.958820137;-0.200502699;0.201153209;0.020377353;0.754991023;0.655418431;-0.283282031;-0.624329419;0.727986310,-0.309231126;-0.839983378;0.445874461
imc2023_haiper,clustercluster1,fountain_image_071.png,0.332224305;-0.888808539;0.315668168;0.322766725;0.421604113;0.847391063;-0.886255410;-0.179636726;0.426944955,-0.368747138;-1.184375493;0.748708858
imc2023_haiper,clustercluster

In [12]:
# Score the submission against the training ground truth.
# Skip this when submitting a notebook for scoring: in that case it is enough
# to save the submission to /kaggle/working/submission.csv.

ground_truth_csv = os.path.join(DATA_DIR, "train_labels.csv")
score_thresholds_csv = os.path.join(DATA_DIR, "train_thresholds.csv")

final_score, dataset_scores = metric.score(
    gt_csv=ground_truth_csv,
    user_csv=OUTPUT_FILE,
    thresholds_csv=score_thresholds_csv,
    mask_csv=None,
    inl_cf=0,
    strict_cf=-1,
    verbose=True,
)

imc2023_haiper: score=0.00% (mAA=0.00%, clusterness=33.33%)
imc2023_heritage: score=0.00% (mAA=0.00%, clusterness=0.00%)
imc2023_theather_imc2024_church: score=0.00% (mAA=0.00%, clusterness=0.00%)
imc2024_dioscuri_baalshamin: score=0.00% (mAA=0.00%, clusterness=0.00%)
imc2024_lizard_pond: score=0.00% (mAA=0.00%, clusterness=0.00%)
pt_brandenburg_british_buckingham: score=0.00% (mAA=0.00%, clusterness=0.00%)
pt_piazzasanmarco_grandplace: score=0.00% (mAA=0.00%, clusterness=0.00%)
pt_sacrecoeur_trevi_tajmahal: score=0.00% (mAA=0.00%, clusterness=0.00%)
pt_stpeters_stpauls: score=0.00% (mAA=0.00%, clusterness=0.00%)
amy_gardens: score=0.00% (mAA=0.00%, clusterness=0.00%)
fbk_vineyard: score=0.00% (mAA=0.00%, clusterness=0.00%)
ETs: score=0.00% (mAA=0.00%, clusterness=47.50%)
stairs: score=0.00% (mAA=0.00%, clusterness=50.00%)
Average over all datasets: score=0.00% (mAA=0.00%, clusterness=10.06%)
