In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import normalize
from scipy.spatial.distance import cosine




In [2]:
# ==============================
# 1️⃣ Load Metadata
# ==============================
# Read the competition's training metadata table from the Kaggle input mount.
train_labels = pd.read_csv(
    filepath_or_buffer="/kaggle/input/image-matching-challenge-2025/train_labels.csv",
)

def parse_vector(vec_str):
    """Turn a ';'-separated string of numbers into a list of floats."""
    return [float(component) for component in vec_str.split(";")]

# Replace the two ';'-separated pose columns with parsed lists of floats.
train_labels = train_labels.assign(
    rotation_matrix=train_labels["rotation_matrix"].apply(parse_vector),
    translation_vector=train_labels["translation_vector"].apply(parse_vector),
)

# Map each (dataset, scene) pair to the list of image names recorded for it.
grouped_scenes = (
    train_labels
    .groupby(["dataset", "scene"])["image"]
    .apply(list)
    .to_dict()
)



In [3]:
# ==============================
# 2️⃣ Load Pretrained EfficientNet
# ==============================
def load_model():
    """Load an ImageNet-pretrained EfficientNet-B0 in evaluation mode.

    Returns:
        The torchvision EfficientNet-B0 module with ``eval()`` applied.
    """
    # `pretrained=True` is deprecated since torchvision 0.13 in favour of the
    # `weights=` enum; IMAGENET1K_V1 is the checkpoint `pretrained=True` maps to.
    weights_enum = getattr(models, "EfficientNet_B0_Weights", None)
    if weights_enum is not None:
        model = models.efficientnet_b0(weights=weights_enum.IMAGENET1K_V1)
    else:
        # Older torchvision without the weights enum: keep the legacy flag.
        model = models.efficientnet_b0(pretrained=True)
    model.eval()
    return model



In [4]:
# ==============================
# 3️⃣ Image Preprocessing
# ==============================
# Preprocessing pipeline: resize, center-crop to 224, convert to tensor,
# then apply ImageNet normalization.
transform = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

def preprocess_image(image_path):
    """Load an image from disk and turn it into a model-ready batch.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The output of the module-level ``transform`` applied to the RGB image,
        with a leading batch dimension added via ``unsqueeze(0)``.
    """
    # Open through a context manager so the file handle is released promptly;
    # `convert` returns an independent in-memory image that outlives the handle.
    with Image.open(image_path) as source_image:
        rgb_image = source_image.convert("RGB")
    return transform(rgb_image).unsqueeze(0)  # Add batch dimension



In [5]:
# ==============================
# 4️⃣ EfficientNet Embedding Extraction
# ==============================
def get_image_embedding(image_path, model):
    """Return the flattened `model.features` output for one image as a NumPy array."""
    batch = preprocess_image(image_path)

    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        flat_features = model.features(batch).flatten()

    return flat_features.numpy()



In [6]:
# ==============================
# 5️⃣ Compute Embeddings for a Scene
# ==============================
def compute_scene_embeddings(image_paths, model):
    """Build a mapping from each image path to its embedding."""
    return {path: get_image_embedding(path, model) for path in image_paths}



In [7]:
# ==============================
# 6️⃣ Clustering with DBSCAN (Using Embeddings)
# ==============================
def cluster_images(embeddings, eps=0.5, min_samples=3):
    """Cluster images with DBSCAN (cosine metric) on row-normalized embeddings.

    Args:
        embeddings: Mapping of image path -> 1-D embedding array.
        eps: DBSCAN neighbourhood radius, forwarded unchanged.
        min_samples: DBSCAN core-point threshold, forwarded unchanged.

    Returns:
        A ``(scene_groups, outliers)`` tuple. ``scene_groups`` maps each
        DBSCAN label (as a plain ``int``) to the list of image paths assigned
        to it; ``outliers`` lists the paths DBSCAN labelled ``-1``.
    """
    # Nothing to cluster: return empty results instead of failing inside
    # `normalize`, which rejects the 1-D array built from an empty mapping.
    if not embeddings:
        return {}, []

    image_paths = list(embeddings.keys())
    embedding_matrix = np.array([embeddings[img] for img in image_paths])

    # Scale every row to unit length before clustering.
    normalized_embeddings = normalize(embedding_matrix, axis=1)

    dbscan = DBSCAN(eps=eps, min_samples=min_samples, metric="cosine")
    labels = dbscan.fit_predict(normalized_embeddings)

    scene_groups = {}
    outliers = []

    for img, label in zip(image_paths, labels):
        if label == -1:
            outliers.append(img)
        else:
            # Plain int keys hash/compare equal to the numpy labels but are
            # friendlier to downstream formatting and serialization.
            scene_groups.setdefault(int(label), []).append(img)

    return scene_groups, outliers



In [8]:
# ==============================
# 7️⃣ Outlier Filtering via Cosine Similarity
# ==============================
def filter_outliers(embeddings, outlier_threshold=0.5):
    """Return the images whose cosine similarity to the mean embedding is below the threshold."""
    # Centroid of all embeddings, taken in the mapping's iteration order.
    stacked = np.array(list(embeddings.values()))
    centroid = np.mean(stacked, axis=0)

    # scipy's `cosine` is a distance, so similarity is 1 - distance.
    return [
        path
        for path, vector in embeddings.items()
        if 1 - cosine(vector, centroid) < outlier_threshold
    ]



In [9]:
# ==============================
# 8️⃣ Load Images from Dataset
# ==============================
def load_images_from_scene(dataset, scene):
    """Return the on-disk paths of the images listed for (dataset, scene), skipping missing files."""
    base_path = f"/kaggle/input/image-matching-challenge-2025/train/{dataset}/"
    listed_names = grouped_scenes.get((dataset, scene), [])

    existing_paths = []
    for name in listed_names:
        candidate = os.path.join(base_path, name)
        if os.path.exists(candidate):
            existing_paths.append(candidate)
    return existing_paths



In [10]:
# ==============================
# 9️⃣ Generate Submission File
# ==============================
def save_submission_file(results, output_path="submission.csv", labels=None):
    """Write clustering results and known poses to a CSV submission file.

    Args:
        results: Mapping ``dataset -> {cluster_id -> [image, ...]}``. The
            special cluster id ``"outliers"`` marks images without a pose.
            Images may be given as bare names or as file paths.
        output_path: Destination CSV path.
        labels: Optional DataFrame with ``dataset``, ``image``,
            ``rotation_matrix`` and ``translation_vector`` columns (the last
            two holding sequences of numbers). Defaults to the notebook-level
            ``train_labels`` table.

    Each row is ``dataset, scene, image, rotation_matrix, translation_vector``;
    outliers and images with no matching label row get ``nan`` poses.
    """
    if labels is None:
        labels = train_labels

    nan_rotation = ";".join(["nan"] * 9)
    nan_translation = ";".join(["nan"] * 3)

    # One pass over the labels instead of a DataFrame scan per image. Keyed by
    # (dataset, image) so equal file names in different datasets cannot be
    # confused; `setdefault` keeps the first matching row.
    pose_lookup = {}
    for label_dataset, label_image, label_rotation, label_translation in zip(
        labels["dataset"],
        labels["image"],
        labels["rotation_matrix"],
        labels["translation_vector"],
    ):
        pose_lookup.setdefault(
            (label_dataset, label_image), (label_rotation, label_translation)
        )

    submission_rows = []

    for dataset, clusters in results.items():
        for cluster_id, images in clusters.items():
            is_outlier_group = isinstance(cluster_id, str) and cluster_id == "outliers"
            # Outliers carry the plain "outliers" scene label, the same label
            # the training metadata uses, rather than "clusteroutliers".
            scene_name = "outliers" if is_outlier_group else f"cluster{cluster_id}"

            for img in images:
                # The pipeline stores full file paths while the labels hold
                # bare names; fall back to the basename so lookups can match.
                if (dataset, img) in pose_lookup:
                    image_name = img
                else:
                    image_name = os.path.basename(img)

                pose = None if is_outlier_group else pose_lookup.get((dataset, image_name))
                if pose is None:
                    rotation = nan_rotation
                    translation = nan_translation
                else:
                    rotation = ";".join(map(str, pose[0]))
                    translation = ";".join(map(str, pose[1]))

                submission_rows.append([dataset, scene_name, image_name, rotation, translation])

    # Create DataFrame from the submission rows and save to CSV
    submission_df = pd.DataFrame(
        submission_rows,
        columns=["dataset", "scene", "image", "rotation_matrix", "translation_vector"],
    )
    submission_df.to_csv(output_path, index=False)
    print(f"✅ Submission saved to {output_path}")


In [11]:
# ==============================
# 🔟 Run the Pipeline
# ==============================
if __name__ == "__main__":
    # End-to-end run: embed every labelled scene, cluster it, flag outliers,
    # and write one submission file covering all datasets.
    model = load_model()
    results = {}

    for (dataset, scene), image_list in grouped_scenes.items():
        print(f"\nProcessing Dataset: {dataset}, Scene: {scene}")
        
        image_paths = load_images_from_scene(dataset, scene)
        
        if not image_paths:
            print("No valid images found.")
            continue

        # Compute embeddings
        embeddings = compute_scene_embeddings(image_paths, model)

        # Cluster images into scenes
        scene_clusters, outliers = cluster_images(embeddings)

        # Further refine outliers
        refined_outliers = filter_outliers(embeddings)

        # An image is an outlier if either DBSCAN marked it as noise or it is
        # too dissimilar from the mean embedding. Using the union keeps every
        # image in the output exactly once (never both clustered and outlier,
        # never silently dropped).
        rejected = set(outliers) | set(refined_outliers)

        # Several scenes share one dataset, so accumulate into the dataset's
        # entry instead of overwriting it, and renumber clusters so ids from
        # different scenes cannot collide.
        dataset_results = results.setdefault(dataset, {"outliers": []})
        next_cluster_id = len(dataset_results) - 1  # cluster keys so far ("outliers" excluded)
        for members in scene_clusters.values():
            kept = [img for img in members if img not in rejected]
            if kept:
                dataset_results[next_cluster_id] = kept
                next_cluster_id += 1
        dataset_results["outliers"].extend(
            img for img in image_paths if img in rejected
        )

    # Re-insert the "outliers" entry so it comes after the clusters of each dataset.
    for dataset_results in results.values():
        dataset_results["outliers"] = dataset_results.pop("outliers")

    # Save submission file
    save_submission_file(results)


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 158MB/s]



Processing Dataset: ETs, Scene: ET

Processing Dataset: ETs, Scene: another_ET

Processing Dataset: ETs, Scene: outliers

Processing Dataset: amy_gardens, Scene: peach

Processing Dataset: fbk_vineyard, Scene: vineyard_split_1

Processing Dataset: fbk_vineyard, Scene: vineyard_split_2

Processing Dataset: fbk_vineyard, Scene: vineyard_split_3

Processing Dataset: imc2023_haiper, Scene: bike

Processing Dataset: imc2023_haiper, Scene: chairs

Processing Dataset: imc2023_haiper, Scene: fountain

Processing Dataset: imc2023_heritage, Scene: cyprus

Processing Dataset: imc2023_heritage, Scene: dioscuri

Processing Dataset: imc2023_heritage, Scene: outliers

Processing Dataset: imc2023_heritage, Scene: wall

Processing Dataset: imc2023_theather_imc2024_church, Scene: church

Processing Dataset: imc2023_theather_imc2024_church, Scene: kyiv-puppet-theater

Processing Dataset: imc2024_dioscuri_baalshamin, Scene: baalshamin

Processing Dataset: imc2024_dioscuri_baalshamin, Scene: dioscuri

Pro