# 3D Scene Reconstruction with Outlier Detection (Deep Learning Approach)
In this notebook, we'll develop a solution for reconstructing 3D scenes from image collections while identifying and filtering out unrelated images. Our approach involves detecting which images belong to the same scenes and which are outliers, followed by camera pose estimation for the images that belong togther.

## Setup and Configuration

In [1]:
# Import necessary libraries
import os
import sys
import gc
import warnings
import traceback
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed

# Third-party imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import networkx as nx
from sklearn.cluster import DBSCAN, SpectralClustering
from tqdm import tqdm
from scipy.sparse import lil_matrix
from scipy.optimize import least_squares

# Torch imports
import torch
import torch.nn as nn
import torch.nn.functional as F

# Check for torch_geometric
try:
    import torch_geometric
    from torch_geometric.nn import GCNConv
    from torch_geometric.data import Data
    HAS_TORCH_GEOMETRIC = True
    print(f"torch_geometric is available (version {torch_geometric.__version__})")
except ImportError:
    HAS_TORCH_GEOMETRIC = False
    print("torch_geometric is not available, will use fallback clustering methods")

# Set random seed for reproducibility
np.random.seed(42)
torch.manual_seed(42)

# Define paths
TRAIN_DIR = "/kaggle/input/image-matching-challenge-2025/train/"  # Path to training data
TEST_DIR = "/kaggle/input/image-matching-challenge-2025/test/"    # Path to test data
OUTPUT_FILE = "submission.csv"  # Path for output file

# LightGlue model paths
LIGHTGLUE_DISK_PATH = "/kaggle/input/lightglue/pytorch/disk/1/disk_lightglue.pth"
LIGHTGLUE_SIFT_PATH = "/kaggle/input/lightglue/pytorch/sift/1/sift_lightglue.pth"

# SuperGlue/SuperPoint path
SUPERGLUE_DIR = "/kaggle/working/superglue_models"

# Suppress warnings
warnings.filterwarnings('ignore')

# Check GPU availability and set up CUDA for GPU acceleration
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    device = torch.device("cuda")
    torch.cuda.empty_cache()
    torch.backends.cudnn.benchmark = True  # Optimize CUDA performance
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
    # Print GPU memory info
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    device = torch.device("cpu")
    print("Using CPU")

# Memory management function
def free_memory():
    """Free GPU memory and call garbage collector."""
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()

# Define parameters for deep learning-based feature extraction
FEATURE_PARAMS = {
    'superpoint': {
        'weights': 'superpoint',  # Use pretrained weights
        'nms_radius': 4,          # Non-maximum suppression radius
        'max_keypoints': 4000,    # Maximum number of keypoints to detect
        'keypoint_threshold': 0.005,  # Keypoint confidence threshold
    },
    'superglue': {
        'weights': 'outdoor',     # Pretrained weights for outdoor scenes
        'sinkhorn_iterations': 20, # Number of Sinkhorn iterations
        'match_threshold': 0.2,   # Matching threshold
        'ratio_test': 0.7,        # Ratio test threshold for traditional matching
    }
}

# Parameters for pose estimation and verification
MATCHING_PARAMS = {
    'ransac_threshold': 0.5,     # RANSAC threshold (pixels)
    'min_inliers': 20,           # Minimum number of inliers
    'min_inlier_ratio': 0.4,     # Minimum inlier ratio
    'geometric_verification': True  # Use multi-stage geometric verification
}

torch_geometric is not available, will use fallback clustering methods
CUDA available: True
Using GPU: Tesla T4
GPU Memory: 15.83 GB


## Setup SuperGlue/Superpoint Models

In [2]:
# Create directory for SuperGlue/SuperPoint models (if not using LightGlue)
os.makedirs(SUPERGLUE_DIR, exist_ok=True)

# Check if SuperGlue/SuperPoint model files already exist
superpoint_exists = os.path.exists(f"{SUPERGLUE_DIR}/superpoint_v1.pth")
superglue_exists = os.path.exists(f"{SUPERGLUE_DIR}/superglue_outdoor.pth")
superpoint_py_exists = os.path.exists(f"{SUPERGLUE_DIR}/superpoint.py")
superglue_py_exists = os.path.exists(f"{SUPERGLUE_DIR}/superglue.py")
init_py_exists = os.path.exists(f"{SUPERGLUE_DIR}/__init__.py")

# Download model files if needed
if not superpoint_exists:
    print("Downloading SuperPoint model...")
    !wget -q -O {SUPERGLUE_DIR}/superpoint_v1.pth https://github.com/magicleap/SuperGluePretrainedNetwork/raw/master/models/weights/superpoint_v1.pth

if not superglue_exists:
    print("Downloading SuperGlue model...")
    !wget -q -O {SUPERGLUE_DIR}/superglue_outdoor.pth https://github.com/magicleap/SuperGluePretrainedNetwork/raw/master/models/weights/superglue_outdoor.pth

# Download Python modules if needed
if not superpoint_py_exists:
    print("Downloading SuperPoint Python module...")
    !wget -q -O {SUPERGLUE_DIR}/superpoint.py https://raw.githubusercontent.com/magicleap/SuperGluePretrainedNetwork/master/models/superpoint.py

if not superglue_py_exists:
    print("Downloading SuperGlue Python module...")
    !wget -q -O {SUPERGLUE_DIR}/superglue.py https://raw.githubusercontent.com/magicleap/SuperGluePretrainedNetwork/master/models/superglue.py

if not init_py_exists:
    print("Downloading __init__.py module...")
    !wget -q -O {SUPERGLUE_DIR}/__init__.py https://raw.githubusercontent.com/magicleap/SuperGluePretrainedNetwork/master/models/__init__.py

# Add SuperGlue directory to Python path
sys.path.append(SUPERGLUE_DIR)

# Verify SuperGlue/SuperPoint setup
SUPERGLUE_AVAILABLE = (
    os.path.exists(f"{SUPERGLUE_DIR}/superpoint.py") and 
    os.path.exists(f"{SUPERGLUE_DIR}/superpoint_v1.pth")
)

if SUPERGLUE_AVAILABLE:
    print("SuperPoint and SuperGlue files are available")
else:
    print("Warning: SuperPoint and SuperGlue files are not available")

# Verify LightGlue models are available
LIGHTGLUE_DISK_AVAILABLE = os.path.exists(LIGHTGLUE_DISK_PATH)
LIGHTGLUE_SIFT_AVAILABLE = os.path.exists(LIGHTGLUE_SIFT_PATH)

if LIGHTGLUE_DISK_AVAILABLE:
    print("LightGlue DISK model is available")
else:
    print("Warning: LightGlue DISK model is not available at the expected path")

if LIGHTGLUE_SIFT_AVAILABLE:
    print("LightGlue SIFT model is available")
else:
    print("Warning: LightGlue SIFT model is not available at the expected path")

# Check for Kornia (required for LightGlue)
try:
    import kornia
    KORNIA_AVAILABLE = True
    print(f"Kornia is available (version {kornia.__version__})")
except ImportError:
    KORNIA_AVAILABLE = False
    print("Warning: Kornia is not available, LightGlue will not work")

Downloading SuperPoint model...
Downloading SuperGlue model...
Downloading SuperPoint Python module...
Downloading SuperGlue Python module...
Downloading __init__.py module...
SuperPoint and SuperGlue files are available
LightGlue DISK model is available
LightGlue SIFT model is available
Kornia is available (version 0.8.0)


## Read Training CSV Files and Extract Statistics

In [3]:
# Read training labels and thresholds from CSV files
print("\nReading training CSV files...")

def read_training_csvs():
    """Read training CSV files with ground truth scene labels.
    
    Returns:
        Dictionary with training CSV data
    """
    csv_data = {
        'labels': None,
        'thresholds': None
    }
    
    # Look for train_labels.csv
    labels_path = '/kaggle/input/image-matching-challenge-2025/train_labels.csv'
    if Path(labels_path).exists():
        try:
            labels_df = pd.read_csv(labels_path)
            print(f"Found train_labels.csv with {len(labels_df)} rows")
            csv_data['labels'] = labels_df
        except Exception as e:
            print(f"Error reading train_labels.csv: {e}")
    else:
        print("train_labels.csv not found")
        
    # Look for train_thresholds.csv
    thresholds_path = '/kaggle/input/image-matching-challenge-2025/train_thresholds.csv'
    if Path(thresholds_path).exists():
        try:
            thresholds_df = pd.read_csv(thresholds_path)
            print(f"Found train_thresholds.csv with {len(thresholds_df)} rows")
            csv_data['thresholds'] = thresholds_df
        except Exception as e:
            print(f"Error reading train_thresholds.csv: {e}")
    else:
        print("train_thresholds.csv not found")
        
    return csv_data

# Read training CSVs
training_csvs = read_training_csvs()

# Extract training statistics
if training_csvs['labels'] is not None:
    print("\nExtracting ground truth information from training CSV files...")
    
    # Extract ground truth scene assignments
    labels_df = training_csvs['labels']
    
    # Get unique datasets and count their scenes
    dataset_scenes = labels_df.groupby('dataset')['scene'].nunique()
    
    # Build training structure from CSV data
    training_structure = {}
    
    for dataset, scene_count in dataset_scenes.items():
        # Get dataset images
        dataset_images = labels_df[labels_df['dataset'] == dataset]
        
        # Count images per scene
        scene_counts = dataset_images.groupby('scene').size()
        
        # Check for outliers
        has_outliers = 'outliers' in scene_counts.index
        outlier_count = scene_counts.get('outliers', 0)
        
        # Calculate total images and outlier percentage
        total_images = dataset_images.shape[0]
        outlier_percentage = (outlier_count / total_images) * 100 if total_images > 0 else 0
        
        # Add to training structure
        training_structure[dataset] = {
            'path': Path(TRAIN_DIR) / dataset,
            'expected_scene_count': scene_count - (1 if has_outliers else 0),  # Don't count outliers as a scene
            'scene_counts': scene_counts.to_dict(),
            'total_images': total_images,
            'outlier_count': outlier_count,
            'outlier_percentage': outlier_percentage
        }
    
    # Print training structure information
    print("\nTraining Structure from CSV:")
    for dataset, info in training_structure.items():
        scene_count = info['expected_scene_count']
        print(f"  {dataset}: {scene_count} scenes, {info['total_images']} images, {info['outlier_count']} outliers ({info['outlier_percentage']:.1f}%)")
    
    # Calculate training statistics
    scene_counts = [info['expected_scene_count'] for _, info in training_structure.items()]
    image_per_scene = []
    outlier_percentages = [info['outlier_percentage'] for _, info in training_structure.items()]
    
    # Calculate average images per scene (excluding outliers)
    for dataset, info in training_structure.items():
        scene_image_counts = {k: v for k, v in info['scene_counts'].items() if k != 'outliers'}
        if scene_image_counts:
            image_per_scene.extend(scene_image_counts.values())
    
    training_stats = {
        'avg_scenes_per_dataset': np.mean(scene_counts) if scene_counts else 2.0,
        'avg_images_per_scene': np.mean(image_per_scene) if image_per_scene else 15.0,
        'avg_outlier_percentage': np.mean(outlier_percentages) if outlier_percentages else 10.0,
        'outlier_datasets_percentage': sum(1 for info in training_structure.values() if info['outlier_count'] > 0) / len(training_structure) * 100 if training_structure else 50.0
    }
    
    print("\nTraining Statistics from CSV:")
    print(f"  Average scenes per dataset: {training_stats['avg_scenes_per_dataset']:.2f}")
    print(f"  Average images per scene: {training_stats['avg_images_per_scene']:.2f}")
    print(f"  Average outlier percentage: {training_stats['avg_outlier_percentage']:.2f}%")
    print(f"  Datasets with outliers: {training_stats['outlier_datasets_percentage']:.2f}%")
    
    # Check if thresholds are available
    if training_csvs['thresholds'] is not None:
        thresholds_df = training_csvs['thresholds']
        print("\nThreshold information available for scenes")
else:
    print("No training CSV files found. Using default parameters.")
    # Use default parameters
    training_structure = {}
    training_stats = {
        'avg_scenes_per_dataset': 2.31,  # Based on previous analysis
        'avg_images_per_scene': 60.77,
        'avg_outlier_percentage': 5.85,
        'outlier_datasets_percentage': 30.77
    }

# Set up test dataset info
test_dataset_info = {}
for dataset_path in Path(TEST_DIR).glob('*'):
    if dataset_path.is_dir():
        dataset_name = dataset_path.name
        
        # If we have this dataset in training, use its information
        if dataset_name in training_structure:
            expected_scene_count = training_structure[dataset_name]['expected_scene_count']
            print(f"Using scene count from training for {dataset_name}: {expected_scene_count} scenes")
            
            test_dataset_info[dataset_name] = {
                'path': dataset_path,
                'expected_scene_count': expected_scene_count
            }
        else:
            # No matching training data, use the average
            avg_scenes = int(round(training_stats['avg_scenes_per_dataset']))
            test_dataset_info[dataset_name] = {
                'path': dataset_path,
                'expected_scene_count': avg_scenes
            }
            print(f"No training data for {dataset_name}, using average: {avg_scenes} scenes")


Reading training CSV files...
Found train_labels.csv with 1945 rows
Found train_thresholds.csv with 30 rows

Extracting ground truth information from training CSV files...

Training Structure from CSV:
  ETs: 2 scenes, 22 images, 3 outliers (13.6%)
  amy_gardens: 1 scenes, 200 images, 0 outliers (0.0%)
  fbk_vineyard: 3 scenes, 163 images, 0 outliers (0.0%)
  imc2023_haiper: 3 scenes, 54 images, 0 outliers (0.0%)
  imc2023_heritage: 3 scenes, 209 images, 61 outliers (29.2%)
  imc2023_theather_imc2024_church: 2 scenes, 76 images, 0 outliers (0.0%)
  imc2024_dioscuri_baalshamin: 2 scenes, 138 images, 24 outliers (17.4%)
  imc2024_lizard_pond: 2 scenes, 214 images, 34 outliers (15.9%)
  pt_brandenburg_british_buckingham: 3 scenes, 225 images, 0 outliers (0.0%)
  pt_piazzasanmarco_grandplace: 2 scenes, 168 images, 0 outliers (0.0%)
  pt_sacrecoeur_trevi_tajmahal: 3 scenes, 225 images, 0 outliers (0.0%)
  pt_stpeters_stpauls: 2 scenes, 200 images, 0 outliers (0.0%)
  stairs: 2 scenes, 51 i

## LightGlue Implementation

In [15]:
class LightGlueFeatureMatcher:
    """Feature extractor and matcher without relying on Kornia's LightGlue implementation."""
    
    def __init__(self, max_keypoints=4000, use_disk=True):
        """Initialize custom feature extractor and matcher.
        
        Args:
            max_keypoints: Maximum number of keypoints to detect
            use_disk: Whether to use DISK (True) or SIFT (False) features
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.max_keypoints = max_keypoints
        self.use_disk = use_disk
        
        # Initialize as not available
        self.available = False
        self.extractor = None
        
        try:
            # Import required modules
            import kornia as K
            import kornia.feature as KF
            
            # Initialize the appropriate extractor
            if use_disk:
                print("Initializing Custom DISK feature extractor...")
                # Check if we can use DISK
                try:
                    # Try to initialize DISK extractor
                    self.extractor = KF.DISK.from_pretrained("depth").to(self.device)
                    print("DISK feature extractor initialized successfully")
                    self.available = True
                except Exception as e:
                    print(f"Error initializing DISK extractor: {e}")
                    traceback.print_exc()
            else:
                print("Initializing Custom SIFT feature extractor...")
                # For SIFT, we don't need a pretrained extractor
                self.available = True
                print("SIFT feature extractor initialized successfully")
                
        except Exception as e:
            print(f"Error initializing feature extractor: {e}")
            traceback.print_exc()
    
    def extract_disk(self, image_path):
        """Extract features using DISK with safer implementation.
        
        Args:
            image_path: Path to the image
            
        Returns:
            Dictionary with keypoints, descriptors, and dimensions
        """
        if not self.available or not self.use_disk or self.extractor is None:
            return None
            
        try:
            import kornia as K
            
            # Load image
            img = K.io.load_image(str(image_path), K.io.ImageLoadType.RGB32, device=self.device)[None, ...]
            
            # Extract features
            with torch.no_grad():
                # Try to extract features with the extractor
                response = self.extractor(img, self.max_keypoints, pad_if_not_divisible=True)
                
                # Check if we have a proper response object
                if hasattr(response, 'keypoints') and hasattr(response, 'descriptors'):
                    # Convert to numpy
                    keypoints = response.keypoints[0].cpu().numpy()
                    descriptors = response.descriptors[0].cpu().numpy()
                else:
                    # Handle case where response is a tuple or different format
                    print("DISK extractor response format unexpected, parsing manually...")
                    # Assuming it returns keypoints, descriptors as first two elements
                    if isinstance(response, tuple) and len(response) >= 2:
                        keypoints = response[0][0].cpu().numpy()
                        descriptors = response[1][0].cpu().numpy()
                    else:
                        print(f"Cannot parse DISK response: {type(response)}")
                        return None
            
            # Get image dimensions
            dimensions = (img.shape[2], img.shape[3])  # (height, width)
            
            # Print feature statistics
            print(f"Extracted {len(keypoints)} DISK features from {image_path.name}")
            
            return {
                'keypoints': keypoints,
                'descriptors': descriptors,
                'dimensions': dimensions,
                'path': image_path,
                'backend': 'lightglue_disk'
            }
        except Exception as e:
            print(f"Error extracting DISK features: {e}")
            traceback.print_exc()
            return None
    
    def extract_sift(self, image_path):
        """Extract features using OpenCV SIFT.
        
        Args:
            image_path: Path to the image
            
        Returns:
            Dictionary with keypoints, descriptors, and dimensions
        """
        if not self.available or self.use_disk:
            return None
            
        try:
            # Load image with OpenCV for SIFT
            img_cv = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)
            if img_cv is None:
                return None
                
            # Create SIFT detector with optimized parameters
            sift = cv2.SIFT_create(
                nfeatures=self.max_keypoints,
                nOctaveLayers=5,
                contrastThreshold=0.04,
                edgeThreshold=15,
                sigma=1.6
            )
            
            # Detect keypoints and compute descriptors
            cv_keypoints, cv_descriptors = sift.detectAndCompute(img_cv, None)
            
            if cv_keypoints is None or len(cv_keypoints) == 0:
                return None
            
            # Convert keypoints to numpy arrays
            keypoints = np.array([kp.pt for kp in cv_keypoints])
            
            # Convert descriptors to proper format
            descriptors = cv_descriptors.astype(np.float32)
            
            # Get image dimensions
            dimensions = img_cv.shape
            
            # Print feature statistics
            print(f"Extracted {len(keypoints)} SIFT features from {image_path.name}")
            
            return {
                'keypoints': keypoints,
                'descriptors': descriptors,
                'dimensions': dimensions,
                'path': image_path,
                'backend': 'lightglue_sift'
            }
        except Exception as e:
            print(f"Error extracting SIFT features: {e}")
            traceback.print_exc()
            return None
    
    def extract(self, image_path):
        """Extract features based on the chosen method (DISK or SIFT).
        
        Args:
            image_path: Path to the image
            
        Returns:
            Dictionary with keypoints, descriptors, and dimensions
        """
        if self.use_disk:
            return self.extract_disk(image_path)
        else:
            return self.extract_sift(image_path)
    
    def match_disk(self, features1, features2):
        """Match DISK features between two images using custom matcher.
        
        Args:
            features1: Features from the first image
            features2: Features from the second image
            
        Returns:
            List of matches as cv2.DMatch objects
        """
        if not self.available or not self.use_disk:
            return []
        
        # Use custom matcher based on cosine similarity
        return self._match_descriptors(features1['descriptors'], features2['descriptors'])
    
    def match_sift(self, features1, features2):
        """Match SIFT features between two images using custom matcher.
        
        Args:
            features1: Features from the first image
            features2: Features from the second image
            
        Returns:
            List of matches as cv2.DMatch objects
        """
        if not self.available or self.use_disk:
            return []
        
        # Use custom matcher based on cosine similarity
        return self._match_descriptors(features1['descriptors'], features2['descriptors'])
    
    def _match_descriptors(self, desc1, desc2):
        """Match descriptors using cosine similarity and ratio test.
        
        Args:
            desc1, desc2: Feature descriptors as numpy arrays
            
        Returns:
            List of cv2.DMatch objects
        """
        try:
            # Fix descriptor orientation if necessary
            # SIFT descriptors are typically (N, 128) while SuperPoint are (256, N)
            if desc1.shape[0] < desc1.shape[1]:
                # Assume descriptors are in the format (N, D)
                pass
            else:
                # Transpose descriptors from (D, N) to (N, D)
                desc1 = desc1.T
                desc2 = desc2.T
            
            # Print descriptor shapes for debugging
            print(f"Matching descriptors with shapes: {desc1.shape} and {desc2.shape}")
            
            # Normalize descriptors for cosine similarity
            desc1_norm = desc1 / (np.linalg.norm(desc1, axis=1, keepdims=True) + 1e-8)
            desc2_norm = desc2 / (np.linalg.norm(desc2, axis=1, keepdims=True) + 1e-8)
            
            # Compute similarity matrix
            similarity = desc1_norm @ desc2_norm.T
            
            # Apply ratio test
            good_matches = []
            for i in range(similarity.shape[0]):
                # Get similarities for this descriptor
                scores = similarity[i]
                
                # Find best match
                best_idx = np.argmax(scores)
                best_score = scores[best_idx]
                
                # Find second best match
                scores_copy = scores.copy()
                scores_copy[best_idx] = -1
                second_best_idx = np.argmax(scores_copy)
                second_best_score = scores_copy[second_best_idx]
                
                # Apply ratio test (lower ratio means stricter test)
                ratio = 0.8
                if best_score > 0.6 and best_score > ratio * second_best_score:
                    m = cv2.DMatch()
                    m.queryIdx = i
                    m.trainIdx = best_idx
                    m.distance = 1.0 - best_score  # Convert similarity to distance
                    good_matches.append(m)
            
            print(f"Found {len(good_matches)} matches with ratio test")
            return good_matches
        except Exception as e:
            print(f"Error in matching descriptors: {e}")
            traceback.print_exc()
            return []
    
    def match(self, features1, features2):
        """Match features based on the chosen method (DISK or SIFT).
        
        Args:
            features1: Features from the first image
            features2: Features from the second image
            
        Returns:
            List of matches as cv2.DMatch objects
        """
        if self.use_disk:
            return self.match_disk(features1, features2)
        else:
            return self.match_sift(features1, features2)

# Test LightGlue initialization
print("\nInitializing custom feature extractors...")
disk_extractor = LightGlueFeatureMatcher(use_disk=True)
sift_extractor = LightGlueFeatureMatcher(use_disk=False)


Initializing custom feature extractors...
Initializing Custom DISK feature extractor...
DISK feature extractor initialized successfully
Initializing Custom SIFT feature extractor...
SIFT feature extractor initialized successfully


## SuperPoint and SuperGlue Implementation

In [16]:
# Create weights directory if it doesn't exist
os.makedirs(f"{SUPERGLUE_DIR}/weights", exist_ok=True)

# Download SuperPoint and SuperGlue models to base directory
if not os.path.exists(f"{SUPERGLUE_DIR}/superpoint_v1.pth"):
    print("Downloading SuperPoint model...")
    !wget -q -O {SUPERGLUE_DIR}/superpoint_v1.pth https://github.com/magicleap/SuperGluePretrainedNetwork/raw/master/models/weights/superpoint_v1.pth

if not os.path.exists(f"{SUPERGLUE_DIR}/superglue_outdoor.pth"):
    print("Downloading SuperGlue outdoor model...")
    !wget -q -O {SUPERGLUE_DIR}/superglue_outdoor.pth https://github.com/magicleap/SuperGluePretrainedNetwork/raw/master/models/weights/superglue_outdoor.pth

# Now copy the files to weights directory with the correct names
# For SuperPoint, copy to both the original name and 'superpoint_v1.pth'
if not os.path.exists(f"{SUPERGLUE_DIR}/weights/superpoint_v1.pth"):
    print("Copying SuperPoint model to weights directory...")
    !cp {SUPERGLUE_DIR}/superpoint_v1.pth {SUPERGLUE_DIR}/weights/

# For SuperGlue, we need to rename to just 'outdoor.pth'
if not os.path.exists(f"{SUPERGLUE_DIR}/weights/outdoor.pth"):
    print("Copying SuperGlue model to weights directory with correct name...")
    !cp {SUPERGLUE_DIR}/superglue_outdoor.pth {SUPERGLUE_DIR}/weights/outdoor.pth

# Verify files exist in the expected locations
print(f"SuperPoint model in base dir: {os.path.exists(f'{SUPERGLUE_DIR}/superpoint_v1.pth')}")
print(f"SuperPoint model in weights dir: {os.path.exists(f'{SUPERGLUE_DIR}/weights/superpoint_v1.pth')}")
print(f"SuperGlue model in base dir: {os.path.exists(f'{SUPERGLUE_DIR}/superglue_outdoor.pth')}")
print(f"SuperGlue model in weights dir (renamed): {os.path.exists(f'{SUPERGLUE_DIR}/weights/outdoor.pth')}")

# Download Python modules if needed
if not os.path.exists(f"{SUPERGLUE_DIR}/superpoint.py"):
    print("Downloading SuperPoint Python module...")
    !wget -q -O {SUPERGLUE_DIR}/superpoint.py https://raw.githubusercontent.com/magicleap/SuperGluePretrainedNetwork/master/models/superpoint.py

if not os.path.exists(f"{SUPERGLUE_DIR}/superglue.py"):
    print("Downloading SuperGlue Python module...")
    !wget -q -O {SUPERGLUE_DIR}/superglue.py https://raw.githubusercontent.com/magicleap/SuperGluePretrainedNetwork/master/models/superglue.py

if not os.path.exists(f"{SUPERGLUE_DIR}/__init__.py"):
    print("Downloading __init__.py module...")
    !wget -q -O {SUPERGLUE_DIR}/__init__.py https://raw.githubusercontent.com/magicleap/SuperGluePretrainedNetwork/master/models/__init__.py

# Fix the SuperGlue source code to avoid the 'shape' attribute error
def fix_superglue_source():
    """Fix the SuperGlue source code to handle the shape attribute properly."""
    superglue_path = f"{SUPERGLUE_DIR}/superglue.py"
    if not os.path.exists(superglue_path):
        print("SuperGlue source not found")
        return
    
    # Read the file
    with open(superglue_path, 'r') as f:
        content = f.read()
    
    # Fix the normalize_keypoints function call
    if "kpts0 = normalize_keypoints(kpts0, data['image0'].shape)" in content:
        modified_content = content.replace(
            "kpts0 = normalize_keypoints(kpts0, data['image0'].shape)",
            "kpts0 = normalize_keypoints(kpts0, data['image0']['shape'])"
        )
        modified_content = modified_content.replace(
            "kpts1 = normalize_keypoints(kpts1, data['image1'].shape)",
            "kpts1 = normalize_keypoints(kpts1, data['image1']['shape'])"
        )
        
        # Write the modified content
        with open(superglue_path, 'w') as f:
            f.write(modified_content)
        print("Fixed SuperGlue source code to handle image shape properly")
    else:
        print("Could not find the line to fix in SuperGlue source")

# Fix the SuperGlue source
fix_superglue_source()

class SimpleKeypointDetector:
    """Base class for keypoint detectors."""
    
    def __init__(self):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class SimpleSuperPointDetector(SimpleKeypointDetector):
    """SuperPoint feature detector and descriptor."""
    
    def __init__(self, max_keypoints=4000, keypoint_threshold=0.005):
        """Initialize SuperPoint model.
        
        Args:
            max_keypoints: Maximum number of keypoints to detect
            keypoint_threshold: Keypoint confidence threshold
        """
        super().__init__()
        
        # Check if SuperGlue is available
        if not os.path.exists(f"{SUPERGLUE_DIR}/superpoint.py"):
            print("SuperPoint not available - required files missing")
            self.detector = None
            return
            
        try:
            # Import the SuperPoint model
            sys.path.append(SUPERGLUE_DIR)
            from superpoint import SuperPoint
            
            # Configure SuperPoint
            config = {
                'nms_radius': 4,
                'keypoint_threshold': keypoint_threshold,
                'max_keypoints': max_keypoints,
                # SuperPoint looks for 'superpoint_v1.pth' by default
                'weights': 'superpoint_v1'
            }
            
            # Create the model
            self.detector = SuperPoint(config)
            
            # Load weights manually from one of the possible paths
            if os.path.exists(f"{SUPERGLUE_DIR}/weights/superpoint_v1.pth"):
                weights_path = f"{SUPERGLUE_DIR}/weights/superpoint_v1.pth"
            elif os.path.exists(f"{SUPERGLUE_DIR}/superpoint_v1.pth"):
                weights_path = f"{SUPERGLUE_DIR}/superpoint_v1.pth"
            else:
                raise FileNotFoundError("SuperPoint weights not found")
                
            print(f"Loading SuperPoint model from {weights_path}")
            state_dict = torch.load(weights_path, map_location=self.device)
            self.detector.load_state_dict(state_dict)
            
            # Move to device and set to eval mode
            self.detector = self.detector.to(self.device).eval()
            print("SuperPoint model loaded successfully")
        except Exception as e:
            print(f"Error loading SuperPoint model: {e}")
            traceback.print_exc()
            self.detector = None
    
    def detect_and_compute(self, image):
        """Detect keypoints and compute descriptors.
        
        Args:
            image: Input image in BGR format
            
        Returns:
            Tuple of (keypoints, descriptors, scores) or None if detection fails
        """
        if self.detector is None:
            return None
            
        try:
            # Convert image to grayscale if necessary
            if len(image.shape) == 3:
                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            else:
                gray = image
            
            # Normalize and convert to tensor
            img = torch.from_numpy(gray).float() / 255.0
            img = img.unsqueeze(0).unsqueeze(0).to(self.device)
            
            # Detect keypoints and compute descriptors
            with torch.no_grad():
                pred = self.detector({'image': img})
            
            # Convert results to numpy arrays
            keypoints = pred['keypoints'][0].cpu().numpy()
            scores = pred['scores'][0].cpu().numpy()
            descriptors = pred['descriptors'][0].cpu().numpy()
            
            # Print shape information for debugging
            print(f"SuperPoint: Found {len(keypoints)} keypoints")
            print(f"Keypoints shape: {keypoints.shape}, Descriptors shape: {descriptors.shape}")
            
            return keypoints, descriptors, scores
        except Exception as e:
            print(f"Error detecting keypoints: {e}")
            traceback.print_exc()
            return None

class SimpleSuperGlueMatcher:
    """SuperGlue feature matcher."""
    
    def __init__(self, match_threshold=0.2):
        """Initialize SuperGlue model.
        
        Args:
            match_threshold: Matching confidence threshold
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.match_threshold = match_threshold
        
        # Check if SuperGlue is available
        if not os.path.exists(f"{SUPERGLUE_DIR}/superglue.py"):
            print("SuperGlue not available - required files missing")
            self.matcher = None
            return
            
        try:
            # Import the SuperGlue model
            sys.path.append(SUPERGLUE_DIR)
            from superglue import SuperGlue
            
            # Configure SuperGlue
            config = {
                'weights': 'outdoor',  # Will look for weights/outdoor.pth
                'sinkhorn_iterations': 20,
                'match_threshold': match_threshold
            }
            
            # Create the model
            self.matcher = SuperGlue(config)
            
            # Load weights manually from one of the possible paths
            if os.path.exists(f"{SUPERGLUE_DIR}/weights/outdoor.pth"):
                weights_path = f"{SUPERGLUE_DIR}/weights/outdoor.pth"
            elif os.path.exists(f"{SUPERGLUE_DIR}/superglue_outdoor.pth"):
                weights_path = f"{SUPERGLUE_DIR}/superglue_outdoor.pth"
            else:
                raise FileNotFoundError("SuperGlue weights not found")
                
            print(f"Loading SuperGlue model from {weights_path}")
            state_dict = torch.load(weights_path, map_location=self.device)
            self.matcher.load_state_dict(state_dict)
            
            # Move to device and set to eval mode
            self.matcher = self.matcher.to(self.device).eval()
            print("SuperGlue model loaded successfully")
        except Exception as e:
            print(f"Error loading SuperGlue model: {e}")
            traceback.print_exc()
            self.matcher = None
    
    def match(self, kp1, desc1, kp2, desc2, scores1=None, scores2=None):
        """Match features between two images.
        
        Args:
            kp1, kp2: Keypoints from the first and second images
            desc1, desc2: Descriptors from the first and second images
            scores1, scores2: Keypoint scores (optional)
            
        Returns:
            List of cv2.DMatch objects
        """
        if self.matcher is None:
            # Fall back to traditional matching using custom matcher
            return self._match_custom(desc1, desc2)
            
        try:
            # Print shape info for debugging
            print(f"SuperGlue: kp1 shape: {kp1.shape}, desc1 shape: {desc1.shape}")
            print(f"SuperGlue: kp2 shape: {kp2.shape}, desc2 shape: {desc2.shape}")
            
            # Fix descriptor orientation if necessary (SuperPoint outputs descriptors as D×N)
            if desc1.shape[0] == 256 and desc1.shape[1] != 256:
                # Already in the correct format (D×N)
                pass
            elif desc1.shape[1] == 256 and desc1.shape[0] != 256:
                # Need to transpose from N×D to D×N
                desc1 = desc1.T
                desc2 = desc2.T
                print(f"Transposed descriptors to: {desc1.shape} and {desc2.shape}")
            
            # Prepare data for SuperGlue
            if scores1 is None:
                scores1 = np.ones(len(kp1))
            if scores2 is None:
                scores2 = np.ones(len(kp2))
                
            # Convert to torch tensors
            data = {
                'keypoints0': torch.from_numpy(kp1).float().to(self.device)[None],
                'keypoints1': torch.from_numpy(kp2).float().to(self.device)[None],
                'descriptors0': torch.from_numpy(desc1).float().to(self.device)[None],
                'descriptors1': torch.from_numpy(desc2).float().to(self.device)[None],
                'scores0': torch.from_numpy(scores1).float().to(self.device)[None],
                'scores1': torch.from_numpy(scores2).float().to(self.device)[None],
                # Add image shapes for SuperGlue (as dictionary)
                'image0': {'shape': torch.tensor([1, 1, kp1.shape[0], desc1.shape[1]], device=self.device)},
                'image1': {'shape': torch.tensor([1, 1, kp2.shape[0], desc2.shape[1]], device=self.device)}
            }
            
            # Match features
            with torch.no_grad():
                pred = self.matcher(data)
            
            # Convert results to list of DMatch objects
            matches = pred['matches0'][0].cpu().numpy()
            confidences = pred['matching_scores0'][0].cpu().numpy()
            
            # Create list of DMatch objects
            good_matches = []
            for i, idx in enumerate(matches):
                if idx >= 0 and confidences[i] > self.match_threshold:
                    m = cv2.DMatch()
                    m.queryIdx = i
                    m.trainIdx = idx
                    m.distance = 1.0 - confidences[i]  # Convert confidence to distance
                    good_matches.append(m)
            
            print(f"SuperGlue: Found {len(good_matches)} matches")
            return good_matches
        except Exception as e:
            print(f"Error matching features: {e}")
            traceback.print_exc()
            # Fall back to traditional matching
            return self._match_custom(desc1, desc2)
    
    def _match_custom(self, desc1, desc2):
        """Match descriptors using custom similarity-based matcher.
        
        Args:
            desc1, desc2: Feature descriptors
            
        Returns:
            List of cv2.DMatch objects
        """
        try:
            # Fix descriptor orientation if necessary (SuperPoint outputs descriptors as D×N)
            if desc1.shape[0] == 256 and desc1.shape[1] != 256:
                # Transpose from D×N to N×D
                desc1 = desc1.T
                desc2 = desc2.T
                print(f"Transposed descriptors for matching: {desc1.shape} and {desc2.shape}")
            
            # Normalize descriptors for cosine similarity
            desc1_norm = np.copy(desc1)
            desc2_norm = np.copy(desc2)
            
            # Normalize rows
            desc1_norms = np.linalg.norm(desc1_norm, axis=1, keepdims=True)
            desc2_norms = np.linalg.norm(desc2_norm, axis=1, keepdims=True)
            
            # Avoid division by zero
            desc1_norm = np.divide(desc1_norm, desc1_norms, out=np.zeros_like(desc1_norm), where=desc1_norms!=0)
            desc2_norm = np.divide(desc2_norm, desc2_norms, out=np.zeros_like(desc2_norm), where=desc2_norms!=0)
            
            # Compute similarity matrix
            similarity = desc1_norm @ desc2_norm.T
            
            # Apply ratio test
            good_matches = []
            for i in range(similarity.shape[0]):
                # Get similarities for this descriptor
                scores = similarity[i]
                
                # Find best match
                best_idx = np.argmax(scores)
                best_score = scores[best_idx]
                
                # Find second best match
                scores_copy = scores.copy()
                scores_copy[best_idx] = -1
                second_best_idx = np.argmax(scores_copy)
                second_best_score = scores_copy[second_best_idx]
                
                # Apply ratio test (lower ratio means stricter test)
                ratio = 0.8
                if best_score > 0.6 and best_score > ratio * second_best_score:
                    m = cv2.DMatch()
                    m.queryIdx = i
                    m.trainIdx = best_idx
                    m.distance = 1.0 - best_score  # Convert similarity to distance
                    good_matches.append(m)
            
            print(f"Custom matcher: Found {len(good_matches)} matches")
            return good_matches
        except Exception as e:
            print(f"Error in custom matching: {e}")
            traceback.print_exc()
            return []

# Initialize SuperPoint and SuperGlue
print("\nInitializing SuperPoint and SuperGlue...")
superpoint_detector = SimpleSuperPointDetector()
superglue_matcher = SimpleSuperGlueMatcher()

SuperPoint model in base dir: True
SuperPoint model in weights dir: True
SuperGlue model in base dir: True
SuperGlue model in weights dir (renamed): True
Fixed SuperGlue source code to handle image shape properly

Initializing SuperPoint and SuperGlue...
Loaded SuperPoint model
Loading SuperPoint model from /kaggle/working/superglue_models/weights/superpoint_v1.pth
SuperPoint model loaded successfully
Loaded SuperGlue model ("outdoor" weights)
Loading SuperGlue model from /kaggle/working/superglue_models/weights/outdoor.pth
SuperGlue model loaded successfully


## Multi-Backend Feature Extraction and Matching System

In [17]:
class MultiBackendMatcher:
    """Feature extractor and matcher with multiple backend options."""
    
    def __init__(self, feature_params, matching_params):
        """Initialize with multiple feature extraction and matching backends.
        
        Args:
            feature_params: Parameters for feature detection
            matching_params: Parameters for feature matching
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.feature_params = feature_params
        self.matching_params = matching_params
        
        # Available backends list
        self.backends = []
        
        # Initialize LightGlue with DISK if available
        try:
            self.lightglue_disk = LightGlueFeatureMatcher(use_disk=True)
            if self.lightglue_disk.available:
                print("LightGlue with DISK features is available")
                self.backends.append("lightglue_disk")
        except Exception as e:
            print(f"LightGlue with DISK features is not available: {e}")
        
        # Initialize LightGlue with SIFT if available
        try:
            self.lightglue_sift = LightGlueFeatureMatcher(use_disk=False)
            if self.lightglue_sift.available:
                print("LightGlue with SIFT features is available")
                self.backends.append("lightglue_sift")
        except Exception as e:
            print(f"LightGlue with SIFT features is not available: {e}")
        
        # Initialize SuperPoint/SuperGlue if available
        try:
            self.superpoint = SimpleSuperPointDetector()
            self.superglue = SimpleSuperGlueMatcher()
            
            if self.superpoint.detector is not None and self.superglue.matcher is not None:
                print("SuperPoint/SuperGlue is available")
                self.backends.append("superglue")
        except Exception as e:
            print(f"SuperPoint/SuperGlue is not available: {e}")
        
        # Always add traditional SIFT as a fallback
        self.backends.append("sift")
        print("Traditional SIFT is available as fallback")
        
        # Report available backends
        if self.backends:
            primary_backend = self.backends[0]
            print(f"Using {primary_backend} as primary backend")
            print(f"Available backends (in order): {', '.join(self.backends)}")
        else:
            print("Warning: No feature extraction backends available")
    
    def extract(self, image_path):
        """Extract features from an image using the best available backend.
        
        Args:
            image_path: Path to the image
            
        Returns:
            Dictionary with keypoints, descriptors, and dimensions, or None if extraction fails
        """
        features = None
        backend_used = None
        
        # Try each backend in order until one succeeds
        for backend in self.backends:
            try:
                if backend == "lightglue_disk":
                    features = self.lightglue_disk.extract(image_path)
                elif backend == "lightglue_sift":
                    features = self.lightglue_sift.extract(image_path)
                elif backend == "superglue":
                    features = self._extract_superpoint(image_path)
                elif backend == "sift":
                    features = self._extract_sift(image_path)
                
                if features is not None and len(features.get('keypoints', [])) > 0:
                    backend_used = backend
                    if 'backend' not in features:
                        features['backend'] = backend
                    break
            except Exception as e:
                print(f"Error extracting features with {backend}: {e}")
                traceback.print_exc()
        
        if features is None:
            print(f"All feature extraction backends failed for {image_path}")
        else:
            print(f"Extracted features from {image_path.name} using {backend_used} backend")
        
        return features
    
    def _extract_superpoint(self, image_path):
        """Extract features using SuperPoint.
        
        Args:
            image_path: Path to the image
            
        Returns:
            Dictionary with features or None if extraction fails
        """
        if self.superpoint.detector is None:
            return None
        
        try:
            # Load image
            img = cv2.imread(str(image_path))
            if img is None:
                print(f"Could not read image: {image_path}")
                return None
            
            # Extract features
            result = self.superpoint.detect_and_compute(img)
            if result is None:
                return None
            
            keypoints, descriptors, scores = result
            
            return {
                'keypoints': keypoints,
                'descriptors': descriptors,
                'scores': scores,
                'dimensions': img.shape[:2] if len(img.shape) == 3 else img.shape,
                'path': image_path,
                'backend': 'superglue'
            }
        except Exception as e:
            print(f"Error extracting SuperPoint features: {e}")
            traceback.print_exc()
            return None
    
    def _extract_sift(self, image_path):
        """Extract features using SIFT.
        
        Args:
            image_path: Path to the image
            
        Returns:
            Dictionary with features or None if extraction fails
        """
        try:
            # Load image
            img = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)
            if img is None:
                print(f"Could not read image: {image_path}")
                return None
                
            # Create SIFT detector with optimized parameters
            sift = cv2.SIFT_create(
                nfeatures=4000,
                nOctaveLayers=5,
                contrastThreshold=0.04,
                edgeThreshold=15,
                sigma=1.6
            )
            
            # Detect keypoints and compute descriptors
            keypoints, descriptors = sift.detectAndCompute(img, None)
            
            if keypoints is None or len(keypoints) == 0:
                return None
                
            print(f"Extracted {len(keypoints)} SIFT features from {image_path.name}")
            
            return {
                'keypoints': keypoints,
                'descriptors': descriptors,
                'dimensions': img.shape,
                'path': image_path,
                'backend': 'sift'
            }
        except Exception as e:
            print(f"Error extracting SIFT features: {e}")
            traceback.print_exc()
            return None
    
    def match(self, features1, features2):
        """Match features between two images based on their backends.
        
        Args:
            features1: Features from the first image
            features2: Features from the second image
            
        Returns:
            Tuple of (matches, inlier_count, is_geometrically_consistent)
        """
        # Get backend types
        backend1 = features1.get('backend', 'unknown')
        backend2 = features2.get('backend', 'unknown')
        
        # Check if both features use the same backend
        if backend1 != backend2:
            print(f"Warning: Mixing feature backends ({backend1} and {backend2})")
        
        # Attempt to match based on backend type
        matches = None
        try:
            if backend1.startswith('lightglue_disk') and backend2.startswith('lightglue_disk'):
                matches = self.lightglue_disk.match(features1, features2)
            elif backend1.startswith('lightglue_sift') and backend2.startswith('lightglue_sift'):
                matches = self.lightglue_sift.match(features1, features2)
            elif backend1 == 'superglue' and backend2 == 'superglue':
                matches = self.superglue.match(
                    features1['keypoints'], features1['descriptors'],
                    features2['keypoints'], features2['descriptors'],
                    features1.get('scores'), features2.get('scores')
                )
            else:
                # Fall back to appropriate matching based on keypoint type
                if hasattr(features1['keypoints'][0], 'pt'):  # CV2 KeyPoint objects (SIFT)
                    matches = self._match_cv2_keypoints(features1, features2)
                else:  # Raw keypoints array
                    matches = self._match_raw_keypoints(features1, features2)
        except Exception as e:
            print(f"Error in feature matching: {e}")
            traceback.print_exc()
            # Try generic descriptor matching as last resort
            try:
                matches = self._match_descriptors(
                    features1['descriptors'], 
                    features2['descriptors'],
                    features1.get('backend'), 
                    features2.get('backend')
                )
            except Exception as e2:
                print(f"Generic descriptor matching also failed: {e2}")
                traceback.print_exc()
                return [], 0, False
        
        # Verify matches geometrically if we have matches
        if matches and len(matches) > 0:
            return self._verify_matches(matches, features1, features2)
        else:
            return [], 0, False
    
    def _match_cv2_keypoints(self, features1, features2):
        """Match OpenCV KeyPoint features using FLANN.
        
        Args:
            features1, features2: Feature dictionaries with cv2.KeyPoint objects
            
        Returns:
            List of cv2.DMatch objects
        """
        # Use custom feature matching instead of FLANN to avoid dimensionality issues
        return self._match_descriptors(
            features1['descriptors'], 
            features2['descriptors'],
            features1.get('backend'), 
            features2.get('backend')
        )
    
    def _match_raw_keypoints(self, features1, features2):
        """Match raw keypoint features (SuperPoint or LightGlue).
        
        Args:
            features1, features2: Feature dictionaries with raw keypoint arrays
            
        Returns:
            List of cv2.DMatch objects
        """
        return self._match_descriptors(
            features1['descriptors'], 
            features2['descriptors'],
            features1.get('backend'), 
            features2.get('backend')
        )
    
    def _match_descriptors(self, desc1, desc2, backend1=None, backend2=None):
        """Match descriptors using cosine similarity and ratio test.
        This is a safer generic method that works with any descriptor format.
        
        Args:
            desc1, desc2: Feature descriptors as numpy arrays
            backend1, backend2: Optional backend names for debugging
            
        Returns:
            List of cv2.DMatch objects
        """
        try:
            # Print original descriptor shapes
            print(f"Original descriptor shapes - desc1: {desc1.shape}, desc2: {desc2.shape}")
            
            # Fix descriptor orientation if necessary
            if backend1 == 'superglue' or backend2 == 'superglue':
                # SuperPoint descriptors are typically (256, N)
                if desc1.shape[0] == 256 and desc1.shape[1] != 256:
                    # Transpose from (256, N) to (N, 256)
                    desc1 = desc1.T
                if desc2.shape[0] == 256 and desc2.shape[1] != 256:
                    # Transpose from (256, N) to (N, 256)
                    desc2 = desc2.T
                print(f"Transposed SuperPoint descriptors to: {desc1.shape} and {desc2.shape}")
            elif hasattr(desc1, 'shape') and len(desc1.shape) == 1:
                # Handle 1D descriptors (reshape to 2D)
                desc1 = desc1.reshape(1, -1)
                desc2 = desc2.reshape(1, -1)
                print(f"Reshaped 1D descriptors to: {desc1.shape} and {desc2.shape}")
            elif isinstance(desc1, list):
                # Handle list descriptors
                desc1 = np.array(desc1)
                desc2 = np.array(desc2)
                print(f"Converted list descriptors to arrays: {desc1.shape} and {desc2.shape}")
            
            # Ensure we have 2D arrays with features as rows
            if len(desc1.shape) != 2 or len(desc2.shape) != 2:
                print(f"Unexpected descriptor shapes: {desc1.shape} and {desc2.shape}")
                # Try to reshape if possible
                if len(desc1.shape) > 2:
                    desc1 = desc1.reshape(desc1.shape[0], -1)
                if len(desc2.shape) > 2:
                    desc2 = desc2.reshape(desc2.shape[0], -1)
            
            # Ensure features are rows (N, D) not columns (D, N)
            if desc1.shape[0] < desc1.shape[1]:
                # Already (N, D) format
                pass
            else:
                # Try to determine if we need to transpose
                # For SIFT: (N, 128), for SuperPoint: (256, N)
                is_superpoint = (desc1.shape[0] == 256 or desc1.shape[1] == 256)
                
                if is_superpoint:
                    # SuperPoint descriptors - transpose if needed
                    if desc1.shape[0] != desc1.shape[1] and desc1.shape[0] > desc1.shape[1]:
                        # More rows than columns - probably needs transpose
                        desc1 = desc1.T
                    if desc2.shape[0] != desc2.shape[1] and desc2.shape[0] > desc2.shape[1]:
                        desc2 = desc2.T
            
            # Print final descriptor shapes
            print(f"Final descriptor shapes for matching - desc1: {desc1.shape}, desc2: {desc2.shape}")
            
            # Normalize descriptors for cosine similarity
            desc1_norm = np.copy(desc1)
            desc2_norm = np.copy(desc2)
            
            # Normalize rows
            desc1_norms = np.linalg.norm(desc1_norm, axis=1, keepdims=True) + 1e-10
            desc2_norms = np.linalg.norm(desc2_norm, axis=1, keepdims=True) + 1e-10
            
            desc1_norm = desc1_norm / desc1_norms
            desc2_norm = desc2_norm / desc2_norms
            
            # Compute similarity matrix
            similarity = desc1_norm @ desc2_norm.T
            
            # Apply ratio test
            good_matches = []
            for i in range(similarity.shape[0]):
                # Get similarities for this descriptor
                scores = similarity[i]
                
                # Find best match
                best_idx = np.argmax(scores)
                best_score = scores[best_idx]
                
                # Find second best match
                scores_copy = scores.copy()
                scores_copy[best_idx] = -1
                second_best_idx = np.argmax(scores_copy)
                second_best_score = scores_copy[second_best_idx]
                
                # Apply ratio test (lower ratio means stricter test)
                ratio = 0.8
                if best_score > 0.6 and best_score > ratio * second_best_score:
                    m = cv2.DMatch()
                    m.queryIdx = i
                    m.trainIdx = best_idx
                    m.distance = 1.0 - best_score  # Convert similarity to distance
                    good_matches.append(m)
            
            print(f"Generic descriptor matcher: Found {len(good_matches)} matches")
            return good_matches
        except Exception as e:
            print(f"Error in generic descriptor matching: {e}")
            traceback.print_exc()
            return []
    
    def _verify_matches(self, matches, features1, features2):
        """Verify matches using geometric constraints.
        
        Args:
            matches: List of cv2.DMatch objects
            features1, features2: Feature dictionaries
            
        Returns:
            Tuple of (filtered_matches, inlier_count, is_geometrically_consistent)
        """
        # Check if we have enough matches
        if len(matches) < 8:
            return [], 0, False
        
        try:
            # Extract matched points
            # Handle different keypoint types (cv2.KeyPoint vs. raw arrays)
            is_keypoint_object = hasattr(features1['keypoints'][0], 'pt') if len(features1['keypoints']) > 0 else False
            
            if is_keypoint_object:
                # Handle KeyPoint objects
                src_pts = np.float32([features1['keypoints'][m.queryIdx].pt for m in matches])
                dst_pts = np.float32([features2['keypoints'][m.trainIdx].pt for m in matches])
            else:
                # Handle raw keypoints
                src_pts = np.float32([features1['keypoints'][m.queryIdx] for m in matches])
                dst_pts = np.float32([features2['keypoints'][m.trainIdx] for m in matches])
            
            # Apply strong geometric verification
            # 1. Find fundamental matrix with RANSAC
            F, mask_f = cv2.findFundamentalMat(
                src_pts, dst_pts, 
                method=cv2.FM_RANSAC, 
                ransacReprojThreshold=self.matching_params['ransac_threshold'],
                confidence=0.999,
                maxIters=5000
            )
            
            # If fundamental matrix estimation fails, try homography (for planar scenes)
            if F is None or F.shape != (3, 3):
                H, mask_h = cv2.findHomography(
                    src_pts, dst_pts, 
                    method=cv2.RANSAC, 
                    ransacReprojThreshold=3.0,
                    confidence=0.99,
                    maxIters=2000
                )
                
                if H is None:
                    return [], 0, False
                    
                inliers = np.squeeze(mask_h.astype(bool))
                inlier_count = np.sum(inliers)
                
                # Only accept homography if high inlier ratio
                if inlier_count < self.matching_params['min_inliers'] or inlier_count / len(matches) < self.matching_params['min_inlier_ratio']:
                    return [], 0, False
                    
                # Filter matches based on homography inliers
                filtered_matches = [matches[i] for i in range(len(matches)) if inliers[i]]
                
                return filtered_matches, inlier_count, True
            
            # Filter matches based on fundamental matrix inliers
            inliers = np.squeeze(mask_f.astype(bool))
            inlier_count = np.sum(inliers)
            
            # Strict consistency check - high inlier count AND high inlier ratio
            is_consistent = inlier_count >= self.matching_params['min_inliers'] and inlier_count / len(matches) >= self.matching_params['min_inlier_ratio']
            
            if not is_consistent:
                return [], 0, False
            
            # Second verification: Calculate essential matrix and decompose to R,t
            if self.matching_params.get('geometric_verification', True):
                dims1 = features1['dimensions']
                focal1 = max(dims1) / 2
                pp1 = (dims1[1]/2, dims1[0]/2)
                
                # Use points only from fundamental matrix inliers
                src_pts_inliers = src_pts[inliers]
                dst_pts_inliers = dst_pts[inliers]
                
                # Compute essential matrix
                E, mask_e = cv2.findEssentialMat(
                    src_pts_inliers, dst_pts_inliers,
                    focal=focal1, pp=pp1,
                    method=cv2.RANSAC,
                    prob=0.999,
                    threshold=1.0
                )
                
                if E is None or E.shape != (3, 3):
                    # Essential matrix failed, but fundamental was good
                    # Use fundamental matrix results
                    filtered_matches = [matches[i] for i in range(len(matches)) if inliers[i]]
                    return filtered_matches, inlier_count, True
                
                # Try to recover pose to ensure geometric consistency
                retval, R, t, mask_pose = cv2.recoverPose(E, src_pts_inliers, dst_pts_inliers, focal=focal1, pp=pp1)
                
                # Final filtering: combine all verification steps
                pose_inliers = np.squeeze(mask_pose.astype(bool))
                final_inlier_count = np.sum(pose_inliers)
                
                # Ensure we have enough inliers after all verification steps
                final_consistent = final_inlier_count >= self.matching_params['min_inliers']
                
                if final_consistent:
                    # Use matches that passed the fundamental matrix test
                    filtered_matches = [matches[i] for i in range(len(matches)) if inliers[i]]
                    return filtered_matches, final_inlier_count, True
            
            # If not using second verification or it failed
            filtered_matches = [matches[i] for i in range(len(matches)) if inliers[i]]
            return filtered_matches, inlier_count, True
        
        except Exception as e:
            # Handle errors in matching
            print(f"Error in geometric verification: {e}")
            traceback.print_exc()
            return [], 0, False

# Initialize the multi-backend matcher
print("\nInitializing Multi-Backend Feature Matcher...")
multi_backend_matcher = MultiBackendMatcher(FEATURE_PARAMS, MATCHING_PARAMS)


Initializing Multi-Backend Feature Matcher...
Initializing Custom DISK feature extractor...
DISK feature extractor initialized successfully
LightGlue with DISK features is available
Initializing Custom SIFT feature extractor...
SIFT feature extractor initialized successfully
LightGlue with SIFT features is available
Loaded SuperPoint model
Loading SuperPoint model from /kaggle/working/superglue_models/weights/superpoint_v1.pth
SuperPoint model loaded successfully
Loaded SuperGlue model ("outdoor" weights)
Loading SuperGlue model from /kaggle/working/superglue_models/weights/outdoor.pth
SuperGlue model loaded successfully
SuperPoint/SuperGlue is available
Traditional SIFT is available as fallback
Using lightglue_disk as primary backend
Available backends (in order): lightglue_disk, lightglue_sift, superglue, sift


## Multi-Backend Feature Extraction Pipeline

In [18]:
def process_dataset_features_multi_backend(dataset_path, multi_backend_matcher, max_workers=4):
    """Process all images in a dataset using the multi-backend feature extractor.
    
    Args:
        dataset_path: Path to the dataset directory
        multi_backend_matcher: Multi-backend matcher instance
        max_workers: Maximum number of parallel workers
        
    Returns:
        Dictionary mapping image names to their features
    """
    features_dict = {}
    
    # Get all image files (only .png for this dataset)
    image_files = list(dataset_path.glob('*.png'))
    
    # Define a worker function for parallel processing
    def process_image(img_path):
        features = multi_backend_matcher.extract(img_path)
        if features is not None and len(features.get('keypoints', [])) > 0:
            return img_path.name, features
        return None, None
    
    # Process images in parallel
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process_image, img_path) for img_path in image_files]
        
        for future in tqdm(as_completed(futures), total=len(futures), desc="Extracting features"):
            img_name, features = future.result()
            if img_name is not None:
                features_dict[img_name] = features
    
    # Free memory after feature extraction
    free_memory()
    
    # Print feature extraction statistics
    backends_used = {}
    for img_name, features in features_dict.items():
        backend = features.get('backend', 'unknown')
        backends_used[backend] = backends_used.get(backend, 0) + 1
    
    print(f"Extracted features from {len(features_dict)} images")
    for backend, count in backends_used.items():
        print(f"  {backend}: {count} images ({count/len(features_dict)*100:.1f}%)")
    
    return features_dict

def build_match_graph_multi_backend(features_dict, multi_backend_matcher, batch_size=50):
    """Build a graph representing image matches using multi-backend matching.
    
    Args:
        features_dict: Dictionary of image features
        multi_backend_matcher: Multi-backend matcher instance
        batch_size: Number of edges to process in each batch
        
    Returns:
        NetworkX graph where nodes are images and edges represent matches
    """
    # Create graph
    G = nx.Graph()
    
    # Add nodes for each image
    for img_name in features_dict.keys():
        G.add_node(img_name)
    
    # Create all image pairs for matching
    image_names = list(features_dict.keys())
    n = len(image_names)
    
    if n <= 1:  # Only proceed if we have at least 2 images
        return G
    
    # Create all pairs for processing
    pairs = [(i, j) for i in range(n) for j in range(i+1, n)]
    total_pairs = len(pairs)
    
    # Process in batches to avoid memory issues
    for batch_start in tqdm(range(0, total_pairs, batch_size), desc="Building match graph"):
        batch_end = min(batch_start + batch_size, total_pairs)
        batch_pairs = pairs[batch_start:batch_end]
        
        for i, j in batch_pairs:
            img1_name = image_names[i]
            img2_name = image_names[j]
            
            features1 = features_dict[img1_name]
            features2 = features_dict[img2_name]
            
            # Match features and verify
            matches, inlier_count, is_consistent = multi_backend_matcher.match(features1, features2)
            
            # Add edge if geometrically consistent
            if is_consistent:
                G.add_edge(img1_name, img2_name, weight=inlier_count, matches=matches)
        
        # Free memory after each batch
        if batch_end % (batch_size * 5) == 0:
            free_memory()
    
    return G

def process_test_dataset_multi_backend(dataset_path, dataset_name, training_structure, test_dataset_info, multi_backend_matcher):
    """Process a test dataset using multi-backend matching.
    
    Args:
        dataset_path: Path to test dataset
        dataset_name: Name of the dataset
        training_structure: Structure information from training data
        test_dataset_info: Additional analysis info for test datasets
        multi_backend_matcher: Multi-backend matcher instance
        
    Returns:
        Dictionary with scenes, outliers, and poses
    """
    print(f"  Processing dataset '{dataset_name}'...")
    
    # Extract features from test dataset
    print("  Extracting features...")
    features_dict = process_dataset_features_multi_backend(dataset_path, multi_backend_matcher)
    
    if not features_dict:
        print(f"  Warning: No features extracted from {dataset_name}")
        return {
            'scenes': [],
            'outliers': [img.name for img in dataset_path.glob('*.png')],
            'poses': {}
        }
    
    # Build match graph
    print("  Building match graph...")
    match_graph = build_match_graph_multi_backend(features_dict, multi_backend_matcher)
    
    print(f"  Match graph has {match_graph.number_of_nodes()} nodes and {match_graph.number_of_edges()} edges")
    
    # Get expected scene count from test_dataset_info if available
    expected_scene_count = None
    if dataset_name in test_dataset_info:
        expected_scene_count = test_dataset_info[dataset_name].get('expected_scene_count')
        print(f"  Using expected scene count from analysis: {expected_scene_count}")
    
    # Cluster into scenes
    print(f"  Clustering scenes...")
    try:
        if HAS_TORCH_GEOMETRIC and False:  # Temporarily disabled torch_geometric
            print("  Using GNN-based clustering...")
            scenes, outliers = cluster_scenes_with_deep_features(match_graph, expected_scene_count, device)
        else:
            print("  Using traditional clustering...")
            scenes, outliers = cluster_scenes_high_accuracy(match_graph, expected_scene_count)
    except Exception as e:
        print(f"  Clustering failed: {e}")
        traceback.print_exc()
        print("  Falling back to spectral clustering")
        scenes, outliers = cluster_scenes_high_accuracy(match_graph, expected_scene_count)
    
    print(f"  Identified {len(scenes)} scenes and {len(outliers)} outliers")
    
    # Estimate poses for each scene
    all_poses = {}
    
    for i, scene in enumerate(scenes):
        print(f"  Estimating poses for scene {i+1} ({len(scene)} images)...")
        scene_poses = estimate_poses_with_bundle_adjustment(scene, features_dict)
        
        # Count registered images
        registered_count = sum(1 for _, (R, T) in scene_poses.items() if R is not None and T is not None)
        print(f"  Registered {registered_count}/{len(scene)} images in scene {i+1}")
        
        all_poses.update(scene_poses)
        
        # Free memory
        free_memory()
    
    # Help garbage collection
    del features_dict
    del match_graph
    free_memory()
    
    return {
        'scenes': scenes,
        'outliers': outliers,
        'poses': all_poses
    }

# Test the extraction pipeline with a small sample of images
def test_feature_extraction_pipeline(multi_backend_matcher, test_dir=TEST_DIR, max_images=5):
    """Test the feature extraction pipeline on a few images."""
    print("\nTesting feature extraction pipeline...")
    
    # Find test datasets
    test_datasets = list(Path(test_dir).glob('*'))
    if not test_datasets:
        print("No test datasets found")
        return
    
    # Select first dataset
    test_dataset = test_datasets[0]
    print(f"Using test dataset: {test_dataset.name}")
    
    # Get some sample images
    image_files = list(test_dataset.glob('*.png'))
    if not image_files:
        print("No images found in test dataset")
        return
    
    # Limit to a few images
    sample_images = image_files[:min(max_images, len(image_files))]
    
    # Extract features from each image
    for img_path in sample_images:
        print(f"\nExtracting features from {img_path.name}...")
        features = multi_backend_matcher.extract(img_path)
        
        if features is not None:
            backend = features.get('backend', 'unknown')
            keypoint_count = len(features['keypoints'])
            descriptor_shape = features['descriptors'].shape if hasattr(features['descriptors'], 'shape') else None
            
            print(f"  Successfully extracted {keypoint_count} features using {backend}")
            print(f"  Descriptor shape: {descriptor_shape}")
        else:
            print(f"  Failed to extract features")
    
    # Try to match some pairs
    if len(sample_images) >= 2:
        img1_path = sample_images[0]
        img2_path = sample_images[1]
        
        print(f"\nMatching features between {img1_path.name} and {img2_path.name}...")
        features1 = multi_backend_matcher.extract(img1_path)
        features2 = multi_backend_matcher.extract(img2_path)
        
        if features1 is not None and features2 is not None:
            matches, inlier_count, is_consistent = multi_backend_matcher.match(features1, features2)
            
            if is_consistent:
                print(f"  Images are geometrically consistent with {inlier_count} inliers")
            else:
                print(f"  Images are not geometrically consistent")
        else:
            print(f"  Could not match features (extraction failed)")

# Test the pipeline
test_feature_extraction_pipeline(multi_backend_matcher)


Testing feature extraction pipeline...
Using test dataset: ETs

Extracting features from another_et_another_et004.png...
DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>
Extracted 1757 SIFT features from another_et_another_et004.png
Extracted features from another_et_another_et004.png using lightglue_sift backend
  Successfully extracted 1757 features using lightglue_sift
  Descriptor shape: (1757, 128)

Extracting features from outliers_out_et003.png...
DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>
Extracted 1557 SIFT features from outliers_out_et003.png
Extracted features from outliers_out_et003.png using lightglue_sift backend
  Successfully extracted 1557 features using lightglue_sift
  Descriptor shape: (1557, 128)

Extracting features from another_et_another_et006.png...
DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <cl

Traceback (most recent call last):
  File "<ipython-input-15-9c89ef6c86d6>", line 234, in _match_descriptors
    similarity = desc1_norm @ desc2_norm.T
ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 1557 is different from 1757)


## Scene Clustering Algorithms

In [19]:
def cluster_scenes_high_accuracy(match_graph, expected_scene_count=None):
    """Cluster scenes using a high accuracy approach that combines spectral clustering,
    edge weight thresholds, and component analysis.
    
    Args:
        match_graph: NetworkX graph where nodes are images and edges represent matches
        expected_scene_count: Optional expected number of scenes
        
    Returns:
        Tuple of (scenes, outliers) where scenes is a list of image sets and outliers is a list of images
    """
    print(f"Graph has {match_graph.number_of_nodes()} nodes and {match_graph.number_of_edges()} edges")
    
    # Return empty result if the graph is too small
    if match_graph.number_of_nodes() <= 1:
        return [], list(match_graph.nodes())
    
    # Handle case with no edges (all outliers)
    if match_graph.number_of_edges() == 0:
        return [], list(match_graph.nodes())
    
    # Create a filtered graph with only strong edges
    strong_graph = nx.Graph()
    strong_graph.add_nodes_from(match_graph.nodes())
    
    # Determine strength threshold adaptively
    weights = [data['weight'] for _, _, data in match_graph.edges(data=True)]
    if not weights:
        return [], list(match_graph.nodes())
    
    weight_threshold = max(20, np.percentile(weights, 80))
    print(f"Using edge weight threshold of {weight_threshold}")
    
    # Add edges that meet the threshold
    for u, v, data in match_graph.edges(data=True):
        if data['weight'] >= weight_threshold:
            strong_graph.add_edge(u, v, **data)
    
    # First try: Get connected components from strong graph
    connected_components = list(nx.connected_components(strong_graph))
    
    # If we have a reasonable number of components, use them directly
    if expected_scene_count is not None and abs(len(connected_components) - expected_scene_count) <= 2:
        print(f"Using {len(connected_components)} connected components as scenes")
        large_components = []
        min_scene_size = 3  # Minimum images to consider a valid scene
        
        for comp in connected_components:
            if len(comp) >= min_scene_size:
                large_components.append(list(comp))
        
        # All nodes not in large components are outliers
        all_scene_nodes = set()
        for comp in large_components:
            all_scene_nodes.update(comp)
            
        outliers = [node for node in match_graph.nodes() if node not in all_scene_nodes]
        
        return large_components, outliers
    
    # Second try: Try spectral clustering if connected components isn't ideal
    if expected_scene_count is None:
        # Estimate number of clusters if not provided
        # Use heuristic: aim for scenes with 10-30 images based on training data
        avg_images_per_scene = 15
        k = max(2, match_graph.number_of_nodes() // avg_images_per_scene)
        print(f"Estimated number of scenes: {k}")
    else:
        k = expected_scene_count
        print(f"Using expected scene count: {k}")
    
    try:
        # Create a weighted adjacency matrix from the match graph
        nodes = list(match_graph.nodes())
        node_idx = {node: i for i, node in enumerate(nodes)}
        
        # Create weighted adjacency matrix
        n = len(nodes)
        adj_matrix = np.zeros((n, n))
        
        for u, v, data in match_graph.edges(data=True):
            i, j = node_idx[u], node_idx[v]
            weight = data['weight']
            # Higher weights mean stronger connections
            adj_matrix[i, j] = weight
            adj_matrix[j, i] = weight
        
        # Normalize adjacency matrix for better clustering
        row_sums = adj_matrix.sum(axis=1)
        row_sums[row_sums == 0] = 1  # Avoid division by zero
        adj_matrix_norm = adj_matrix / row_sums[:, np.newaxis]
        
        # Try spectral clustering
        spectral = SpectralClustering(
            n_clusters=k,
            affinity='precomputed',
            random_state=42,
            assign_labels='discretize',
            n_init=10
        )
        
        # Fit the model and get cluster labels
        cluster_labels = spectral.fit_predict(adj_matrix_norm)
        
        # Organize nodes by cluster
        clusters = [[] for _ in range(k)]
        for i, label in enumerate(cluster_labels):
            clusters[label].append(nodes[i])
        
        # Filter out clusters that are too small (likely outliers)
        min_cluster_size = 3
        large_clusters = [cluster for cluster in clusters if len(cluster) >= min_cluster_size]
        
        # Collect outliers (small clusters and isolated nodes)
        outlier_set = set()
        
        # Add isolated nodes from match_graph
        for node in match_graph.nodes():
            if match_graph.degree(node) == 0:
                outlier_set.add(node)
                
        # Add nodes from small clusters
        for cluster in clusters:
            if len(cluster) < min_cluster_size:
                outlier_set.update(cluster)
        
        # Final validation: verify clusters are connected
        verified_clusters = []
        
        for cluster in large_clusters:
            # Create a subgraph for this cluster
            subg = match_graph.subgraph(cluster)
            
            # Get connected components in the subgraph
            subg_components = list(nx.connected_components(subg))
            
            # Add each component as a separate cluster
            for comp in subg_components:
                if len(comp) >= min_cluster_size:
                    verified_clusters.append(list(comp))
                else:
                    # Components that are too small become outliers
                    outlier_set.update(comp)
        
        outliers = list(outlier_set)
        print(f"Spectral clustering found {len(verified_clusters)} scenes and {len(outliers)} outliers")
        
        return verified_clusters, outliers
        
    except Exception as e:
        print(f"Spectral clustering failed: {e}")
        traceback.print_exc()
        
        # Fallback to DBSCAN if spectral clustering fails
        print("Falling back to DBSCAN clustering")
        
        # Try DBSCAN clustering
        adjacency_list = []
        for u, v, data in match_graph.edges(data=True):
            i, j = node_idx[u], node_idx[v]
            weight = 1.0 / (data['weight'] + 1e-5)  # Convert to distance (lower is better)
            adjacency_list.append((i, j, weight))
            adjacency_list.append((j, i, weight))
        
        # Create sparse matrix
        sparse_matrix = lil_matrix((n, n))
        for i, j, weight in adjacency_list:
            sparse_matrix[i, j] = weight
        
        # Use DBSCAN for clustering
        eps = 0.5  # Distance threshold
        min_samples = 3  # Minimum cluster size
        
        db = DBSCAN(eps=eps, min_samples=min_samples, metric='precomputed')
        db_labels = db.fit_predict(sparse_matrix.tocsr())
        
        # Organize nodes by cluster
        db_clusters = {}
        for i, label in enumerate(db_labels):
            if label != -1:  # -1 indicates noise points
                if label not in db_clusters:
                    db_clusters[label] = []
                db_clusters[label].append(nodes[i])
        
        # Get outliers (noise points)
        db_outliers = [nodes[i] for i, label in enumerate(db_labels) if label == -1]
        
        # Convert to list of lists format
        db_clusters_list = list(db_clusters.values())
        
        print(f"DBSCAN found {len(db_clusters_list)} scenes and {len(db_outliers)} outliers")
        
        return db_clusters_list, db_outliers

# If torch_geometric is available, implement the GNN-based clustering approach
if HAS_TORCH_GEOMETRIC:
    # Define a GNN Model for node classification
    class GCNSceneClassifier(nn.Module):
        """Graph Convolutional Network for scene classification."""
        
        def __init__(self, num_node_features, hidden_dim=64, num_classes=10):
            """Initialize GCN for scene classification.
            
            Args:
                num_node_features: Number of input features per node
                hidden_dim: Hidden dimension size
                num_classes: Number of output classes
            """
            super(GCNSceneClassifier, self).__init__()
            self.conv1 = GCNConv(num_node_features, hidden_dim)
            self.conv2 = GCNConv(hidden_dim, hidden_dim)
            self.conv3 = GCNConv(hidden_dim, num_classes)
            
            self.dropout = nn.Dropout(0.3)
            self.relu = nn.ReLU()
        
        def forward(self, x, edge_index, edge_weight=None):
            """Forward pass through the GCN.
            
            Args:
                x: Node features tensor
                edge_index: Graph connectivity in COO format
                edge_weight: Edge weights
                
            Returns:
                Node embeddings tensor
            """
            # First GCN layer
            x = self.conv1(x, edge_index, edge_weight)
            x = self.relu(x)
            x = self.dropout(x)
            
            # Second GCN layer
            x = self.conv2(x, edge_index, edge_weight)
            x = self.relu(x)
            x = self.dropout(x)
            
            # Final layer
            x = self.conv3(x, edge_index, edge_weight)
            
            return x
    
    def cluster_scenes_with_deep_features(match_graph, expected_scene_count=None, device=None):
        """Cluster scenes using a Graph Neural Network approach.
        
        Args:
            match_graph: NetworkX graph where nodes are images and edges represent matches
            expected_scene_count: Optional expected number of scenes
            device: Torch device
            
        Returns:
            Tuple of (scenes, outliers) where scenes is a list of image sets and outliers is a list of images
        """
        if device is None:
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
        print(f"Using GNN-based scene clustering on device: {device}")
        
        # Return empty result if the graph is too small
        if match_graph.number_of_nodes() <= 1:
            return [], list(match_graph.nodes())
        
        # Handle case with no edges (all outliers)
        if match_graph.number_of_edges() == 0:
            return [], list(match_graph.nodes())
        
        try:
            # Create node features
            nodes = list(match_graph.nodes())
            node_idx = {node: i for i, node in enumerate(nodes)}
            
            # Create node features matrix
            num_nodes = len(nodes)
            
            # Use simple features like node degree and edge weights
            node_degrees = np.array([match_graph.degree(node) for node in nodes])
            node_weighted_degrees = np.array([
                sum(data['weight'] for _, _, data in match_graph.edges(node, data=True)) 
                for node in nodes
            ])
            
            # Normalize features
            node_degrees_norm = node_degrees / (np.max(node_degrees) + 1e-8)
            node_weighted_degrees_norm = node_weighted_degrees / (np.max(node_weighted_degrees) + 1e-8)
            
            # Concatenate features
            node_features = np.column_stack([
                node_degrees_norm.reshape(-1, 1),
                node_weighted_degrees_norm.reshape(-1, 1)
            ])
            
            # Create edge index and weights
            edge_index = []
            edge_weights = []
            
            for u, v, data in match_graph.edges(data=True):
                # Convert to indices
                i, j = node_idx[u], node_idx[v]
                
                # Add edge in both directions
                edge_index.append([i, j])
                edge_index.append([j, i])
                
                # Add edge weights
                weight = data['weight']
                edge_weights.append(weight)
                edge_weights.append(weight)
            
            # Convert to PyTorch tensors
            edge_index = torch.tensor(edge_index, dtype=torch.long).t().to(device)
            edge_weights = torch.tensor(edge_weights, dtype=torch.float).to(device)
            node_features = torch.tensor(node_features, dtype=torch.float).to(device)
            
            # Determine number of clusters
            if expected_scene_count is None:
                # Estimate number of clusters if not provided
                avg_images_per_scene = 15
                k = max(2, len(nodes) // avg_images_per_scene)
                print(f"Estimated number of scenes: {k}")
            else:
                k = expected_scene_count
                print(f"Using expected scene count: {k}")
            
            # Create PyTorch Geometric Data object
            data = Data(
                x=node_features,
                edge_index=edge_index,
                edge_attr=edge_weights
            )
            
            # Train the GNN model to generate node embeddings
            model = GCNSceneClassifier(
                num_node_features=node_features.shape[1],
                hidden_dim=64,
                num_classes=k
            ).to(device)
            
            # Set model to evaluation mode (no need for actual training in this case)
            model.eval()
            
            # Generate node embeddings
            with torch.no_grad():
                node_embeddings = model(data.x, data.edge_index, data.edge_attr)
            
            # Convert embeddings to numpy for clustering
            embeddings_np = node_embeddings.cpu().numpy()
            
            # Apply K-means clustering on the embeddings
            from sklearn.cluster import KMeans
            kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
            cluster_labels = kmeans.fit_predict(embeddings_np)
            
            # Organize nodes by cluster
            clusters = [[] for _ in range(k)]
            for i, label in enumerate(cluster_labels):
                clusters[label].append(nodes[i])
            
            # Filter out clusters that are too small (likely outliers)
            min_cluster_size = 3
            large_clusters = [cluster for cluster in clusters if len(cluster) >= min_cluster_size]
            
            # Identify outliers
            outlier_set = set()
            
            # Add nodes from small clusters
            for cluster in clusters:
                if len(cluster) < min_cluster_size:
                    outlier_set.update(cluster)
            
            # Final validation: verify clusters are connected
            verified_clusters = []
            
            for cluster in large_clusters:
                # Create a subgraph for this cluster
                subg = match_graph.subgraph(cluster)
                
                # Get connected components in the subgraph
                subg_components = list(nx.connected_components(subg))
                
                # Add each component as a separate cluster
                for comp in subg_components:
                    if len(comp) >= min_cluster_size:
                        verified_clusters.append(list(comp))
                    else:
                        # Components that are too small become outliers
                        outlier_set.update(comp)
            
            outliers = list(outlier_set)
            print(f"GNN-based clustering found {len(verified_clusters)} scenes and {len(outliers)} outliers")
            
            return verified_clusters, outliers
            
        except Exception as e:
            print(f"GNN-based clustering failed: {e}")
            traceback.print_exc()
            
            # Fall back to high-accuracy clustering
            print("Falling back to traditional clustering")
            return cluster_scenes_high_accuracy(match_graph, expected_scene_count)

# Test the clustering functions
def test_scene_clustering():
    """Test function to verify the scene clustering algorithms."""
    print("\nTesting scene clustering algorithms...")
    
    # Create a test graph with two distinct clusters
    G = nx.Graph()
    
    # Add two clusters
    # Cluster 1: images 1-10
    for i in range(1, 11):
        for j in range(i+1, 11):
            G.add_edge(f"img{i}.png", f"img{j}.png", weight=100)
    
    # Cluster 2: images 11-20
    for i in range(11, 21):
        for j in range(i+1, 21):
            G.add_edge(f"img{i}.png", f"img{j}.png", weight=100)
    
    # Add a few weak connections between clusters
    G.add_edge("img5.png", "img15.png", weight=10)
    G.add_edge("img8.png", "img12.png", weight=15)
    
    # Add some outlier nodes with weak connections
    G.add_node("outlier1.png")
    G.add_node("outlier2.png")
    G.add_edge("outlier3.png", "img3.png", weight=5)
    G.add_edge("outlier4.png", "img18.png", weight=5)
    
    # Try clustering with unknown scene count
    print("\nClustering with unknown scene count:")
    scenes, outliers = cluster_scenes_high_accuracy(G)
    
    print(f"Identified {len(scenes)} scenes and {len(outliers)} outliers")
    for i, scene in enumerate(scenes):
        print(f"Scene {i+1}: {len(scene)} images")
    
    # Try clustering with known scene count
    print("\nClustering with known scene count (2):")
    scenes, outliers = cluster_scenes_high_accuracy(G, 2)
    
    print(f"Identified {len(scenes)} scenes and {len(outliers)} outliers")
    for i, scene in enumerate(scenes):
        print(f"Scene {i+1}: {len(scene)} images")
    
    # Test with GNN-based clustering if available
    if HAS_TORCH_GEOMETRIC:
        print("\nTesting GNN-based clustering with known scene count (2):")
        scenes, outliers = cluster_scenes_with_deep_features(G, 2, device)
        
        print(f"Identified {len(scenes)} scenes and {len(outliers)} outliers")
        for i, scene in enumerate(scenes):
            print(f"Scene {i+1}: {len(scene)} images")

# Test scene clustering
test_scene_clustering()


Testing scene clustering algorithms...

Clustering with unknown scene count:
Graph has 24 nodes and 94 edges
Using edge weight threshold of 100.0
Estimated number of scenes: 2
Spectral clustering found 2 scenes and 2 outliers
Identified 2 scenes and 2 outliers
Scene 1: 11 images
Scene 2: 11 images

Clustering with known scene count (2):
Graph has 24 nodes and 94 edges
Using edge weight threshold of 100.0
Using expected scene count: 2
Spectral clustering found 2 scenes and 2 outliers
Identified 2 scenes and 2 outliers
Scene 1: 11 images
Scene 2: 11 images


## Pose Estimation with Bundle Adjustment

In [20]:
def estimate_relative_pose(features1, features2):
    """Estimate relative pose between two cameras.
    
    Args:
        features1: Features from the first image
        features2: Features from the second image
        
    Returns:
        Tuple of (E, R, T) where E is essential matrix, R is rotation, T is translation
    """
    try:
        # Extract and match keypoints
        is_keypoint_object = hasattr(features1['keypoints'][0], 'pt') if len(features1['keypoints']) > 0 else False
        
        if is_keypoint_object:
            # CV2 keypoint objects
            kp1 = np.float32([kp.pt for kp in features1['keypoints']])
            kp2 = np.float32([kp.pt for kp in features2['keypoints']])
        else:
            # Raw keypoint arrays
            kp1 = np.float32(features1['keypoints'])
            kp2 = np.float32(features2['keypoints'])
        
        # Get image dimensions
        dims1 = features1['dimensions']
        dims2 = features2['dimensions']
        
        # Estimate focal length and principal point
        focal1 = max(dims1[:2]) / 2
        focal2 = max(dims2[:2]) / 2
        
        pp1 = (dims1[1]/2, dims1[0]/2) if len(dims1) >= 2 else (dims1[0]/2, dims1[0]/2)
        pp2 = (dims2[1]/2, dims2[0]/2) if len(dims2) >= 2 else (dims2[0]/2, dims2[0]/2)
        
        # Match features directly
        desc1 = features1['descriptors']
        desc2 = features2['descriptors']
        
        # Fix descriptor orientation if necessary
        if desc1.shape[0] == 256 and desc1.shape[1] != 256:
            # Transpose from (D×N) to (N×D)
            desc1 = desc1.T
            desc2 = desc2.T
        
        # Normalize descriptors for matching
        desc1_norm = desc1 / (np.linalg.norm(desc1, axis=1, keepdims=True) + 1e-8)
        desc2_norm = desc2 / (np.linalg.norm(desc2, axis=1, keepdims=True) + 1e-8)
        
        # Compute distance matrix
        distances = 1.0 - desc1_norm @ desc2_norm.T
        
        # Apply ratio test for matching
        matches = []
        for i in range(distances.shape[0]):
            dist = distances[i, :]
            idx = np.argsort(dist)
            
            # Get best and second best matches
            best_idx = idx[0]
            second_best_idx = idx[1] if len(idx) > 1 else best_idx
            
            # Apply ratio test
            if dist[best_idx] < 0.8 * dist[second_best_idx]:
                matches.append(cv2.DMatch(i, best_idx, dist[best_idx]))
        
        # Extract matched points
        if len(matches) < 8:
            return None, None, None
        
        src_pts = np.float32([kp1[m.queryIdx] for m in matches])
        dst_pts = np.float32([kp2[m.trainIdx] for m in matches])
        
        # Compute essential matrix
        E, mask = cv2.findEssentialMat(
            src_pts, dst_pts, 
            focal=focal1, pp=pp1,
            method=cv2.RANSAC, 
            prob=0.999, 
            threshold=1.0
        )
        
        if E is None or E.shape != (3, 3):
            return None, None, None
        
        # Recover relative pose
        _, R, T, mask = cv2.recoverPose(E, src_pts, dst_pts, focal=focal1, pp=pp1)
        
        return E, R, T
    except Exception as e:
        print(f"Error estimating relative pose: {e}")
        traceback.print_exc()
        return None, None, None

def estimate_poses_with_bundle_adjustment(scene_images, features_dict):
    """Estimate camera poses for a scene using robust SfM and bundle adjustment.
    
    Args:
        scene_images: List of image names in the scene
        features_dict: Dictionary of image features
        
    Returns:
        Dictionary mapping image names to (R, T) poses
    """
    # Return empty result if we have too few images
    if len(scene_images) < 2:
        return {img: (None, None) for img in scene_images}
    
    # Build match graph for this scene
    scene_graph = nx.Graph()
    for img in scene_images:
        scene_graph.add_node(img)
    
    # Add edges for pairs with valid features
    for i, img1 in enumerate(scene_images):
        if img1 not in features_dict:
            continue
            
        for j in range(i + 1, len(scene_images)):
            img2 = scene_images[j]
            if img2 not in features_dict:
                continue
                
            features1 = features_dict[img1]
            features2 = features_dict[img2]
            
            # Skip if features are invalid
            if (features1 is None or features2 is None or 
                'keypoints' not in features1 or 'keypoints' not in features2 or
                len(features1['keypoints']) == 0 or len(features2['keypoints']) == 0):
                continue
            
            # Determine backend
            backend1 = features1.get('backend', 'unknown')
            backend2 = features2.get('backend', 'unknown')
            
            # Skip if backends are incompatible
            if backend1 != backend2 and not (backend1.startswith('lightglue') and backend2.startswith('lightglue')):
                # Can only match features with same backend or both lightglue
                continue
            
            # Check if match already stored
            if scene_graph.has_edge(img1, img2):
                continue
                
            # Estimate relative pose
            E, R, T = estimate_relative_pose(features1, features2)
            
            # Add edge if geometrically consistent
            if R is not None and T is not None:
                scene_graph.add_edge(img1, img2, E=E, R=R, T=T)
    
    # Check if we have enough edges to reconstruct
    if scene_graph.number_of_edges() < len(scene_images) - 1:
        print(f"Warning: Not enough connected edges for robust reconstruction ({scene_graph.number_of_edges()} < {len(scene_images) - 1})")
    
    # Use spanning tree for initial pose estimates
    poses = {}
    
    # Find maximum spanning tree based on number of inliers
    mst = nx.maximum_spanning_tree(scene_graph)
    
    # Choose first camera as reference
    reference_camera = scene_images[0]
    poses[reference_camera] = (np.eye(3), np.zeros((3, 1)))
    
    # Perform breadth-first traversal to propagate poses
    traversal_queue = [reference_camera]
    visited = set([reference_camera])
    
    while traversal_queue:
        current = traversal_queue.pop(0)
        
        # Process neighbors
        for neighbor in mst.neighbors(current):
            if neighbor in visited:
                continue
                
            # Mark as visited and add to queue
            visited.add(neighbor)
            traversal_queue.append(neighbor)
            
            # Get relative pose
            if mst.has_edge(current, neighbor):
                # current -> neighbor edge
                R_rel = mst[current][neighbor]['R']
                T_rel = mst[current][neighbor]['T']
                
                # Get current camera pose
                R_curr, T_curr = poses[current]
                
                # Compute absolute pose of neighbor
                R_abs = R_rel @ R_curr
                T_abs = R_rel @ T_curr + T_rel
                
                # Store pose
                poses[neighbor] = (R_abs, T_abs)
            else:
                # neighbor -> current edge (need to invert)
                R_rel = mst[neighbor][current]['R']
                T_rel = mst[neighbor][current]['T']
                
                # Invert relative pose
                R_rel_inv = R_rel.T
                T_rel_inv = -R_rel_inv @ T_rel
                
                # Get current camera pose
                R_curr, T_curr = poses[current]
                
                # Compute absolute pose of neighbor
                R_abs = R_rel_inv @ R_curr
                T_abs = R_rel_inv @ T_curr + T_rel_inv
                
                # Store pose
                poses[neighbor] = (R_abs, T_abs)
    
    # Add missing images with null poses
    for img in scene_images:
        if img not in poses:
            poses[img] = (None, None)
    
    # TODO: Add bundle adjustment for further refinement
    # This would be done using sparse bundle adjustment
    # For now, we return the poses from spanning tree
    
    return poses

def estimate_poses_batch(batched_scenes, features_dict):
    """Estimate poses for batched scenes.
    
    Args:
        batched_scenes: List of scenes, where each scene is a list of image names
        features_dict: Dictionary of image features
        
    Returns:
        Dictionary mapping image names to (R, T) poses
    """
    all_poses = {}
    
    # Process each scene
    for i, scene in enumerate(batched_scenes):
        print(f"Processing scene {i+1}/{len(batched_scenes)} ({len(scene)} images)")
        scene_poses = estimate_poses_with_bundle_adjustment(scene, features_dict)
        all_poses.update(scene_poses)
    
    return all_poses

def test_pose_estimation():
    """Test function for pose estimation."""
    print("\nTesting pose estimation...")
    
    # Create dummy features for testing
    features_dict = {}
    
    # Create 5 images in a sequence
    for i in range(5):
        img_name = f"img{i}.png"
        
        # Create dummy features
        keypoints = np.random.rand(100, 2)
        descriptors = np.random.rand(100, 128)
        
        features_dict[img_name] = {
            'keypoints': keypoints,
            'descriptors': descriptors,
            'dimensions': (480, 640),
            'backend': 'test'
        }
    
    # Dummy scene
    scene = [f"img{i}.png" for i in range(5)]
    
    # Test pose estimation
    poses = estimate_poses_with_bundle_adjustment(scene, features_dict)
    
    # Print results
    print(f"Estimated poses for {len(poses)} images")
    registered_count = sum(1 for _, (R, T) in poses.items() if R is not None and T is not None)
    print(f"Successfully registered {registered_count}/{len(scene)} images")

# Test pose estimation
test_pose_estimation()


Testing pose estimation...
Estimated poses for 5 images
Successfully registered 1/5 images


## Main Processing Pipeline

In [22]:
def format_and_save_submission(results, output_path):
    """Format results and save to submission file.
    
    Args:
        results: Dictionary mapping dataset names to scene assignments and poses
        output_path: Path to save submission file
    """
    # Create output dataframe
    submission = []
    
    for dataset_name, result in results.items():
        scenes = result['scenes']
        outliers = result['outliers']
        poses = result['poses']
        
        # Add each assigned scene to submission
        for scene_idx, scene in enumerate(scenes):
            for image in scene:
                R, T = poses.get(image, (None, None))
                
                if R is not None and T is not None:
                    # Format rotation matrix as string
                    R_str = ';'.join([str(float(r)) for r in R.flatten()])
                    
                    # Format translation vector as string
                    T_str = ';'.join([str(float(t)) for t in T.flatten()])
                else:
                    # Image is part of a scene but not registered
                    R_str = ';'.join(['nan'] * 9)
                    T_str = ';'.join(['nan'] * 3)
                
                # Add to submission
                submission.append({
                    'dataset': dataset_name,
                    'scene': f'cluster{scene_idx+1}',
                    'image': image,
                    'rotation_matrix': R_str,
                    'translation_vector': T_str
                })
        
        # Add outliers to submission
        for image in outliers:
            # Use nan values for outliers
            R_str = ';'.join(['nan'] * 9)
            T_str = ';'.join(['nan'] * 3)
            
            # Add to submission
            submission.append({
                'dataset': dataset_name,
                'scene': 'outliers',
                'image': image,
                'rotation_matrix': R_str,
                'translation_vector': T_str
            })
    
    # Convert to dataframe and save
    df = pd.DataFrame(submission)
    
    # Make sure all required columns are present
    required_columns = ['dataset', 'scene', 'image', 'rotation_matrix', 'translation_vector']
    for col in required_columns:
        if col not in df.columns:
            df[col] = 'nan'
    
    # Ensure columns are in the right order
    df = df[required_columns]
    
    # Save to CSV
    df.to_csv(output_path, index=False)
    print(f"Saved submission to {output_path}")
    
    # Print submission statistics
    total_images = len(df)
    outliers = len(df[df['scene'] == 'outliers'])
    
    print(f"Submission statistics:")
    print(f"  Total images: {total_images}")
    print(f"  Outliers: {outliers} ({outliers/total_images*100:.1f}%)")
    print(f"  Registered images: {len(df[df['rotation_matrix'] != ';'.join(['nan']*9)])}")
    
    # Count unique scenes
    scene_counts = df.groupby('dataset')['scene'].nunique()
    print(f"  Average scenes per dataset: {scene_counts.mean():.2f}")
    
    return df

def process_all_test_datasets(test_dir, multi_backend_matcher, training_structure, test_dataset_info):
    """Process all test datasets.
    
    Args:
        test_dir: Path to test directory
        multi_backend_matcher: Multi-backend matcher instance
        training_structure: Training structure information
        test_dataset_info: Test dataset analysis info
        
    Returns:
        Dictionary with results for all datasets
    """
    # Find all test datasets
    test_datasets = {p.name: p for p in Path(test_dir).glob('*') if p.is_dir()}
    
    if not test_datasets:
        print("No test datasets found!")
        return {}
    
    print(f"Found {len(test_datasets)} test datasets")
    
    # Process each dataset
    results = {}
    
    for dataset_name, dataset_path in test_datasets.items():
        print(f"\nProcessing test dataset '{dataset_name}'...")
        
        # Skip non-directory items
        if not dataset_path.is_dir():
            continue
            
        # Process dataset
        result = process_test_dataset_multi_backend(
            dataset_path, 
            dataset_name, 
            training_structure, 
            test_dataset_info, 
            multi_backend_matcher
        )
        
        # Store result
        results[dataset_name] = result
        
        # Free memory
        free_memory()
    
    return results

def run_full_pipeline():
    """Run the full processing pipeline."""
    print("\nStarting full processing pipeline...")
    
    # Initialize the multi-backend matcher
    print("Initializing multi-backend matcher...")
    matcher = MultiBackendMatcher(FEATURE_PARAMS, MATCHING_PARAMS)
    
    # Process all test datasets
    results = process_all_test_datasets(
        TEST_DIR,
        matcher,
        training_structure,
        test_dataset_info
    )
    
    # Generate submission file
    print("\nGenerating submission file...")
    submission_df = format_and_save_submission(results, OUTPUT_FILE)
    
    print("\nProcessing pipeline complete!")
    return submission_df

def fix_multi_backend_matcher():
    """Fix the multi-backend matcher to address descriptor orientation issues."""
    print("\nFixing MultiBackendMatcher._match_descriptors method...")
    
    # Monkey patch the _match_descriptors method to handle descriptor orientation issues
    def fixed_match_descriptors(self, desc1, desc2, backend1=None, backend2=None):
        """Match descriptors using cosine similarity and ratio test.
        This is a safer generic method that works with any descriptor format.
        
        Args:
            desc1, desc2: Feature descriptors as numpy arrays
            backend1, backend2: Optional backend names for debugging
            
        Returns:
            List of cv2.DMatch objects
        """
        try:
            # Print original descriptor shapes
            print(f"Original descriptor shapes - desc1: {desc1.shape}, desc2: {desc2.shape}")
            
            # Fix descriptor orientation to ensure they are in (N, D) format
            # This is crucial for proper matrix multiplication later
            
            # Handle SuperPoint descriptors (256, N)
            if backend1 == 'superglue' or backend2 == 'superglue':
                # SuperPoint descriptors are typically (256, N)
                if desc1.shape[0] == 256 and desc1.shape[1] != 256:
                    # Transpose from (256, N) to (N, 256)
                    desc1 = desc1.T
                if desc2.shape[0] == 256 and desc2.shape[1] != 256:
                    # Transpose from (256, N) to (N, 256)
                    desc2 = desc2.T
                print(f"Transposed SuperPoint descriptors to: {desc1.shape} and {desc2.shape}")
            
            # Handle SIFT descriptors which might be transposed
            elif backend1 == 'lightglue_sift' or backend2 == 'lightglue_sift':
                # Check if descriptors are in (D, N) format instead of (N, D)
                if desc1.shape[0] < desc1.shape[1]:
                    # Already in (N, D) format
                    pass
                else:
                    # Transpose from (D, N) to (N, D)
                    desc1 = desc1.T
                    desc2 = desc2.T
                    print(f"Transposed SIFT descriptors to: {desc1.shape} and {desc2.shape}")
            
            # Handle 1D descriptors (reshape to 2D)
            elif hasattr(desc1, 'shape') and len(desc1.shape) == 1:
                desc1 = desc1.reshape(1, -1)
                desc2 = desc2.reshape(1, -1)
                print(f"Reshaped 1D descriptors to: {desc1.shape} and {desc2.shape}")
            
            # Handle list descriptors
            elif isinstance(desc1, list):
                desc1 = np.array(desc1)
                desc2 = np.array(desc2)
                print(f"Converted list descriptors to arrays: {desc1.shape} and {desc2.shape}")
            
            # Ensure we have 2D arrays with features as rows
            if len(desc1.shape) != 2 or len(desc2.shape) != 2:
                print(f"Unexpected descriptor shapes: {desc1.shape} and {desc2.shape}")
                # Try to reshape if possible
                if len(desc1.shape) > 2:
                    desc1 = desc1.reshape(desc1.shape[0], -1)
                if len(desc2.shape) > 2:
                    desc2 = desc2.reshape(desc2.shape[0], -1)
            
            # Print final descriptor shapes
            print(f"Final descriptor shapes for matching - desc1: {desc1.shape}, desc2: {desc2.shape}")
            
            # Normalize descriptors for cosine similarity
            desc1_norm = np.copy(desc1)
            desc2_norm = np.copy(desc2)
            
            # Normalize rows
            desc1_norms = np.linalg.norm(desc1_norm, axis=1, keepdims=True) + 1e-10
            desc2_norms = np.linalg.norm(desc2_norm, axis=1, keepdims=True) + 1e-10
            
            desc1_norm = desc1_norm / desc1_norms
            desc2_norm = desc2_norm / desc2_norms
            
            # Create distance matrix (avoid matmul issues)
            distances = np.zeros((desc1_norm.shape[0], desc2_norm.shape[0]))
            
            # Compute pairwise distances manually to avoid matmul issues
            # This is an alternative to the matrix multiplication approach
            for i in range(desc1_norm.shape[0]):
                for j in range(desc2_norm.shape[0]):
                    # Cosine similarity
                    dot_product = np.dot(desc1_norm[i], desc2_norm[j])
                    # Convert to distance (1 - similarity)
                    distances[i, j] = 1.0 - dot_product
            
            # Apply ratio test
            good_matches = []
            for i in range(distances.shape[0]):
                # Get distances for this descriptor
                dist = distances[i]
                
                # Find best match (lowest distance)
                best_idx = np.argmin(dist)
                best_dist = dist[best_idx]
                
                # Find second best match
                dist_copy = dist.copy()
                dist_copy[best_idx] = float('inf')
                second_best_idx = np.argmin(dist_copy)
                second_best_dist = dist_copy[second_best_idx]
                
                # Apply ratio test (lower ratio means stricter test)
                if best_dist < 0.8 * second_best_dist:
                    m = cv2.DMatch()
                    m.queryIdx = i
                    m.trainIdx = best_idx
                    m.distance = best_dist
                    good_matches.append(m)
            
            print(f"Generic descriptor matcher: Found {len(good_matches)} matches")
            return good_matches
        except Exception as e:
            print(f"Error in generic descriptor matching: {e}")
            traceback.print_exc()
            return []
    
    # Apply the patch to the MultiBackendMatcher instance
    MultiBackendMatcher._match_descriptors = fixed_match_descriptors
    
    print("Matcher fixed successfully!")

# Fix the multi-backend matcher
fix_multi_backend_matcher()

# Run a small test of the pipeline
def test_pipeline():
    """Run a small test of the pipeline."""
    print("\nRunning small pipeline test...")
    
    # Initialize matcher
    matcher = MultiBackendMatcher(FEATURE_PARAMS, MATCHING_PARAMS)
    
    # Test feature extraction and matching
    test_feature_extraction_pipeline(matcher, max_images=3)
    
    # Test clustering
    test_scene_clustering()
    
    # Test pose estimation
    test_pose_estimation()
    
    print("\nPipeline test complete!")

# Pipeline test
test_pipeline()

# Run full pipeline
run_full_pipeline()


Fixing MultiBackendMatcher._match_descriptors method...
Matcher fixed successfully!

Running small pipeline test...
Initializing Custom DISK feature extractor...
DISK feature extractor initialized successfully
LightGlue with DISK features is available
Initializing Custom SIFT feature extractor...
SIFT feature extractor initialized successfully
LightGlue with SIFT features is available
Loaded SuperPoint model
Loading SuperPoint model from /kaggle/working/superglue_models/weights/superpoint_v1.pth
SuperPoint model loaded successfully
Loaded SuperGlue model ("outdoor" weights)
Loading SuperGlue model from /kaggle/working/superglue_models/weights/outdoor.pth
SuperGlue model loaded successfully
SuperPoint/SuperGlue is available
Traditional SIFT is available as fallback
Using lightglue_disk as primary backend
Available backends (in order): lightglue_disk, lightglue_sift, superglue, sift

Testing feature extraction pipeline...
Using test dataset: ETs

Extracting features from another_et_anot

Traceback (most recent call last):
  File "<ipython-input-15-9c89ef6c86d6>", line 234, in _match_descriptors
    similarity = desc1_norm @ desc2_norm.T
ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 1557 is different from 1757)


SuperGlue model loaded successfully
SuperPoint/SuperGlue is available
Traditional SIFT is available as fallback
Using lightglue_disk as primary backend
Available backends (in order): lightglue_disk, lightglue_sift, superglue, sift
Found 2 test datasets

Processing test dataset 'ETs'...
  Processing dataset 'ETs'...
  Extracting features...


Extracting features:   5%|▍         | 1/22 [00:04<01:29,  4.27s/it]

DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>
Extracted 1557 SIFT features from outliers_out_et003.png
Extracted features from outliers_out_et003.png using lightglue_sift backend
DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>
Extracted 1757 SIFT features from another_et_another_et004.png
Extracted features from another_et_another_et004.png using lightglue_sift backend


Extracting features:   9%|▉         | 2/22 [00:04<00:42,  2.12s/it]

DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>
Extracted 1202 SIFT features from another_et_another_et009.png
Extracted features from another_et_another_et009.png using lightglue_sift backend


Extracting features:  14%|█▎        | 3/22 [00:05<00:27,  1.47s/it]

DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>
DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>
Extracted 1587 SIFT features from another_et_another_et006.png
Extracted features from another_et_another_et006.png using lightglue_sift backend


Extracting features:  23%|██▎       | 5/22 [00:06<00:11,  1.44it/s]

Extracted 2135 SIFT features from et_et004.png
Extracted features from et_et004.png using lightglue_sift backend


Extracting features:  27%|██▋       | 6/22 [00:08<00:20,  1.28s/it]

DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>
DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>
Extracted 648 SIFT features from outliers_out_et001.png
Extracted features from outliers_out_et001.png using lightglue_sift backend
Extracted 1355 SIFT features from et_et002.png
Extracted features from et_et002.png using lightglue_sift backend
DISK extractor response format unexpected, parsing manually...DISK extractor response format unexpected, parsing manually...

Cannot parse DISK response: <class 'list'>
Cannot parse DISK response: <class 'list'>


Extracting features:  41%|████      | 9/22 [00:09<00:08,  1.61it/s]

Extracted 1452 SIFT features from et_et008.png
Extracted features from et_et008.png using lightglue_sift backend
Extracted 1701 SIFT features from another_et_another_et008.png
Extracted features from another_et_another_et008.png using lightglue_sift backend
DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>
DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>


Extracting features:  50%|█████     | 11/22 [00:12<00:09,  1.11it/s]

Extracted 1908 SIFT features from another_et_another_et005.png
Extracted features from another_et_another_et005.png using lightglue_sift backend
DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>
Extracted 1245 SIFT features from et_et006.png
Extracted features from et_et006.png using lightglue_sift backend


Extracting features:  55%|█████▍    | 12/22 [00:12<00:06,  1.47it/s]

Extracted 1729 SIFT features from another_et_another_et003.png
Extracted features from another_et_another_et003.png using lightglue_sift backend
DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>
DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>
Extracted 2034 SIFT features from another_et_another_et002.png
Extracted features from another_et_another_et002.png using lightglue_sift backend


Extracting features:  64%|██████▎   | 14/22 [00:12<00:07,  1.11it/s]

DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>
Extracted 1328 SIFT features from et_et007.png
Extracted features from et_et007.png using lightglue_sift backend
Extracted 1679 SIFT features from et_et000.png
Extracted features from et_et000.png using lightglue_sift backend





DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>
DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>
Extracted 1103 SIFT features from another_et_another_et010.png
Extracted features from another_et_another_et010.png using lightglue_sift backend
Extracted 1422 SIFT features from et_et001.png
Extracted features from et_et001.png using lightglue_sift backend
DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>
DISK extractor response format unexpected, parsing manually...
Cannot parse DISK response: <class 'list'>
Extracted 1071 SIFT features from et_et005.png
Extracted features from et_et005.png using lightglue_sift backend
Extracted 2211 SIFT features from another_et_another_et001.png
Extracted features from another_et_another_et001.png using lightglue_sift backend
DISK extractor response format unexpected, parsing manually.

KeyboardInterrupt: 

## Visualize and Evaluate Results

In [None]:
def visualize_scene_clusters(dataset_name, dataset_results, output_dir=None):
    """Visualize scene clusters with sample images.
    
    Args:
        dataset_name: Name of the dataset
        dataset_results: Results dictionary with scenes and outliers
        output_dir: Optional directory to save visualizations
    """
    import matplotlib.pyplot as plt
    from matplotlib.gridspec import GridSpec
    
    scenes = dataset_results['scenes']
    outliers = dataset_results['outliers']
    
    # Create directory for visualizations if needed
    if output_dir is not None:
        os.makedirs(output_dir, exist_ok=True)
    
    # Set up figure
    n_scenes = len(scenes)
    fig = plt.figure(figsize=(15, 5 * (n_scenes + 1)))
    gs = GridSpec(n_scenes + 1, 1, figure=fig)
    
    # Plot scenes
    for i, scene in enumerate(scenes):
        ax = fig.add_subplot(gs[i, 0])
        ax.set_title(f"Scene {i+1}: {len(scene)} images")
        
        # Display sample images from this scene (up to 5)
        sample_images = scene[:min(5, len(scene))]
        
        # Create a grid for sample images
        n_samples = len(sample_images)
        img_grid = GridSpec(1, n_samples, subplot_spec=gs[i, 0])
        
        for j, img_name in enumerate(sample_images):
            img_ax = fig.add_subplot(img_grid[0, j])
            img_path = Path(TEST_DIR) / dataset_name / img_name
            
            try:
                img = plt.imread(img_path)
                img_ax.imshow(img)
                img_ax.set_title(f"{img_name[:10]}...")
                img_ax.axis('off')
            except Exception as e:
                img_ax.text(0.5, 0.5, f"Error: {e}", ha='center', va='center')
                img_ax.axis('off')
    
    # Plot outliers
    if outliers:
        ax = fig.add_subplot(gs[n_scenes, 0])
        ax.set_title(f"Outliers: {len(outliers)} images")
        
        # Display sample outlier images (up to 5)
        sample_outliers = outliers[:min(5, len(outliers))]
        
        # Create a grid for sample images
        n_samples = len(sample_outliers)
        img_grid = GridSpec(1, n_samples, subplot_spec=gs[n_scenes, 0])
        
        for j, img_name in enumerate(sample_outliers):
            img_ax = fig.add_subplot(img_grid[0, j])
            img_path = Path(TEST_DIR) / dataset_name / img_name
            
            try:
                img = plt.imread(img_path)
                img_ax.imshow(img)
                img_ax.set_title(f"{img_name[:10]}...")
                img_ax.axis('off')
            except Exception as e:
                img_ax.text(0.5, 0.5, f"Error: {e}", ha='center', va='center')
                img_ax.axis('off')
    
    plt.tight_layout()
    
    # Save or display the visualization
    if output_dir is not None:
        plt.savefig(os.path.join(output_dir, f"{dataset_name}_scenes.png"))
        plt.close()
    else:
        plt.show()

def visualize_camera_poses(dataset_name, scene_idx, scene_images, poses, output_dir=None):
    """Visualize camera poses in 3D.
    
    Args:
        dataset_name: Name of the dataset
        scene_idx: Scene index
        scene_images: List of image names in the scene
        poses: Dictionary of camera poses
        output_dir: Optional directory to save visualizations
    """
    from mpl_toolkits.mplot3d import Axes3D
    import matplotlib.pyplot as plt
    import numpy as np
    
    # Filter out images without valid poses
    valid_images = [img for img in scene_images if img in poses and poses[img][0] is not None]
    
    if not valid_images:
        print(f"No valid poses for {dataset_name}, scene {scene_idx}")
        return
    
    # Create figure
    fig = plt.figure(figsize=(10, 8))
    ax = fig.add_subplot(111, projection='3d')
    
    # Plot camera positions
    positions = []
    for img in valid_images:
        R, t = poses[img]
        positions.append(t)
    
    positions = np.array(positions)
    
    # Calculate center
    center = np.mean(positions, axis=0)
    
    # Plot camera positions
    ax.scatter(positions[:, 0], positions[:, 1], positions[:, 2], c='blue', s=50)
    
    # Plot camera viewing directions
    for img in valid_images:
        R, t = poses[img]
        
        # Camera viewing direction (toward -Z in camera coordinates)
        direction = R.T @ np.array([0, 0, -1])
        
        # Scale direction vector for visualization
        scale = 0.5
        
        # Plot viewing direction
        ax.quiver(t[0], t[1], t[2], 
                  direction[0] * scale, direction[1] * scale, direction[2] * scale, 
                  color='red', arrow_length_ratio=0.1)
    
    # Set axis labels
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    
    # Set title
    ax.set_title(f"{dataset_name}, Scene {scene_idx}: {len(valid_images)} cameras")
    
    # Center the view on the mean position
    ax.set_xlim(center[0] - 2, center[0] + 2)
    ax.set_ylim(center[1] - 2, center[1] + 2)
    ax.set_zlim(center[2] - 2, center[2] + 2)
    
    plt.tight_layout()
    
    # Save or display the visualization
    if output_dir is not None:
        os.makedirs(output_dir, exist_ok=True)
        plt.savefig(os.path.join(output_dir, f"{dataset_name}_scene{scene_idx}_poses.png"))
        plt.close()
    else:
        plt.show()

def visualize_results(submission_df, test_dir, output_dir='visualizations'):
    """Visualize the results from the submission file.
    
    Args:
        submission_df: Submission dataframe
        test_dir: Test directory
        output_dir: Directory to save visualizations
    """
    # Create output directory
    os.makedirs(output_dir, exist_ok=True)
    
    # Group results by dataset
    for dataset in submission_df['dataset'].unique():
        dataset_df = submission_df[submission_df['dataset'] == dataset]
        
        # Extract scenes and outliers
        scenes = []
        for scene in dataset_df['scene'].unique():
            if scene != 'outliers':
                scene_images = dataset_df[dataset_df['scene'] == scene]['image'].tolist()
                scenes.append(scene_images)
        
        outliers = dataset_df[dataset_df['scene'] == 'outliers']['image'].tolist()
        
        # Create dataset results
        dataset_results = {
            'scenes': scenes,
            'outliers': outliers,
            'poses': {}
        }
        
        # Extract poses
        for _, row in dataset_df.iterrows():
            if row['scene'] != 'outliers':
                # Parse rotation matrix
                try:
                    R_str = row['rotation_matrix']
                    if 'nan' not in R_str:
                        R = np.array([float(x) for x in R_str.split(';')]).reshape(3, 3)
                    
                        # Parse translation vector
                        T_str = row['translation_vector']
                        T = np.array([float(x) for x in T_str.split(';')])
                        
                        dataset_results['poses'][row['image']] = (R, T)
                    else:
                        dataset_results['poses'][row['image']] = (None, None)
                except Exception:
                    dataset_results['poses'][row['image']] = (None, None)
        
        # Visualize scene clusters
        visualize_scene_clusters(dataset, dataset_results, output_dir)
        
        # Visualize camera poses for each scene
        for i, scene in enumerate(scenes):
            visualize_camera_poses(dataset, i+1, scene, dataset_results['poses'], output_dir)

# Add this to the end of the main function to visualize results
def visualize_submission_results():
    """Visualize the results from the submission file."""
    if os.path.exists(OUTPUT_FILE):
        submission_df = pd.read_csv(OUTPUT_FILE)
        visualize_results(submission_df, TEST_DIR)
    else:
        print("Submission file not found. Run the pipeline first.")

if __name__ == "__main__":
    # Execute the main pipeline
    main()
    
    # Visualize results (optional)
    try:
        visualize_submission_results()
    except Exception as e:
        print(f"Visualization failed: {e}")