## IMC24 Starter for Image Matching Challenge 2024 Hexathlon.

IMC24 Starter came into existence thanks to [imc-understanding-the-baseline][1]. The scoring metric (mean Average Accuracy) came from [IMC2024-3D-metric-evaluation-example][2].<br>
To further modify the code for submission and scoring, the utility script can be accessed [here][3].

[1]: https://www.kaggle.com/code/asarvazyan/imc-understanding-the-baseline
[2]: https://www.kaggle.com/code/fabiobellavia/imc2024-3d-metric-evaluation-example
[3]: https://www.kaggle.com/code/nartaa/imc24

# SETUP

In [1]:
from imc24 import *

In [2]:
# Copy /kaggle/input/disambiguation into the current directory as `disambiguation`.
!cp -r /kaggle/input/disambiguation disambiguation

In [3]:
import sys

# Extend the module search path with the disambiguation input folder and the
# working directory.
sys.path.append("/kaggle/input/disambiguation")
sys.path.append("/kaggle/working")

# Scene category labels; every label appears exactly once.
all_categories = [
    'symmetries-and-repeats',
    'historical_preservation',
    'air-to-ground',
    'day-night',
    'temporal',
    'nature',
    'transparent',
]


# SIMILAR PAIRS

In [4]:
def get_pairs(images_list,device=DEVICE):
    """Select the index pairs of images that should be matched against each other.

    When ``EXHAUSTIVE`` is true every unordered pair is returned. Otherwise each
    image is embedded with the model stored under
    ``/kaggle/input/dinov2/pytorch/base/1/`` and pairs are shortlisted from the
    pairwise embedding distances.

    Args:
        images_list: Sequence of image paths.
        device: Device on which the embedding model and the images are placed.

    Returns:
        A list of ``(i, j)`` tuples of Python ints with ``i < j`` indexing into
        ``images_list``. In the non-exhaustive case the list is sorted and
        contains no duplicates or self-pairs.
    """
    if EXHAUSTIVE:
        return list(combinations(range(len(images_list)), 2))

    # Fewer than two images cannot form a pair; skip loading the model.
    if len(images_list) < 2:
        return []

    processor = AutoImageProcessor.from_pretrained('/kaggle/input/dinov2/pytorch/base/1/')
    model = AutoModel.from_pretrained('/kaggle/input/dinov2/pytorch/base/1/').eval().to(device)

    embeddings = []
    for img_path in images_list:
        image = K.io.load_image(img_path, K.io.ImageLoadType.RGB32, device=device)[None, ...]
        with torch.inference_mode():
            inputs = processor(images=image, return_tensors="pt", do_rescale=False, do_resize=True,
                               do_center_crop=True, size=224).to(device)
            outputs = model(**inputs)
            # Max-pool over the token axis, then L2-normalise the descriptor.
            embedding = F.normalize(outputs.last_hidden_state.max(dim=1)[0])
        embeddings.append(embedding)

    embeddings = torch.cat(embeddings, dim=0)
    distances = torch.cdist(embeddings, embeddings).cpu().numpy()

    # Candidate pairs: embeddings closer than the threshold, excluding self-pairs.
    keep = distances <= DISTANCES_THRESHOLD
    np.fill_diagonal(keep, False)

    # Images with no candidate at all fall back to their nearest neighbours.
    # The first entry of the ranking is skipped (normally the image itself).
    for idx0 in np.where(keep.sum(axis=1) == 0)[0]:
        nearest = np.argsort(distances[idx0])[1:MIN_PAIRS]
        keep[idx0, nearest] = True

    # Drop every pair whose distance reaches the hard cut-off.
    keep[distances >= TOLERANCE] = False

    # A pair is kept if it was selected in either direction. Taking the strict
    # upper triangle yields each unordered pair once and excludes (i, i).
    keep = keep | keep.T
    rows, cols = np.nonzero(np.triu(keep, k=1))
    return list(zip(rows.tolist(), cols.tolist()))

# KEYPOINTS EXTRACTOR AND MATCHER

In [5]:
def keypoints_matches(images_list,pairs, categories):
    """Extract local features for every image and match the requested pairs.

    Keypoints and descriptors are written to ``keypoints.h5`` and
    ``descriptors.h5`` (keyed by image file name). Matches are written to
    ``matches.h5`` as ``<name of first image>/<name of second image>`` datasets.

    Args:
        images_list: Sequence of image paths (objects exposing ``.name``).
        pairs: Iterable of ``(i, j)`` index pairs into ``images_list``.
        categories: Scene categories; accepted for interface compatibility and
            currently not used by this function.
    """
    extractor = ALIKED(max_num_keypoints=MAX_NUM_KEYPOINTS, detection_threshold=DETECTION_THRESHOLD,
                       resize=RESIZE_TO).eval().to(DEVICE)
    matcher = KF.LightGlueMatcher("aliked", {'width_confidence': -1, 'depth_confidence': -1,
                                             'mp': 'cuda' in str(DEVICE)}).eval().to(DEVICE)

    with h5py.File("keypoints.h5", mode="w") as f_kp, h5py.File("descriptors.h5", mode="w") as f_desc:
        for image_path in images_list:
            with torch.inference_mode():
                image = load_image(image_path).to(DEVICE)
                feats = extractor.extract(image)
                # Drop only the leading batch axis. A bare squeeze() would also
                # collapse the keypoint axis when exactly one keypoint is found.
                # NOTE(review): assumes extract() returns batched tensors with a
                # leading dimension of 1 — confirm against the extractor in use.
                f_kp[image_path.name] = feats["keypoints"].squeeze(0).cpu().numpy()
                f_desc[image_path.name] = feats["descriptors"].squeeze(0).detach().cpu().numpy()

    with h5py.File("keypoints.h5", mode="r") as f_kp, h5py.File("descriptors.h5", mode="r") as f_desc, \
         h5py.File("matches.h5", mode="w") as f_matches:
        for pair in pairs:
            key1, key2 = images_list[pair[0]].name, images_list[pair[1]].name

            kp1 = torch.from_numpy(f_kp[key1][...]).to(DEVICE)
            kp2 = torch.from_numpy(f_kp[key2][...]).to(DEVICE)
            desc1 = torch.from_numpy(f_desc[key1][...]).to(DEVICE)
            desc2 = torch.from_numpy(f_desc[key2][...]).to(DEVICE)
            with torch.inference_mode():
                _, idxs = matcher(desc1, desc2, KF.laf_from_center_scale_ori(kp1[None]),
                                  KF.laf_from_center_scale_ori(kp2[None]))

            n_matches = len(idxs)
            if n_matches == 0:
                # Nothing to store; also guarantees `group` below always
                # belongs to the current key1.
                continue
            group = f_matches.require_group(key1)
            if n_matches >= MIN_MATCHES:
                group.create_dataset(key2, data=idxs.detach().cpu().numpy())

# RANSAC AND SPARSE RECONSTRUCTION

In [6]:
def ransac_and_sparse_reconstruction(images_path, categories):
    """Build a COLMAP database from the stored features and reconstruct the scene.

    Keypoints and matches are imported from ``/kaggle/working/`` into a fresh,
    timestamp-named database. For scenes whose categories contain
    ``symmetries-and-repeats`` but not ``transparent``, matches scoring below a
    fixed geodesic-consistency threshold are removed into a new database first.
    Then ``pycolmap.match_exhaustive`` and ``pycolmap.incremental_mapping`` run
    on the resulting database.

    Args:
        images_path: Directory containing the scene images.
        categories: Scene categories; only tested with ``in``.

    Returns:
        The value returned by ``pycolmap.incremental_mapping``.
    """
    time_str = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    db_name = f'colmap_{time_str}.db'

    db = COLMAPDatabase.connect(db_name)
    try:
        db.create_tables()
        fname_to_id = add_keypoints(db, '/kaggle/working/', images_path, '', 'simple-radial', False)
        add_matches(db, '/kaggle/working/', fname_to_id)
        db.commit()
    finally:
        # Release the connection before other tools open the same database file.
        db.close()

    if "symmetries-and-repeats" in categories and 'transparent' not in categories:
        print("remove disambiguation ing")
        from disambiguation import calculate_geodesic_consistency_scores_fn
        from disambiguation.utils.read_write_database import remove_matches_from_db

        scores = calculate_geodesic_consistency_scores_fn(None, old_db_path=Path(db_name),
                                                          track_degree=3,
                                                          coverage_thres=0.7, alpha=0.1,
                                                          minimal_views=4, ds="largearray")

        # Entries scoring at least `thres` are kept; the rest are removed.
        thres = 0.05
        valid = scores >= thres
        new_db_name = Path(db_name.replace(".db", "_new_database.db"))
        remove_matches_from_db(Path(db_name), new_db_name, Path("tmp_match.txt"), valid)
        db_name = new_db_name

    pycolmap.match_exhaustive(db_name, sift_options={'num_threads': 1})
    maps = pycolmap.incremental_mapping(
        database_path=db_name,
        image_path=images_path,
        output_path='/kaggle/working/',
        options=pycolmap.IncrementalPipelineOptions({
            'min_model_size': MIN_MODEL_SIZE,
            'max_num_models': MAX_NUM_MODELS,
            'num_threads': 1,
        }),
    )
    return maps

# HYPERPARAMETER TUNING

In [7]:
# --- Similar pairs (read by get_pairs) ---------------------------------------
# True: return every unordered image pair and skip the embedding shortlist.
EXHAUSTIVE = True
# Pairs whose embedding distance is <= this value are shortlisted.
DISTANCES_THRESHOLD = 0.3
# Upper slice bound on the nearest-neighbour fallback for images that have no
# shortlisted pair.
MIN_PAIRS = 50
# Pairs whose embedding distance is >= this value are always dropped.
TOLERANCE = 500

# --- Keypoints extractor and matcher (read by keypoints_matches) --------------
# Passed to the ALIKED extractor.
MAX_NUM_KEYPOINTS = 4096
DETECTION_THRESHOLD = 0.005
RESIZE_TO = 1280
# A pair is written to matches.h5 only with at least this many matches.
MIN_MATCHES = 100

# --- RANSAC and sparse reconstruction -----------------------------------------
# Passed to pycolmap.IncrementalPipelineOptions.
MIN_MODEL_SIZE = 5
MAX_NUM_MODELS = 3

# --- Cross validation ----------------------------------------------------------
# Maximum number of images sampled per (dataset, scene) group.
N_SAMPLES = 50

# True: run the submission cell; False: run the cross-validation cell.
SUBMISSION = True

## CROSS VALIDATION

In [8]:
# Local cross validation: sample up to N_SAMPLES training images per
# (dataset, scene), run the pipeline on them and score the result against the
# ground-truth labels.
if not SUBMISSION:
    train_df = pd.read_csv(f'{IMC_PATH}/train/train_labels.csv')
    # Build the path relative to IMC_PATH for every row in one vectorised step.
    train_df['image_path'] = 'train/' + train_df['dataset'] + '/images/' + train_df['image_name']
    train_df = train_df.drop_duplicates(subset=['image_path'])

    image_paths = []
    for _, scene_paths in train_df.groupby(['dataset', 'scene'])['image_path']:
        # Never request more samples than the group holds.
        n = min(N_SAMPLES, len(scene_paths))
        image_paths.extend(scene_paths.sample(n, random_state=42))

    gt_df = train_df[train_df.image_path.isin(image_paths)].reset_index(drop=True)
    pred_df = gt_df[['image_path','dataset','scene','rotation_matrix','translation_vector']]
    pred_df.to_csv('pred_df.csv',index=False)
    # NOTE(review): `run` is defined outside this notebook; the step functions
    # passed here take an extra `categories` argument — confirm `run` supplies it.
    run('pred_df.csv', get_pairs, keypoints_matches, ransac_and_sparse_reconstruction, submit=False)
    pred_df = pd.read_csv('submission.csv')
    mAA = round(score(gt_df, pred_df),4)
    print('*** Total mean Average Accuracy ***')
    print(f"mAA: {mAA}")

# SUBMISSION

In [9]:
def my_parse_sample_submission(data_path):
    """Parse the sample submission and the per-scene categories file.

    Args:
        data_path: ``pathlib.Path`` of the data root. It must contain
            ``sample_submission.csv`` and ``test/categories.csv``.

    Returns:
        A ``(data_dict, categories_dict)`` tuple where
        ``data_dict[dataset][scene]`` is the list of image paths (each one
        ``data_path / <image_path column>``) in file order, and
        ``categories_dict[scene]`` is the raw categories field of that scene.

    Raises:
        ValueError: If a non-blank data row does not have the expected number
            of comma-separated fields (5 and 2 respectively).
    """
    data_dict = {}
    categories_dict = {}

    with open(data_path / "sample_submission.csv", "r") as f:
        for i, l in enumerate(f):
            if i == 0:
                print("header:", l)
                continue
            line = l.strip()
            if not line:
                # Tolerate blank lines such as a trailing newline.
                continue
            image_path, dataset, scene, _, _ = line.split(',')
            # Resolve against the data root that was actually passed in.
            data_dict.setdefault(dataset, {}).setdefault(scene, []).append(data_path / image_path)

    with open(data_path / "test/categories.csv", "r") as f:
        for i, l in enumerate(f):
            if i == 0:
                print("header:", l)
                continue
            line = l.strip()
            if not line:
                continue
            scene, categories = line.split(',')
            categories_dict[scene] = categories

    for dataset, scenes in data_dict.items():
        for scene, images in scenes.items():
            print(f"{dataset} / {scene} -> {len(images)} images")

    return data_dict, categories_dict

def my_run(data_path,get_pairs,keypoints_matches,ransac_and_sparse_reconstruction,submit=True):
    """Run the full pipeline on every scene of the sample submission.

    For each scene the three step callables are invoked in order, the
    reconstruction with the most registered images is selected, and its camera
    poses are stored. ``create_submission`` is called after every scene.

    Args:
        data_path: ``pathlib.Path`` of the data root, forwarded to
            ``my_parse_sample_submission``.
        get_pairs: Callable ``(image_paths) -> index pairs``.
        keypoints_matches: Callable ``(image_paths, index_pairs, categories)``.
        ransac_and_sparse_reconstruction: Callable
            ``(images_dir, categories) -> mapping of index to reconstruction``.
        submit: Selects the ``test`` (True) or ``train`` (False) sub-folder used
            to build the result keys.
    """
    results = {}
    data_dict, categories_dict = my_parse_sample_submission(data_path)
    path = 'test' if submit else 'train'

    for dataset, scenes in data_dict.items():
        results.setdefault(dataset, {})

        for scene, image_paths in scenes.items():
            results[dataset][scene] = {}
            categories = categories_dict[scene]

            index_pairs = get_pairs(image_paths)
            keypoints_matches(image_paths, index_pairs, categories)
            maps = ransac_and_sparse_reconstruction(image_paths[0].parent, categories)
            clear_output(wait=False)
            print("maps:", maps)

            # A scene with no reconstruction keeps an empty result instead of
            # aborting the whole run with a KeyError.
            if maps:
                # max() returns the first maximum, i.e. the same model a
                # strictly-greater scan would pick.
                best_idx = max(maps, key=lambda idx: len(maps[idx].images))
                for im in maps[best_idx].images.values():
                    key = Path(IMC_PATH) / path / scene / "images" / im.name
                    results[dataset][scene][key] = {
                        "R": deepcopy(im.cam_from_world.rotation.matrix()),
                        "t": deepcopy(np.array(im.cam_from_world.translation)),
                    }

            # Called after every scene with the results accumulated so far.
            create_submission(results, data_dict, Path(IMC_PATH))

In [10]:
# Submission mode: run the pipeline on the data under IMC_PATH.
if SUBMISSION:
    data_path = IMC_PATH
    my_run(
        Path(data_path),
        get_pairs,
        keypoints_matches,
        ransac_and_sparse_reconstruction,
    )

maps: {0: Reconstruction(num_reg_images=37, num_cameras=37, num_points3D=12504, num_observations=65426)}
