In [20]:
from data_preprocess.image_matching_dataset import ImageMatchingDataset
import torchvision.transforms as transforms
import torch

from load_h5py_files import load_sift_output, load_flann_output, load_flann_from_images_name
from clustering.cluster_images import build_graph, graph_clustering, interactive_graph
from batch_descriptor import batch_feature_descriptor
from torch.utils.data import DataLoader

import os
import cv2 as cv
import h5py
import numpy as np
import pandas as pd
import ast
import json

from clustering.ransac_test import run_ransac
from load_h5py_files import load_matches_from_h5
import networkx as nx

In [13]:
def clustering_with_ransac(matches_file, labels_df, matcher_type, descriptor_type):
    '''
    Filter pairwise matches with RANSAC, build one graph per dataset and cluster it.

    Besides returning the results, the function writes them to disk as JSON:
      * evaluation/graphs/{matcher_type}_with_{descriptor_type}/graph_{dataset}.json
      * evaluation/clusters/clusters_{matcher_type}_with_{descriptor_type}.json

    Args:
        matches_file (str): path to CSV image matching result; it must contain
            "points0" and "points1" columns holding Python-literal strings
        labels_df (DataFrame): dataframe of train labels CSV
        matcher_type (str): matcher type (flann / lightglue)
        descriptor_type (str): descriptor type (sift / disk)

    Returns:
        Dict: Dictionary containing dataset and its graph
        Dict: Dictionary containing dataset and its clustering data
    '''
    matches_df = pd.read_csv(matches_file)

    # add new column in dataframe for ransac data
    for column in ("filtered_points0", "filtered_points1", "ransac_mask"):
        matches_df[column] = None

    # run ransac
    for idx, row in matches_df.iterrows():
        # the CSV stores the point lists as text, so parse them back first
        points0 = torch.Tensor(ast.literal_eval(row["points0"]))
        points1 = torch.Tensor(ast.literal_eval(row["points1"]))

        filtered_p0, filtered_p1, ransac_mask = run_ransac(points0, points1)
        matches_df.at[idx, "filtered_points0"] = filtered_p0
        # each image keeps its own filtered points (this column used to receive filtered_p0)
        matches_df.at[idx, "filtered_points1"] = filtered_p1
        matches_df.at[idx, "ransac_mask"] = ransac_mask

    # create graph
    graphs = build_graph(matches_df, labels_df, matcher_type)

    # save graphs
    graph_output_dir = os.path.join("evaluation", "graphs", f"{matcher_type}_with_{descriptor_type}")
    os.makedirs(graph_output_dir, exist_ok=True)

    for dataset, graph in graphs.items():
        data_graph = nx.node_link_data(graph, edges="edges")
        with open(os.path.join(graph_output_dir, f"graph_{dataset}.json"), "w") as f:
            json.dump(data_graph, f)

    # create clustering
    clustering = graph_clustering(graphs)

    # save clustering
    cluster_output_dir = os.path.join("evaluation", "clusters")
    os.makedirs(cluster_output_dir, exist_ok=True)
    with open(os.path.join(cluster_output_dir, f"clusters_{matcher_type}_with_{descriptor_type}.json"), "w") as f:
        json.dump(clustering, f)

    return graphs, clustering

In [22]:
# Pairwise matching results for the FLANN matcher, one CSV per descriptor type.
flann_sift_path = os.path.join("csv_matcher", "result_flann_sift.csv")
flann_disk_path = os.path.join("csv_matcher", "result_flann_disk.csv")
# Train labels; passed to clustering_with_ransac, which forwards them to build_graph.
labels_df = pd.read_csv("data/train_labels.csv")

# Root directory handed to interactive_graph for the cluster visualizations.
visualization_dir = os.path.join("evaluation", "cluster_visualization")
os.makedirs(visualization_dir, exist_ok=True)

In [14]:
# clustering: flann with sift
graph_flann_sift, cluster_flann_sift = clustering_with_ransac(
    matches_file=flann_sift_path,
    labels_df=labels_df,
    matcher_type="flann",
    descriptor_type="sift",
)

In [17]:
# Per-dataset graphs for FLANN + SIFT (dataset name -> networkx Graph).
graph_flann_sift

{'pt_sacrecoeur_trevi_tajmahal': <networkx.classes.graph.Graph at 0x7f99af3a1e90>,
 'pt_stpeters_stpauls': <networkx.classes.graph.Graph at 0x7f99a1120a10>,
 'pt_brandenburg_british_buckingham': <networkx.classes.graph.Graph at 0x7f999ed00110>,
 'imc2023_heritage': <networkx.classes.graph.Graph at 0x7f99a31fd110>,
 'imc2024_lizard_pond': <networkx.classes.graph.Graph at 0x7f99abbaac10>,
 'fbk_vineyard': <networkx.classes.graph.Graph at 0x7f999e8e1e90>,
 'imc2024_dioscuri_baalshamin': <networkx.classes.graph.Graph at 0x7f99a6dcfcd0>,
 'amy_gardens': <networkx.classes.graph.Graph at 0x7f99abf89ad0>,
 'imc2023_haiper': <networkx.classes.graph.Graph at 0x7f99abff41d0>,
 'pt_piazzasanmarco_grandplace': <networkx.classes.graph.Graph at 0x7f99ac05cdd0>,
 'stairs': <networkx.classes.graph.Graph at 0x7f99abfb9410>,
 'imc2023_theather_imc2024_church': <networkx.classes.graph.Graph at 0x7f999e7cacd0>,
 'ETs': <networkx.classes.graph.Graph at 0x7f99abfc34d0>}

In [16]:
# Edges of the "ETs" dataset graph (FLANN + SIFT); each edge carries a 'weight' attribute.
graph_flann_sift["ETs"].edges(data=True)

EdgeDataView([('outliers_out_et003.png', 'et_et003.png', {'weight': 7}), ('outliers_out_et003.png', 'another_et_another_et007.png', {'weight': 7}), ('outliers_out_et003.png', 'another_et_another_et008.png', {'weight': 7}), ('outliers_out_et003.png', 'another_et_another_et010.png', {'weight': 7}), ('outliers_out_et003.png', 'another_et_another_et005.png', {'weight': 7}), ('outliers_out_et003.png', 'outliers_out_et002.png', {'weight': 7}), ('et_et007.png', 'et_et004.png', {'weight': 17}), ('et_et007.png', 'et_et006.png', {'weight': 415}), ('et_et007.png', 'et_et008.png', {'weight': 166}), ('et_et007.png', 'et_et002.png', {'weight': 11}), ('et_et007.png', 'et_et001.png', {'weight': 14}), ('et_et007.png', 'another_et_another_et006.png', {'weight': 7}), ('et_et007.png', 'another_et_another_et001.png', {'weight': 7}), ('et_et007.png', 'et_et005.png', {'weight': 279}), ('et_et007.png', 'et_et003.png', {'weight': 7}), ('et_et007.png', 'et_et000.png', {'weight': 7}), ('et_et003.png', 'et_et004.

In [18]:
# Clustering result for every dataset (FLANN + SIFT); the output is large.
cluster_flann_sift

{'pt_sacrecoeur_trevi_tajmahal': {'clusters': {1: ['sacre_coeur_93423882_13476569344.png',
    'sacre_coeur_45927449_5904684394.png',
    'sacre_coeur_90467261_5816977333.png',
    'sacre_coeur_47029087_8959697396.png',
    'sacre_coeur_33340964_112626481.png',
    'sacre_coeur_55695069_9640098822.png',
    'sacre_coeur_86731151_442052253.png',
    'sacre_coeur_90517919_4394638107.png',
    'sacre_coeur_55297752_267282770.png',
    'sacre_coeur_98546957_2200688611.png',
    'sacre_coeur_68663909_1356782012.png',
    'sacre_coeur_80691366_2623658065.png',
    'sacre_coeur_16537684_4545274148.png',
    'sacre_coeur_04739785_8198592553.png',
    'sacre_coeur_55167859_91653543.png',
    'sacre_coeur_70116109_2697195876.png',
    'sacre_coeur_39702035_1808089104.png',
    'sacre_coeur_46031506_3822251715.png',
    'sacre_coeur_68688218_4738908453.png',
    'sacre_coeur_82410562_10997050704.png',
    'sacre_coeur_41224690_4580747259.png',
    'sacre_coeur_34264811_99184810.png',
    'sacre_c

In [19]:
# Clustering of the "ETs" dataset only: 'clusters', 'outliers' and 'communities' entries.
cluster_flann_sift["ETs"]

{'clusters': {1: ['outliers_out_et003.png',
   'outliers_out_et002.png',
   'another_et_another_et010.png',
   'another_et_another_et007.png',
   'another_et_another_et008.png',
   'another_et_another_et009.png'],
  3: ['et_et003.png',
   'et_et001.png',
   'et_et004.png',
   'et_et002.png',
   'et_et000.png'],
  4: ['another_et_another_et006.png',
   'another_et_another_et002.png',
   'another_et_another_et004.png',
   'another_et_another_et003.png',
   'another_et_another_et005.png',
   'another_et_another_et001.png']},
 'outliers': {0: ['outliers_out_et001.png'],
  2: ['et_et007.png', 'et_et006.png', 'et_et008.png', 'et_et005.png']},
 'communities': {'outliers_out_et001.png': 0,
  'outliers_out_et003.png': 1,
  'outliers_out_et002.png': 1,
  'et_et007.png': 2,
  'et_et003.png': 3,
  'et_et006.png': 2,
  'et_et001.png': 3,
  'et_et004.png': 3,
  'et_et002.png': 3,
  'et_et008.png': 2,
  'et_et005.png': 2,
  'et_et000.png': 3,
  'another_et_another_et006.png': 4,
  'another_et_another

In [32]:
# Round-trip check: reload the "ETs" graph JSON written by clustering_with_ransac (FLANN + SIFT).
with open("evaluation/graphs/flann_with_sift/graph_ETs.json", "r") as f:
    ets_graph = json.load(f)

# Node-link dict with 'directed', 'multigraph', 'graph', 'nodes' and 'edges' keys.
ets_graph

{'directed': False,
 'multigraph': False,
 'graph': {},
 'nodes': [{'id': 'outliers_out_et001.png'},
  {'id': 'outliers_out_et003.png'},
  {'id': 'outliers_out_et002.png'},
  {'id': 'et_et007.png'},
  {'id': 'et_et003.png'},
  {'id': 'et_et006.png'},
  {'id': 'et_et001.png'},
  {'id': 'et_et004.png'},
  {'id': 'et_et002.png'},
  {'id': 'et_et008.png'},
  {'id': 'et_et005.png'},
  {'id': 'et_et000.png'},
  {'id': 'another_et_another_et006.png'},
  {'id': 'another_et_another_et002.png'},
  {'id': 'another_et_another_et010.png'},
  {'id': 'another_et_another_et004.png'},
  {'id': 'another_et_another_et007.png'},
  {'id': 'another_et_another_et008.png'},
  {'id': 'another_et_another_et003.png'},
  {'id': 'another_et_another_et005.png'},
  {'id': 'another_et_another_et001.png'},
  {'id': 'another_et_another_et009.png'}],
 'edges': [{'weight': 7,
   'source': 'outliers_out_et003.png',
   'target': 'et_et003.png'},
  {'weight': 7,
   'source': 'outliers_out_et003.png',
   'target': 'another_e

In [35]:
# Rebuild a networkx graph from the node-link dict loaded from graph_ETs.json.
# NOTE(review): this rebinds ets_graph from a dict to a Graph, so re-running this cell
# without re-running the loading cell hands a Graph to node_link_graph — confirm this is intended.
ets_graph = nx.node_link_graph(ets_graph, edges="edges")
ets_graph.edges(data=True)

EdgeDataView([('outliers_out_et003.png', 'et_et003.png', {'weight': 7}), ('outliers_out_et003.png', 'another_et_another_et007.png', {'weight': 7}), ('outliers_out_et003.png', 'another_et_another_et008.png', {'weight': 7}), ('outliers_out_et003.png', 'another_et_another_et010.png', {'weight': 7}), ('outliers_out_et003.png', 'another_et_another_et005.png', {'weight': 7}), ('outliers_out_et003.png', 'outliers_out_et002.png', {'weight': 7}), ('et_et007.png', 'et_et004.png', {'weight': 17}), ('et_et007.png', 'et_et006.png', {'weight': 415}), ('et_et007.png', 'et_et008.png', {'weight': 166}), ('et_et007.png', 'et_et002.png', {'weight': 11}), ('et_et007.png', 'et_et001.png', {'weight': 14}), ('et_et007.png', 'another_et_another_et006.png', {'weight': 7}), ('et_et007.png', 'another_et_another_et001.png', {'weight': 7}), ('et_et007.png', 'et_et005.png', {'weight': 279}), ('et_et007.png', 'et_et003.png', {'weight': 7}), ('et_et007.png', 'et_et000.png', {'weight': 7}), ('et_et003.png', 'et_et004.

In [31]:
# Round-trip check: reload the saved FLANN + SIFT clustering JSON.
with open("evaluation/clusters/clusters_flann_with_sift.json", "r") as f:
    sift_cluster = json.load(f)

# Integer cluster ids come back as string keys after the JSON round trip.
sift_cluster

{'pt_sacrecoeur_trevi_tajmahal': {'clusters': {'1': ['sacre_coeur_93423882_13476569344.png',
    'sacre_coeur_45927449_5904684394.png',
    'sacre_coeur_90467261_5816977333.png',
    'sacre_coeur_47029087_8959697396.png',
    'sacre_coeur_33340964_112626481.png',
    'sacre_coeur_55695069_9640098822.png',
    'sacre_coeur_86731151_442052253.png',
    'sacre_coeur_90517919_4394638107.png',
    'sacre_coeur_55297752_267282770.png',
    'sacre_coeur_98546957_2200688611.png',
    'sacre_coeur_68663909_1356782012.png',
    'sacre_coeur_80691366_2623658065.png',
    'sacre_coeur_16537684_4545274148.png',
    'sacre_coeur_04739785_8198592553.png',
    'sacre_coeur_55167859_91653543.png',
    'sacre_coeur_70116109_2697195876.png',
    'sacre_coeur_39702035_1808089104.png',
    'sacre_coeur_46031506_3822251715.png',
    'sacre_coeur_68688218_4738908453.png',
    'sacre_coeur_82410562_10997050704.png',
    'sacre_coeur_41224690_4580747259.png',
    'sacre_coeur_34264811_99184810.png',
    'sacre

In [23]:
# create interactive graph
# The cell output lists one visualization_cluster_<dataset>.html path per dataset
# under the directory passed as the last argument.

interactive_graph(graph_flann_sift, cluster_flann_sift, os.path.join(visualization_dir, "flann_with_sift"))

evaluation/cluster_visualization/flann_with_sift/visualization_cluster_pt_sacrecoeur_trevi_tajmahal.html
evaluation/cluster_visualization/flann_with_sift/visualization_cluster_pt_stpeters_stpauls.html
evaluation/cluster_visualization/flann_with_sift/visualization_cluster_pt_brandenburg_british_buckingham.html
evaluation/cluster_visualization/flann_with_sift/visualization_cluster_imc2023_heritage.html
evaluation/cluster_visualization/flann_with_sift/visualization_cluster_imc2024_lizard_pond.html
evaluation/cluster_visualization/flann_with_sift/visualization_cluster_fbk_vineyard.html
evaluation/cluster_visualization/flann_with_sift/visualization_cluster_imc2024_dioscuri_baalshamin.html
evaluation/cluster_visualization/flann_with_sift/visualization_cluster_amy_gardens.html
evaluation/cluster_visualization/flann_with_sift/visualization_cluster_imc2023_haiper.html
evaluation/cluster_visualization/flann_with_sift/visualization_cluster_pt_piazzasanmarco_grandplace.html
evaluation/cluster_visu

## FLANN matcher with DISK descriptors

In [24]:
# clustering: flann with disk
graph_flann_disk, cluster_flann_disk = clustering_with_ransac(
    matches_file=flann_disk_path,
    labels_df=labels_df,
    matcher_type="flann",
    descriptor_type="disk",
)

In [25]:
# Per-dataset graphs for FLANN + DISK (dataset name -> networkx Graph).
graph_flann_disk

{'fbk_vineyard': <networkx.classes.graph.Graph at 0x7f99ada2d5d0>,
 'pt_sacrecoeur_trevi_tajmahal': <networkx.classes.graph.Graph at 0x7f999eae2a50>,
 'pt_piazzasanmarco_grandplace': <networkx.classes.graph.Graph at 0x7f99abf79410>,
 'imc2024_lizard_pond': <networkx.classes.graph.Graph at 0x7f999d6f8390>,
 'pt_brandenburg_british_buckingham': <networkx.classes.graph.Graph at 0x7f999f30d550>,
 'amy_gardens': <networkx.classes.graph.Graph at 0x7f99adaa2050>,
 'imc2024_dioscuri_baalshamin': <networkx.classes.graph.Graph at 0x7f999ea10f90>,
 'imc2023_haiper': <networkx.classes.graph.Graph at 0x7f999f485590>,
 'pt_stpeters_stpauls': <networkx.classes.graph.Graph at 0x7f999f485b10>,
 'imc2023_heritage': <networkx.classes.graph.Graph at 0x7f999ea34090>,
 'imc2023_theather_imc2024_church': <networkx.classes.graph.Graph at 0x7f999f3ae6d0>,
 'stairs': <networkx.classes.graph.Graph at 0x7f999f44a450>,
 'ETs': <networkx.classes.graph.Graph at 0x7f99abcc4e90>}

In [26]:
# Edges of the "ETs" dataset graph (FLANN + DISK); each edge carries a 'weight' attribute.
graph_flann_disk["ETs"].edges(data=True)

EdgeDataView([('et_et007.png', 'et_et006.png', {'weight': 653}), ('et_et007.png', 'et_et005.png', {'weight': 567}), ('et_et007.png', 'et_et008.png', {'weight': 29}), ('et_et003.png', 'et_et001.png', {'weight': 156}), ('et_et003.png', 'et_et002.png', {'weight': 57}), ('et_et003.png', 'et_et000.png', {'weight': 848}), ('et_et003.png', 'et_et004.png', {'weight': 34}), ('et_et006.png', 'et_et008.png', {'weight': 7}), ('et_et006.png', 'et_et005.png', {'weight': 389}), ('et_et001.png', 'et_et000.png', {'weight': 310}), ('et_et001.png', 'et_et002.png', {'weight': 323}), ('et_et001.png', 'et_et004.png', {'weight': 37}), ('et_et004.png', 'et_et002.png', {'weight': 93}), ('et_et004.png', 'et_et000.png', {'weight': 34}), ('et_et002.png', 'et_et000.png', {'weight': 89}), ('et_et008.png', 'et_et005.png', {'weight': 19}), ('another_et_another_et006.png', 'another_et_another_et008.png', {'weight': 7}), ('another_et_another_et006.png', 'another_et_another_et007.png', {'weight': 144}), ('another_et_ano

In [27]:
# Clustering result for every dataset (FLANN + DISK); the output is large.
cluster_flann_disk

{'fbk_vineyard': {'clusters': {0: ['vineyard_split_2_frame_1260.png',
    'vineyard_split_2_frame_1255.png',
    'vineyard_split_2_frame_1270.png',
    'vineyard_split_2_frame_1265.png'],
   2: ['vineyard_split_2_frame_1245.png',
    'vineyard_split_2_frame_1240.png',
    'vineyard_split_2_frame_1235.png',
    'vineyard_split_2_frame_1230.png'],
   4: ['vineyard_split_2_frame_1205.png',
    'vineyard_split_2_frame_1210.png',
    'vineyard_split_2_frame_1215.png'],
   5: ['vineyard_split_2_frame_1170.png',
    'vineyard_split_2_frame_1165.png',
    'vineyard_split_2_frame_1160.png',
    'vineyard_split_2_frame_1175.png'],
   7: ['vineyard_split_2_frame_1300.png',
    'vineyard_split_2_frame_1290.png',
    'vineyard_split_2_frame_1310.png',
    'vineyard_split_2_frame_1295.png',
    'vineyard_split_2_frame_1305.png',
    'vineyard_split_2_frame_1315.png'],
   8: ['vineyard_split_2_frame_1200.png',
    'vineyard_split_2_frame_1195.png',
    'vineyard_split_2_frame_1190.png',
    'vineyard

In [28]:
# Clustering of the "ETs" dataset only: 'clusters', 'outliers' and 'communities' entries.
cluster_flann_disk["ETs"]

{'clusters': {3: ['et_et007.png',
   'et_et006.png',
   'et_et008.png',
   'et_et005.png'],
  5: ['et_et003.png',
   'et_et001.png',
   'et_et004.png',
   'et_et002.png',
   'et_et000.png'],
  6: ['another_et_another_et006.png',
   'another_et_another_et007.png',
   'another_et_another_et008.png',
   'another_et_another_et009.png'],
  7: ['another_et_another_et002.png',
   'another_et_another_et004.png',
   'another_et_another_et003.png',
   'another_et_another_et005.png',
   'another_et_another_et001.png']},
 'outliers': {0: ['outliers_out_et001.png'],
  1: ['outliers_out_et003.png'],
  2: ['outliers_out_et002.png'],
  4: ['another_et_another_et010.png']},
 'communities': {'outliers_out_et001.png': 0,
  'outliers_out_et003.png': 1,
  'outliers_out_et002.png': 2,
  'et_et007.png': 3,
  'et_et003.png': 5,
  'et_et006.png': 3,
  'et_et001.png': 5,
  'et_et004.png': 5,
  'et_et002.png': 5,
  'et_et008.png': 3,
  'et_et005.png': 3,
  'et_et000.png': 5,
  'another_et_another_et006.png': 6,


In [36]:
# Round-trip check: reload the "ETs" graph JSON written by clustering_with_ransac (FLANN + DISK).
with open("evaluation/graphs/flann_with_disk/graph_ETs.json") as f:
    ets_disk_graph = json.load(f)

# Node-link dict with 'directed', 'multigraph', 'graph', 'nodes' and 'edges' keys.
ets_disk_graph

{'directed': False,
 'multigraph': False,
 'graph': {},
 'nodes': [{'id': 'outliers_out_et001.png'},
  {'id': 'outliers_out_et003.png'},
  {'id': 'outliers_out_et002.png'},
  {'id': 'et_et007.png'},
  {'id': 'et_et003.png'},
  {'id': 'et_et006.png'},
  {'id': 'et_et001.png'},
  {'id': 'et_et004.png'},
  {'id': 'et_et002.png'},
  {'id': 'et_et008.png'},
  {'id': 'et_et005.png'},
  {'id': 'et_et000.png'},
  {'id': 'another_et_another_et006.png'},
  {'id': 'another_et_another_et002.png'},
  {'id': 'another_et_another_et010.png'},
  {'id': 'another_et_another_et004.png'},
  {'id': 'another_et_another_et007.png'},
  {'id': 'another_et_another_et008.png'},
  {'id': 'another_et_another_et003.png'},
  {'id': 'another_et_another_et005.png'},
  {'id': 'another_et_another_et001.png'},
  {'id': 'another_et_another_et009.png'}],
 'edges': [{'weight': 653, 'source': 'et_et007.png', 'target': 'et_et006.png'},
  {'weight': 567, 'source': 'et_et007.png', 'target': 'et_et005.png'},
  {'weight': 29, 'sou

In [37]:
# Round-trip check: reload the saved FLANN + DISK clustering JSON.
with open("evaluation/clusters/clusters_flann_with_disk.json", "r") as f:
    disk_cluster = json.load(f)

# Integer cluster ids come back as string keys after the JSON round trip.
disk_cluster

{'fbk_vineyard': {'clusters': {'0': ['vineyard_split_2_frame_1260.png',
    'vineyard_split_2_frame_1255.png',
    'vineyard_split_2_frame_1270.png',
    'vineyard_split_2_frame_1265.png'],
   '2': ['vineyard_split_2_frame_1245.png',
    'vineyard_split_2_frame_1240.png',
    'vineyard_split_2_frame_1235.png',
    'vineyard_split_2_frame_1230.png'],
   '4': ['vineyard_split_2_frame_1205.png',
    'vineyard_split_2_frame_1210.png',
    'vineyard_split_2_frame_1215.png'],
   '5': ['vineyard_split_2_frame_1170.png',
    'vineyard_split_2_frame_1165.png',
    'vineyard_split_2_frame_1160.png',
    'vineyard_split_2_frame_1175.png'],
   '7': ['vineyard_split_2_frame_1300.png',
    'vineyard_split_2_frame_1290.png',
    'vineyard_split_2_frame_1310.png',
    'vineyard_split_2_frame_1295.png',
    'vineyard_split_2_frame_1305.png',
    'vineyard_split_2_frame_1315.png'],
   '8': ['vineyard_split_2_frame_1200.png',
    'vineyard_split_2_frame_1195.png',
    'vineyard_split_2_frame_1190.png',
 

In [38]:
# create interactive graph
# The cell output lists one visualization_cluster_<dataset>.html path per dataset
# under the directory passed as the last argument.

interactive_graph(graph_flann_disk, cluster_flann_disk, os.path.join(visualization_dir, "flann_with_disk"))

evaluation/cluster_visualization/flann_with_disk/visualization_cluster_fbk_vineyard.html
evaluation/cluster_visualization/flann_with_disk/visualization_cluster_pt_sacrecoeur_trevi_tajmahal.html
evaluation/cluster_visualization/flann_with_disk/visualization_cluster_pt_piazzasanmarco_grandplace.html
evaluation/cluster_visualization/flann_with_disk/visualization_cluster_imc2024_lizard_pond.html
evaluation/cluster_visualization/flann_with_disk/visualization_cluster_pt_brandenburg_british_buckingham.html
evaluation/cluster_visualization/flann_with_disk/visualization_cluster_amy_gardens.html
evaluation/cluster_visualization/flann_with_disk/visualization_cluster_imc2024_dioscuri_baalshamin.html
evaluation/cluster_visualization/flann_with_disk/visualization_cluster_imc2023_haiper.html
evaluation/cluster_visualization/flann_with_disk/visualization_cluster_pt_stpeters_stpauls.html
evaluation/cluster_visualization/flann_with_disk/visualization_cluster_imc2023_heritage.html
evaluation/cluster_visu

In [20]:
from data_preprocess.image_matching_dataset import ImageMatchingDataset
import torchvision.transforms as transforms
import torch

from load_h5py_files import load_sift_output, load_flann_output, load_flann_from_images_name
from clustering.cluster_images import build_graph, graph_clustering, interactive_graph
from batch_descriptor import batch_feature_descriptor
from torch.utils.data import DataLoader

import os
import cv2 as cv
import h5py
import numpy as np
import pandas as pd
import ast
import json

from clustering.ransac_test import run_ransac
from load_h5py_files import load_matches_from_h5
import networkx as nx

In [13]:
def clustering_with_ransac(matches_file, labels_df, matcher_type, descriptor_type):
    '''
    Filter pairwise matches with RANSAC, build one graph per dataset and cluster it.

    Besides returning the results, the function writes them to disk as JSON:
      * evaluation/graphs/{matcher_type}_with_{descriptor_type}/graph_{dataset}.json
      * evaluation/clusters/clusters_{matcher_type}_with_{descriptor_type}.json

    Args:
        matches_file (str): path to CSV image matching result; it must contain
            "points0" and "points1" columns holding Python-literal strings
        labels_df (DataFrame): dataframe of train labels CSV
        matcher_type (str): matcher type (flann / lightglue)
        descriptor_type (str): descriptor type (sift / disk)

    Returns:
        Dict: Dictionary containing dataset and its graph
        Dict: Dictionary containing dataset and its clustering data
    '''
    matches_df = pd.read_csv(matches_file)

    # add new column in dataframe for ransac data
    for column in ("filtered_points0", "filtered_points1", "ransac_mask"):
        matches_df[column] = None

    # run ransac
    for idx, row in matches_df.iterrows():
        # the CSV stores the point lists as text, so parse them back first
        points0 = torch.Tensor(ast.literal_eval(row["points0"]))
        points1 = torch.Tensor(ast.literal_eval(row["points1"]))

        filtered_p0, filtered_p1, ransac_mask = run_ransac(points0, points1)
        matches_df.at[idx, "filtered_points0"] = filtered_p0
        # each image keeps its own filtered points (this column used to receive filtered_p0)
        matches_df.at[idx, "filtered_points1"] = filtered_p1
        matches_df.at[idx, "ransac_mask"] = ransac_mask

    # create graph
    graphs = build_graph(matches_df, labels_df, matcher_type)

    # save graphs
    graph_output_dir = os.path.join("evaluation", "graphs", f"{matcher_type}_with_{descriptor_type}")
    os.makedirs(graph_output_dir, exist_ok=True)

    for dataset, graph in graphs.items():
        data_graph = nx.node_link_data(graph, edges="edges")
        with open(os.path.join(graph_output_dir, f"graph_{dataset}.json"), "w") as f:
            json.dump(data_graph, f)

    # create clustering
    clustering = graph_clustering(graphs)

    # save clustering
    cluster_output_dir = os.path.join("evaluation", "clusters")
    os.makedirs(cluster_output_dir, exist_ok=True)
    with open(os.path.join(cluster_output_dir, f"clusters_{matcher_type}_with_{descriptor_type}.json"), "w") as f:
        json.dump(clustering, f)

    return graphs, clustering

In [22]:
# Pairwise matching results for the FLANN matcher, one CSV per descriptor type.
flann_sift_path = os.path.join("csv_matcher", "result_flann_sift.csv")
flann_disk_path = os.path.join("csv_matcher", "result_flann_disk.csv")
# Train labels; passed to clustering_with_ransac, which forwards them to build_graph.
labels_df = pd.read_csv("data/train_labels.csv")

# Root directory handed to interactive_graph for the cluster visualizations.
visualization_dir = os.path.join("evaluation", "cluster_visualization")
os.makedirs(visualization_dir, exist_ok=True)

In [14]:
# clustering: flann with sift
graph_flann_sift, cluster_flann_sift = clustering_with_ransac(
    matches_file=flann_sift_path,
    labels_df=labels_df,
    matcher_type="flann",
    descriptor_type="sift",
)

In [17]:
# Per-dataset graphs for FLANN + SIFT (dataset name -> networkx Graph).
graph_flann_sift

{'pt_sacrecoeur_trevi_tajmahal': <networkx.classes.graph.Graph at 0x7f99af3a1e90>,
 'pt_stpeters_stpauls': <networkx.classes.graph.Graph at 0x7f99a1120a10>,
 'pt_brandenburg_british_buckingham': <networkx.classes.graph.Graph at 0x7f999ed00110>,
 'imc2023_heritage': <networkx.classes.graph.Graph at 0x7f99a31fd110>,
 'imc2024_lizard_pond': <networkx.classes.graph.Graph at 0x7f99abbaac10>,
 'fbk_vineyard': <networkx.classes.graph.Graph at 0x7f999e8e1e90>,
 'imc2024_dioscuri_baalshamin': <networkx.classes.graph.Graph at 0x7f99a6dcfcd0>,
 'amy_gardens': <networkx.classes.graph.Graph at 0x7f99abf89ad0>,
 'imc2023_haiper': <networkx.classes.graph.Graph at 0x7f99abff41d0>,
 'pt_piazzasanmarco_grandplace': <networkx.classes.graph.Graph at 0x7f99ac05cdd0>,
 'stairs': <networkx.classes.graph.Graph at 0x7f99abfb9410>,
 'imc2023_theather_imc2024_church': <networkx.classes.graph.Graph at 0x7f999e7cacd0>,
 'ETs': <networkx.classes.graph.Graph at 0x7f99abfc34d0>}

In [16]:
# Edges of the "ETs" dataset graph (FLANN + SIFT); each edge carries a 'weight' attribute.
graph_flann_sift["ETs"].edges(data=True)

EdgeDataView([('outliers_out_et003.png', 'et_et003.png', {'weight': 7}), ('outliers_out_et003.png', 'another_et_another_et007.png', {'weight': 7}), ('outliers_out_et003.png', 'another_et_another_et008.png', {'weight': 7}), ('outliers_out_et003.png', 'another_et_another_et010.png', {'weight': 7}), ('outliers_out_et003.png', 'another_et_another_et005.png', {'weight': 7}), ('outliers_out_et003.png', 'outliers_out_et002.png', {'weight': 7}), ('et_et007.png', 'et_et004.png', {'weight': 17}), ('et_et007.png', 'et_et006.png', {'weight': 415}), ('et_et007.png', 'et_et008.png', {'weight': 166}), ('et_et007.png', 'et_et002.png', {'weight': 11}), ('et_et007.png', 'et_et001.png', {'weight': 14}), ('et_et007.png', 'another_et_another_et006.png', {'weight': 7}), ('et_et007.png', 'another_et_another_et001.png', {'weight': 7}), ('et_et007.png', 'et_et005.png', {'weight': 279}), ('et_et007.png', 'et_et003.png', {'weight': 7}), ('et_et007.png', 'et_et000.png', {'weight': 7}), ('et_et003.png', 'et_et004.

In [18]:
# Clustering result for every dataset (FLANN + SIFT); the output is large.
cluster_flann_sift

{'pt_sacrecoeur_trevi_tajmahal': {'clusters': {1: ['sacre_coeur_93423882_13476569344.png',
    'sacre_coeur_45927449_5904684394.png',
    'sacre_coeur_90467261_5816977333.png',
    'sacre_coeur_47029087_8959697396.png',
    'sacre_coeur_33340964_112626481.png',
    'sacre_coeur_55695069_9640098822.png',
    'sacre_coeur_86731151_442052253.png',
    'sacre_coeur_90517919_4394638107.png',
    'sacre_coeur_55297752_267282770.png',
    'sacre_coeur_98546957_2200688611.png',
    'sacre_coeur_68663909_1356782012.png',
    'sacre_coeur_80691366_2623658065.png',
    'sacre_coeur_16537684_4545274148.png',
    'sacre_coeur_04739785_8198592553.png',
    'sacre_coeur_55167859_91653543.png',
    'sacre_coeur_70116109_2697195876.png',
    'sacre_coeur_39702035_1808089104.png',
    'sacre_coeur_46031506_3822251715.png',
    'sacre_coeur_68688218_4738908453.png',
    'sacre_coeur_82410562_10997050704.png',
    'sacre_coeur_41224690_4580747259.png',
    'sacre_coeur_34264811_99184810.png',
    'sacre_c

In [19]:
# Clustering of the "ETs" dataset only: 'clusters', 'outliers' and 'communities' entries.
cluster_flann_sift["ETs"]

{'clusters': {1: ['outliers_out_et003.png',
   'outliers_out_et002.png',
   'another_et_another_et010.png',
   'another_et_another_et007.png',
   'another_et_another_et008.png',
   'another_et_another_et009.png'],
  3: ['et_et003.png',
   'et_et001.png',
   'et_et004.png',
   'et_et002.png',
   'et_et000.png'],
  4: ['another_et_another_et006.png',
   'another_et_another_et002.png',
   'another_et_another_et004.png',
   'another_et_another_et003.png',
   'another_et_another_et005.png',
   'another_et_another_et001.png']},
 'outliers': {0: ['outliers_out_et001.png'],
  2: ['et_et007.png', 'et_et006.png', 'et_et008.png', 'et_et005.png']},
 'communities': {'outliers_out_et001.png': 0,
  'outliers_out_et003.png': 1,
  'outliers_out_et002.png': 1,
  'et_et007.png': 2,
  'et_et003.png': 3,
  'et_et006.png': 2,
  'et_et001.png': 3,
  'et_et004.png': 3,
  'et_et002.png': 3,
  'et_et008.png': 2,
  'et_et005.png': 2,
  'et_et000.png': 3,
  'another_et_another_et006.png': 4,
  'another_et_another

In [32]:
# Round-trip check: reload the "ETs" graph JSON written by clustering_with_ransac (FLANN + SIFT).
with open("evaluation/graphs/flann_with_sift/graph_ETs.json", "r") as f:
    ets_graph = json.load(f)

# Node-link dict with 'directed', 'multigraph', 'graph', 'nodes' and 'edges' keys.
ets_graph

{'directed': False,
 'multigraph': False,
 'graph': {},
 'nodes': [{'id': 'outliers_out_et001.png'},
  {'id': 'outliers_out_et003.png'},
  {'id': 'outliers_out_et002.png'},
  {'id': 'et_et007.png'},
  {'id': 'et_et003.png'},
  {'id': 'et_et006.png'},
  {'id': 'et_et001.png'},
  {'id': 'et_et004.png'},
  {'id': 'et_et002.png'},
  {'id': 'et_et008.png'},
  {'id': 'et_et005.png'},
  {'id': 'et_et000.png'},
  {'id': 'another_et_another_et006.png'},
  {'id': 'another_et_another_et002.png'},
  {'id': 'another_et_another_et010.png'},
  {'id': 'another_et_another_et004.png'},
  {'id': 'another_et_another_et007.png'},
  {'id': 'another_et_another_et008.png'},
  {'id': 'another_et_another_et003.png'},
  {'id': 'another_et_another_et005.png'},
  {'id': 'another_et_another_et001.png'},
  {'id': 'another_et_another_et009.png'}],
 'edges': [{'weight': 7,
   'source': 'outliers_out_et003.png',
   'target': 'et_et003.png'},
  {'weight': 7,
   'source': 'outliers_out_et003.png',
   'target': 'another_e

In [35]:
# Rebuild a networkx graph from the node-link dict loaded from graph_ETs.json.
# NOTE(review): this rebinds ets_graph from a dict to a Graph, so re-running this cell
# without re-running the loading cell hands a Graph to node_link_graph — confirm this is intended.
ets_graph = nx.node_link_graph(ets_graph, edges="edges")
ets_graph.edges(data=True)

EdgeDataView([('outliers_out_et003.png', 'et_et003.png', {'weight': 7}), ('outliers_out_et003.png', 'another_et_another_et007.png', {'weight': 7}), ('outliers_out_et003.png', 'another_et_another_et008.png', {'weight': 7}), ('outliers_out_et003.png', 'another_et_another_et010.png', {'weight': 7}), ('outliers_out_et003.png', 'another_et_another_et005.png', {'weight': 7}), ('outliers_out_et003.png', 'outliers_out_et002.png', {'weight': 7}), ('et_et007.png', 'et_et004.png', {'weight': 17}), ('et_et007.png', 'et_et006.png', {'weight': 415}), ('et_et007.png', 'et_et008.png', {'weight': 166}), ('et_et007.png', 'et_et002.png', {'weight': 11}), ('et_et007.png', 'et_et001.png', {'weight': 14}), ('et_et007.png', 'another_et_another_et006.png', {'weight': 7}), ('et_et007.png', 'another_et_another_et001.png', {'weight': 7}), ('et_et007.png', 'et_et005.png', {'weight': 279}), ('et_et007.png', 'et_et003.png', {'weight': 7}), ('et_et007.png', 'et_et000.png', {'weight': 7}), ('et_et003.png', 'et_et004.

In [31]:
# Round-trip check: reload the saved FLANN + SIFT clustering JSON.
with open("evaluation/clusters/clusters_flann_with_sift.json", "r") as f:
    sift_cluster = json.load(f)

# Integer cluster ids come back as string keys after the JSON round trip.
sift_cluster

{'pt_sacrecoeur_trevi_tajmahal': {'clusters': {'1': ['sacre_coeur_93423882_13476569344.png',
    'sacre_coeur_45927449_5904684394.png',
    'sacre_coeur_90467261_5816977333.png',
    'sacre_coeur_47029087_8959697396.png',
    'sacre_coeur_33340964_112626481.png',
    'sacre_coeur_55695069_9640098822.png',
    'sacre_coeur_86731151_442052253.png',
    'sacre_coeur_90517919_4394638107.png',
    'sacre_coeur_55297752_267282770.png',
    'sacre_coeur_98546957_2200688611.png',
    'sacre_coeur_68663909_1356782012.png',
    'sacre_coeur_80691366_2623658065.png',
    'sacre_coeur_16537684_4545274148.png',
    'sacre_coeur_04739785_8198592553.png',
    'sacre_coeur_55167859_91653543.png',
    'sacre_coeur_70116109_2697195876.png',
    'sacre_coeur_39702035_1808089104.png',
    'sacre_coeur_46031506_3822251715.png',
    'sacre_coeur_68688218_4738908453.png',
    'sacre_coeur_82410562_10997050704.png',
    'sacre_coeur_41224690_4580747259.png',
    'sacre_coeur_34264811_99184810.png',
    'sacre

In [23]:
# create interactive graph
# The cell output lists one visualization_cluster_<dataset>.html path per dataset
# under the directory passed as the last argument.

interactive_graph(graph_flann_sift, cluster_flann_sift, os.path.join(visualization_dir, "flann_with_sift"))

evaluation/cluster_visualization/flann_with_sift/visualization_cluster_pt_sacrecoeur_trevi_tajmahal.html
evaluation/cluster_visualization/flann_with_sift/visualization_cluster_pt_stpeters_stpauls.html
evaluation/cluster_visualization/flann_with_sift/visualization_cluster_pt_brandenburg_british_buckingham.html
evaluation/cluster_visualization/flann_with_sift/visualization_cluster_imc2023_heritage.html
evaluation/cluster_visualization/flann_with_sift/visualization_cluster_imc2024_lizard_pond.html
evaluation/cluster_visualization/flann_with_sift/visualization_cluster_fbk_vineyard.html
evaluation/cluster_visualization/flann_with_sift/visualization_cluster_imc2024_dioscuri_baalshamin.html
evaluation/cluster_visualization/flann_with_sift/visualization_cluster_amy_gardens.html
evaluation/cluster_visualization/flann_with_sift/visualization_cluster_imc2023_haiper.html
evaluation/cluster_visualization/flann_with_sift/visualization_cluster_pt_piazzasanmarco_grandplace.html
evaluation/cluster_visu

## FLANN matcher with DISK descriptors

In [24]:
# clustering: flann with disk
graph_flann_disk, cluster_flann_disk = clustering_with_ransac(
    matches_file=flann_disk_path,
    labels_df=labels_df,
    matcher_type="flann",
    descriptor_type="disk",
)

In [25]:
# Per-dataset graphs for FLANN + DISK (dataset name -> networkx Graph).
graph_flann_disk

{'fbk_vineyard': <networkx.classes.graph.Graph at 0x7f99ada2d5d0>,
 'pt_sacrecoeur_trevi_tajmahal': <networkx.classes.graph.Graph at 0x7f999eae2a50>,
 'pt_piazzasanmarco_grandplace': <networkx.classes.graph.Graph at 0x7f99abf79410>,
 'imc2024_lizard_pond': <networkx.classes.graph.Graph at 0x7f999d6f8390>,
 'pt_brandenburg_british_buckingham': <networkx.classes.graph.Graph at 0x7f999f30d550>,
 'amy_gardens': <networkx.classes.graph.Graph at 0x7f99adaa2050>,
 'imc2024_dioscuri_baalshamin': <networkx.classes.graph.Graph at 0x7f999ea10f90>,
 'imc2023_haiper': <networkx.classes.graph.Graph at 0x7f999f485590>,
 'pt_stpeters_stpauls': <networkx.classes.graph.Graph at 0x7f999f485b10>,
 'imc2023_heritage': <networkx.classes.graph.Graph at 0x7f999ea34090>,
 'imc2023_theather_imc2024_church': <networkx.classes.graph.Graph at 0x7f999f3ae6d0>,
 'stairs': <networkx.classes.graph.Graph at 0x7f999f44a450>,
 'ETs': <networkx.classes.graph.Graph at 0x7f99abcc4e90>}

In [26]:
# Edges of the "ETs" dataset graph (FLANN + DISK); each edge carries a 'weight' attribute.
graph_flann_disk["ETs"].edges(data=True)

EdgeDataView([('et_et007.png', 'et_et006.png', {'weight': 653}), ('et_et007.png', 'et_et005.png', {'weight': 567}), ('et_et007.png', 'et_et008.png', {'weight': 29}), ('et_et003.png', 'et_et001.png', {'weight': 156}), ('et_et003.png', 'et_et002.png', {'weight': 57}), ('et_et003.png', 'et_et000.png', {'weight': 848}), ('et_et003.png', 'et_et004.png', {'weight': 34}), ('et_et006.png', 'et_et008.png', {'weight': 7}), ('et_et006.png', 'et_et005.png', {'weight': 389}), ('et_et001.png', 'et_et000.png', {'weight': 310}), ('et_et001.png', 'et_et002.png', {'weight': 323}), ('et_et001.png', 'et_et004.png', {'weight': 37}), ('et_et004.png', 'et_et002.png', {'weight': 93}), ('et_et004.png', 'et_et000.png', {'weight': 34}), ('et_et002.png', 'et_et000.png', {'weight': 89}), ('et_et008.png', 'et_et005.png', {'weight': 19}), ('another_et_another_et006.png', 'another_et_another_et008.png', {'weight': 7}), ('another_et_another_et006.png', 'another_et_another_et007.png', {'weight': 144}), ('another_et_ano

In [27]:
# Show the in-memory clustering results for every dataset, keyed by dataset name.
cluster_flann_disk

{'fbk_vineyard': {'clusters': {0: ['vineyard_split_2_frame_1260.png',
    'vineyard_split_2_frame_1255.png',
    'vineyard_split_2_frame_1270.png',
    'vineyard_split_2_frame_1265.png'],
   2: ['vineyard_split_2_frame_1245.png',
    'vineyard_split_2_frame_1240.png',
    'vineyard_split_2_frame_1235.png',
    'vineyard_split_2_frame_1230.png'],
   4: ['vineyard_split_2_frame_1205.png',
    'vineyard_split_2_frame_1210.png',
    'vineyard_split_2_frame_1215.png'],
   5: ['vineyard_split_2_frame_1170.png',
    'vineyard_split_2_frame_1165.png',
    'vineyard_split_2_frame_1160.png',
    'vineyard_split_2_frame_1175.png'],
   7: ['vineyard_split_2_frame_1300.png',
    'vineyard_split_2_frame_1290.png',
    'vineyard_split_2_frame_1310.png',
    'vineyard_split_2_frame_1295.png',
    'vineyard_split_2_frame_1305.png',
    'vineyard_split_2_frame_1315.png'],
   8: ['vineyard_split_2_frame_1200.png',
    'vineyard_split_2_frame_1195.png',
    'vineyard_split_2_frame_1190.png',
    'vineyard

In [28]:
# Drill into a single dataset. The entry holds three keys:
#   'clusters'    - community id -> list of image names
#   'outliers'    - community id -> list of image names
#   'communities' - image name -> community id
cluster_flann_disk["ETs"]

{'clusters': {3: ['et_et007.png',
   'et_et006.png',
   'et_et008.png',
   'et_et005.png'],
  5: ['et_et003.png',
   'et_et001.png',
   'et_et004.png',
   'et_et002.png',
   'et_et000.png'],
  6: ['another_et_another_et006.png',
   'another_et_another_et007.png',
   'another_et_another_et008.png',
   'another_et_another_et009.png'],
  7: ['another_et_another_et002.png',
   'another_et_another_et004.png',
   'another_et_another_et003.png',
   'another_et_another_et005.png',
   'another_et_another_et001.png']},
 'outliers': {0: ['outliers_out_et001.png'],
  1: ['outliers_out_et003.png'],
  2: ['outliers_out_et002.png'],
  4: ['another_et_another_et010.png']},
 'communities': {'outliers_out_et001.png': 0,
  'outliers_out_et003.png': 1,
  'outliers_out_et002.png': 2,
  'et_et007.png': 3,
  'et_et003.png': 5,
  'et_et006.png': 3,
  'et_et001.png': 5,
  'et_et004.png': 5,
  'et_et002.png': 5,
  'et_et008.png': 3,
  'et_et005.png': 3,
  'et_et000.png': 5,
  'another_et_another_et006.png': 6,


In [36]:
# Reload the "ETs" graph that was saved to disk, to check what was serialized.
# Mode and encoding are explicit so the read does not depend on the platform's
# locale default and matches the other JSON-loading cell.
with open("evaluation/graphs/flann_with_disk/graph_ETs.json", "r", encoding="utf-8") as f:
    ets_disk_graph = json.load(f)

ets_disk_graph

{'directed': False,
 'multigraph': False,
 'graph': {},
 'nodes': [{'id': 'outliers_out_et001.png'},
  {'id': 'outliers_out_et003.png'},
  {'id': 'outliers_out_et002.png'},
  {'id': 'et_et007.png'},
  {'id': 'et_et003.png'},
  {'id': 'et_et006.png'},
  {'id': 'et_et001.png'},
  {'id': 'et_et004.png'},
  {'id': 'et_et002.png'},
  {'id': 'et_et008.png'},
  {'id': 'et_et005.png'},
  {'id': 'et_et000.png'},
  {'id': 'another_et_another_et006.png'},
  {'id': 'another_et_another_et002.png'},
  {'id': 'another_et_another_et010.png'},
  {'id': 'another_et_another_et004.png'},
  {'id': 'another_et_another_et007.png'},
  {'id': 'another_et_another_et008.png'},
  {'id': 'another_et_another_et003.png'},
  {'id': 'another_et_another_et005.png'},
  {'id': 'another_et_another_et001.png'},
  {'id': 'another_et_another_et009.png'}],
 'edges': [{'weight': 653, 'source': 'et_et007.png', 'target': 'et_et006.png'},
  {'weight': 567, 'source': 'et_et007.png', 'target': 'et_et005.png'},
  {'weight': 29, 'sou

In [37]:
# Reload the clustering results saved for the flann + disk run.
# NOTE: JSON object keys are always strings, so the community ids come back as
# '0', '2', ... rather than the ints used in the in-memory cluster_flann_disk.
# The encoding is explicit so the read does not depend on the locale default.
with open("evaluation/clusters/clusters_flann_with_disk.json", "r", encoding="utf-8") as f:
    disk_cluster = json.load(f)

disk_cluster

{'fbk_vineyard': {'clusters': {'0': ['vineyard_split_2_frame_1260.png',
    'vineyard_split_2_frame_1255.png',
    'vineyard_split_2_frame_1270.png',
    'vineyard_split_2_frame_1265.png'],
   '2': ['vineyard_split_2_frame_1245.png',
    'vineyard_split_2_frame_1240.png',
    'vineyard_split_2_frame_1235.png',
    'vineyard_split_2_frame_1230.png'],
   '4': ['vineyard_split_2_frame_1205.png',
    'vineyard_split_2_frame_1210.png',
    'vineyard_split_2_frame_1215.png'],
   '5': ['vineyard_split_2_frame_1170.png',
    'vineyard_split_2_frame_1165.png',
    'vineyard_split_2_frame_1160.png',
    'vineyard_split_2_frame_1175.png'],
   '7': ['vineyard_split_2_frame_1300.png',
    'vineyard_split_2_frame_1290.png',
    'vineyard_split_2_frame_1310.png',
    'vineyard_split_2_frame_1295.png',
    'vineyard_split_2_frame_1305.png',
    'vineyard_split_2_frame_1315.png'],
   '8': ['vineyard_split_2_frame_1200.png',
    'vineyard_split_2_frame_1195.png',
    'vineyard_split_2_frame_1190.png',
 

In [38]:
# create interactive graph
# Passes the in-memory graphs and clustering results for the flann + disk run.
# The call prints one visualization_cluster_<dataset>.html path per dataset
# under <visualization_dir>/flann_with_disk (presumably the files it writes —
# TODO confirm in clustering.cluster_images).

interactive_graph(graph_flann_disk, cluster_flann_disk, os.path.join(visualization_dir, "flann_with_disk"))

evaluation/cluster_visualization/flann_with_disk/visualization_cluster_fbk_vineyard.html
evaluation/cluster_visualization/flann_with_disk/visualization_cluster_pt_sacrecoeur_trevi_tajmahal.html
evaluation/cluster_visualization/flann_with_disk/visualization_cluster_pt_piazzasanmarco_grandplace.html
evaluation/cluster_visualization/flann_with_disk/visualization_cluster_imc2024_lizard_pond.html
evaluation/cluster_visualization/flann_with_disk/visualization_cluster_pt_brandenburg_british_buckingham.html
evaluation/cluster_visualization/flann_with_disk/visualization_cluster_amy_gardens.html
evaluation/cluster_visualization/flann_with_disk/visualization_cluster_imc2024_dioscuri_baalshamin.html
evaluation/cluster_visualization/flann_with_disk/visualization_cluster_imc2023_haiper.html
evaluation/cluster_visualization/flann_with_disk/visualization_cluster_pt_stpeters_stpauls.html
evaluation/cluster_visualization/flann_with_disk/visualization_cluster_imc2023_heritage.html
evaluation/cluster_visu