# Experiments for Graph Matching
This notebook contains experiments that compare graph matching results obtained from GNN-produced embeddings with graph matching results obtained from embeddings built from temporal centralities.

In [1]:
import numpy as np

In [2]:
# Load the source-side temporal centrality arrays (betweenness, closeness, degree)
# from the .npy files in ../data. Each array is indexed by source below.
temporal_betweenness_vectors_sources = np.load('../data/temporal_betweenness_vectors_overtime.npy')
temporal_closeness_vectors_sources = np.load('../data/temporal_closeness_vectors_overtime.npy')
temporal_degree_vectors_sources = np.load('../data/temporal_degree_vectors_overtime.npy')

In [3]:
# TODO load temporal centralities vectors for targets (by Max)
# Load the target-side temporal centrality arrays (betweenness, closeness, degree).
# NOTE(review): these paths are bare filenames resolved against the current working
# directory, whereas the source arrays are read from '../data/' - confirm the files
# are expected to live next to the notebook.
temporal_betweenness_vectors_targets = np.load('temporal_betweennesses_unique.npy')
temporal_closeness_vectors_targets = np.load('temporal_closenesses_unique.npy')
temporal_degree_vectors_targets = np.load('temporal_degrees_unique.npy')

In [None]:
# TODO: load encoding vectors from GNN (by Til) for sources and targets

In [4]:
# Sanity checks: on each side (sources / targets) the betweenness, closeness and
# degree arrays must contain the same number of vectors, otherwise a shared index
# would refer to different entries in the three arrays.
num_sources = len(temporal_betweenness_vectors_sources)
assert (
    len(temporal_closeness_vectors_sources)
    == len(temporal_degree_vectors_sources)
    == num_sources
), (
    "source centrality arrays differ in length: "
    f"betweenness={num_sources}, "
    f"closeness={len(temporal_closeness_vectors_sources)}, "
    f"degree={len(temporal_degree_vectors_sources)}"
)

num_targets = len(temporal_betweenness_vectors_targets)
assert (
    len(temporal_closeness_vectors_targets)
    == len(temporal_degree_vectors_targets)
    == num_targets
), (
    "target centrality arrays differ in length: "
    f"betweenness={num_targets}, "
    f"closeness={len(temporal_closeness_vectors_targets)}, "
    f"degree={len(temporal_degree_vectors_targets)}"
)
# TODO: add further sanity checks (e.g. matching vector dimensionality)


## Comparison of GNN to temporal betweenness

In [30]:
from scipy.spatial import distance
from tqdm import tqdm
from sklearn.metrics import dcg_score

In [None]:
# TODO: do the graph matching ranking (as Til implemented), compare rankings of GNN and TCMs (comparison of top-k via DCG where the TCM is the truth?)
# Collect, per source index, the Euclidean distances between its temporal-betweenness
# vector and the temporal-betweenness vector of each compared target.
# Only the first 5 sources and the first 5 targets are compared (hard-coded below).
matching_scores_GNN_betweenness = []
euc_distances_betweenness = {}

for source_idx in tqdm(range(5)):
    # Use the betweenness vector here; the closeness vector was read by mistake before,
    # leaving `source_temporal_betweenness` undefined on a fresh kernel.
    source_temporal_betweenness = temporal_betweenness_vectors_sources[source_idx]
    #source_GNN = ...
    euc_distances_betweenness[source_idx] = []

    for target_idx in range(5):
        target_temporal_betweenness = temporal_betweenness_vectors_targets[target_idx]
        euc_distances_betweenness[source_idx].append(distance.euclidean(source_temporal_betweenness, target_temporal_betweenness))
    # TODO: do the graph matching for betweenness


    #source_GNN = GNN_vectors_sources[source_idx]
    # TODO: do the graph matching for GNN

    # TODO: compare both matchings and compute a score
    #matching_scores_GNN_betweenness.append(score)
# TODO: somehow aggregate the scores and generate a total score (average? histograms?)



In [25]:
# TODO: do the graph matching ranking (as Til implemented), compare rankings of GNN and TCMs (comparison of top-k via DCG where the TCM is the truth?)
# Collect, per source index, the Euclidean distances between its temporal-closeness
# vector and the temporal-closeness vector of each compared target, then rank the
# targets by that distance.
# Only the first 5 sources and the first 5 targets are compared (hard-coded below).
matching_scores_GNN_closeness = []
euc_distances_closeness = {}

for source_idx in tqdm(range(5)):
    source_temporal_closeness = temporal_closeness_vectors_sources[source_idx]
    #source_GNN = ...
    euc_distances_closeness[source_idx] = []

    for target_idx in range(5):
        target_temporal_closeness = temporal_closeness_vectors_targets[target_idx]
        euc_distances_closeness[source_idx].append(distance.euclidean(source_temporal_closeness, target_temporal_closeness))

    # Graph matching for closeness: target indices ordered by ascending Euclidean
    # distance (closest target first). A stable sort keeps ties in index order.
    ranking_tcms = np.argsort(euc_distances_closeness[source_idx], kind="stable")


    #source_GNN = GNN_vectors_sources[source_idx]
    # TODO: do the graph matching for GNN
    #ranking_gnn = ...

    # TODO: compare both matchings and compute a score
    # Disabled until the GNN ranking exists: passing the `...` placeholders to
    # dcg_score raises. NOTE(review): dcg_score expects 2-D arrays of shape
    # (n_samples, n_labels) holding relevance values / predicted scores rather than
    # rank indices - confirm the inputs are converted accordingly before enabling.
    #matching_score = dcg_score(y_true=ranking_tcms, y_score=ranking_gnn)
    #matching_scores_GNN_closeness.append(matching_score)
# TODO: somehow aggregate the scores and generate a total score (average? histograms?)




100%|██████████| 5/5 [00:00<00:00, 3342.07it/s]


In [26]:
# Spot check: Euclidean distance between the closeness vectors of source 0 and target 0.
distance.euclidean(temporal_closeness_vectors_sources[0], temporal_closeness_vectors_targets[0])

1.0019512213675874

In [27]:
# Preview the first 5 source closeness vectors.
temporal_closeness_vectors_sources[:5]

array([[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        1. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 1. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        1. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        1.5, 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 1. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        1. , 1. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 1.5, 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        1. , 0. , 0. , 0. ]])

In [28]:
# Preview the first 5 target closeness vectors.
temporal_closeness_vectors_targets[:5]

array([[0.        , 0.        , 0.        , 0.0625    , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.125     , 0.        ,
        0.        , 0.        , 0.06666667, 0.06666667, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.06666667],
       [0.        , 0.        , 0.        , 0.1875    , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.13809524],
       [0.07692308, 0.        , 0.        , 0.25      , 0.        ,
        0.07692308, 0.        , 0.        , 0.07692308, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.2047619 ],
       [0.16025641, 0.        , 0.        , 0.3125  

In [29]:
# Show the collected distances: source index -> list of distances to each compared target.
euc_distances_closeness

{0: [1.0019512213675874,
  1.0143758343598952,
  1.026755348067192,
  1.0593294657148287,
  1.0994754003353842],
 1: [1.4155939566132656,
  1.4244150846341572,
  1.4332573198084775,
  1.4029728304872873,
  1.4335271194143833],
 2: [1.5013015186830392,
  1.5096219173466359,
  1.5179678997872716,
  1.5401879485737007,
  1.5680708389427607],
 3: [1.733178077982756,
  1.740390281900394,
  1.7476345569896987,
  1.7228850115679544,
  1.747855829894588],
 4: [1.8038587112077265,
  1.8107894226920294,
  1.8177531583756306,
  1.7724022359956653,
  1.7966849821750674]}

In [None]:
# TODO: do the same for degree (betweenness and closeness have cells above)

In [None]:
# TODO: compare scores for closeness, betweenness and degree and find out something meaningful (regarding the use cases)