In [5]:
#Python 3.11.6
import torch
import torch_geometric
import torch_geometric.transforms as T
from torch_geometric.datasets import TUDataset
# from rewiring_files import PrecomputeGTREdges, AddPrecomputedGTREdges 
import sys
import networkx as nx
import pandas as pd
from IPython.display import display
import numpy as np
from scipy.sparse.csgraph import laplacian
from scipy.linalg import pinv, eigvalsh
from scipy.stats import wasserstein_distance
from grakel.kernels import GraphletSampling
from grakel import Graph
from evaluation.metrics import *


In [2]:
#Author: Mitchell Black
def gtr_nb_rewire(name, num_precomputed=30, num_added=20, root="/tmp/"):
    """Load a TUDataset rewired with GTR edges and sanity-check the result.

    The dataset is loaded with ``PrecomputeGTREdges`` as ``pre_transform`` and
    ``AddPrecomputedGTREdges`` as ``transform``. Two checks are then run, and
    each one prints either its success message or a warning:

    1. every graph has a ``precomputed_gtr_edges`` attribute with
       ``2 * num_precomputed`` columns;
    2. every graph has exactly ``2 * num_added`` more ``edge_index`` columns
       than the same graph loaded without ``transform``.

    NOTE(review): ``PrecomputeGTREdges`` and ``AddPrecomputedGTREdges`` must
    already be in scope. Their import from ``rewiring_files`` is commented out
    in the imports cell -- confirm they are provided some other way.

    NOTE(review): presumably a dataset already processed under ``root`` is
    reused rather than re-run through ``pre_transform``, so changing
    ``num_precomputed`` may need a fresh ``root`` -- confirm against the
    torch_geometric documentation.

    Args:
        name: Name of the TU dataset to load (e.g. ``"MUTAG"``).
        num_precomputed: ``num_edges`` passed to ``PrecomputeGTREdges``.
        num_added: ``num_edges`` passed to ``AddPrecomputedGTREdges``.
        root: Directory handed to ``TUDataset`` as its ``root``.

    Returns:
        The ``TUDataset`` loaded with both ``transform`` and ``pre_transform``.
    """
    # Candidate edges are computed by the pre-transform ...
    pre_transform = T.Compose([PrecomputeGTREdges(num_edges=num_precomputed)])
    # ... and a subset of them is added to the graph by the transform.
    transform = T.Compose([AddPrecomputedGTREdges(num_edges=num_added)])

    dataset = TUDataset(
        root=root,
        name=name,
        transform=transform,
        pre_transform=pre_transform,
    )

    # Per the original author's note, both directions of every edge are
    # stored, so the expected column counts are twice the requested numbers.
    expected_precomputed = 2 * num_precomputed
    expected_added = 2 * num_added

    if all(
        hasattr(data, "precomputed_gtr_edges")
        and data.precomputed_gtr_edges.shape[1] == expected_precomputed
        for data in dataset
    ):
        print("Edges succesfully precomputed!")
    else:
        # Report the failure explicitly instead of staying silent.
        print(
            f"WARNING: not every graph in {name} has "
            f"{expected_precomputed} precomputed GTR edges."
        )

    # Same dataset without the edge-adding transform, used as the baseline.
    dataset_wo_edges = TUDataset(
        root=root,
        name=name,
        pre_transform=pre_transform,
    )

    if all(
        rewired.edge_index.shape[1] - baseline.edge_index.shape[1] == expected_added
        for rewired, baseline in zip(dataset, dataset_wo_edges)
    ):
        print("Edges succesfully added!")
    else:
        print(
            f"WARNING: not every graph in {name} gained exactly "
            f"{expected_added} edges."
        )

    return dataset

In [3]:
# TU dataset names, grouped into a small and a large set.
datasets_small = ["ENZYMES", "MUTAG", "PROTEINS", "IMDB-BINARY"]
datasets_large = ["REDDIT-BINARY", "COLLAB"]
# Rewired datasets, keyed by dataset name.
rewired_small, rewired_large = {}, {}
# Rewire every small dataset, keep the result, and print its average metrics.
for dataset in datasets_small:
    print(f"Name of dataset: {dataset}")
    new_ds = rewired_small[dataset] = gtr_nb_rewire(dataset)
    get_avg_metrics(new_ds)
    print()


Name of dataset: ENZYMES
Edges succesfully precomputed!
Edges succesfully added!


  return float((xy * (M - ab)).sum() / np.sqrt(vara * varb))


Diameter: Mean: 3.535 Std Dev: 1.1469270537687506
Effective Resistance: Mean: 0.4112668657232636 Std Dev: 0.18082309372095984
Modularity: Mean: 0.3497999507228292 Std Dev: 0.14105036957993067
Assortativity: Mean: -0.006547111595073569 Std Dev: 0.13440028675888047
Clustering Coefficient: Mean: 0.256513576076219 Std Dev: 0.16055617434720795
Spectral Gap: Mean: 0.3562393439239681 Std Dev: 0.22744718920874757
Forman Curvature: Mean: -6.593333333333334 Std Dev: 1.9548117272230818
Average Betweenness Centrality: Mean: 0.03761755543283413 Std Dev: 0.008519807360555659

Name of dataset: MUTAG
Edges succesfully precomputed!
Edges succesfully added!
Diameter: Mean: 2.8191489361702127 Std Dev: 0.5735808751790882
Effective Resistance: Mean: 0.4225860535454092 Std Dev: 0.06086363817551866
Modularity: Mean: 0.18065327873311493 Std Dev: 0.06627845520323654
Assortativity: Mean: -0.1509660276248219 Std Dev: 0.1368733288089425
Clustering Coefficient: Mean: 0.06050058575252487 Std Dev: 0.1066892730438672

In [None]:
# Slow cell: about 75 min for RB (REDDIT-BINARY); COLLAB could not be completed.
# Rewire every large dataset, keep the result, and print its average metrics.
for dataset in datasets_large:
    print(f"Name of dataset: {dataset}")
    new_ds = rewired_large[dataset] = gtr_nb_rewire(dataset)
    get_avg_metrics(new_ds)
    print()

In [4]:
# Compare each original small dataset against its rewired counterpart.
for name in datasets_small:
    # IMDB-BINARY takes upwards of 137 minutes, so it is skipped. `continue`
    # (not `break`) keeps the skip correct even if IMDB-BINARY is no longer
    # the last entry of `datasets_small`.
    if name == "IMDB-BINARY":
        continue
    # NOTE(review): this loads from "./tmp/" while gtr_nb_rewire uses "/tmp/"
    # -- confirm the separate root is intentional.
    orig_dataset = TUDataset(root="./tmp/", name=name)
    rew_ds = rewired_small[name]

    print(f"Comparison metrics for {name}\n")
    comparison_metrics(orig_dataset, rew_ds)

Comparison metrics for ENZYMES

Graph Edit Distance: Mean: 19.76 Std Dev: 1.8918421357678514
Jaccard Similarity: Mean: 0.733286779600982 Std Dev: 0.08948155704355801
Laplacian Spectrum Distance: Mean: 7.841891656031461 Std Dev: 1.9608887056755382
Adjacency Spectrum Distance: Mean: 2.7198322369461225 Std Dev: 0.8552779275289724
Spectral Norm of Adjacency Difference: Mean: 2.086264593417396 Std Dev: 0.6284507411012444
Degree Distribution Distance: Mean: 1.4816537063911133 Std Dev: 0.7316251640677282
Graphlet Kernel Distance: Mean: -46338.02333333333 Std Dev: 53918.43603798972
Shortest Path Length Distribution Difference: Mean: 2.1894229715686047 Std Dev: 1.2317115049725367
Comparison metrics for MUTAG





Graph Edit Distance: Mean: 20.0 Std Dev: 0.0
Jaccard Similarity: Mean: 0.48688704453515 Std Dev: 0.07430197472862302
Laplacian Spectrum Distance: Mean: 10.148957901358457 Std Dev: 1.4283764278945141
Adjacency Spectrum Distance: Mean: 3.2571504327977623 Std Dev: 0.5254615549689234
Spectral Norm of Adjacency Difference: Mean: 2.8124667077007106 Std Dev: 0.5587458065950631
Degree Distribution Distance: Mean: 2.391937043997543 Std Dev: 0.6515836091431447
Graphlet Kernel Distance: Mean: -1966.8297872340424 Std Dev: 1565.3972178814608
Shortest Path Length Distribution Difference: Mean: 1.688117305630879 Std Dev: 0.390638951265121
Comparison metrics for PROTEINS

Graph Edit Distance: Mean: 18.80053908355795 Std Dev: 4.056620787814343
Jaccard Similarity: Mean: 0.7156529836445921 Std Dev: 0.13107047970179714
Laplacian Spectrum Distance: Mean: 7.934569063545286 Std Dev: 2.9039981599511355
Adjacency Spectrum Distance: Mean: 2.8761356549025328 Std Dev: 1.1992104332864657
Spectral Norm of Adjacency