In [40]:
import sys
import torch
import torch_geometric
import networkx as nx
import pandas as pd
from IPython.display import display
from torch_geometric.datasets import TUDataset
import numpy as np
from scipy.sparse.csgraph import laplacian
from scipy.linalg import pinv, eigvalsh
#Testing done with Python 3.12, Torch 2.6.0, Torch_Geometric 2.6.1
print(sys.executable)
print(torch.__version__)
print(torch_geometric.__version__)

/usr/local/bin/python3.12
2.6.0
2.6.1


In [28]:
#Function to convert dataset to a NetworkX Representation
def make_G(dataset):
    graph = dataset[0]
    edge_index = graph.edge_index.numpy().T
    G = nx.Graph()
    G.add_edges_from(edge_index)

    return G

In [29]:
#Function to get diameter
def get_diameter(G):
    if nx.is_connected(G):
        diameter = nx.diameter(G)
    else:
        diameter = max(nx.diameter(G.subgraph(c)) for c in nx.connected_components(G))

    return diameter

In [30]:
#Function to get effective resistance
def get_eff_res(G):
    nodes = list(G.nodes())
    u = nodes[0]
    v = nodes[1]

    L = laplacian(nx.to_numpy_array(G), normed=False)
    L_pinv = pinv(L)
    return L_pinv[u, u] + L_pinv[v, v] - 2 * L_pinv[u, v]

In [31]:
#Function to get modularity
from networkx.algorithms.community import greedy_modularity_communities

def get_modularity(G):
    communities = list(greedy_modularity_communities(G))
    modularity = nx.algorithms.community.modularity(G, communities)
    return modularity

In [32]:
#Function to get Graph Assortativity
def get_assort(G):
    assortativity = nx.degree_assortativity_coefficient(G)
    return assortativity

In [33]:
#Function to get clustering coefficient
def get_clust_coeff(G):
    clustering_coeff = nx.average_clustering(G)
    return clustering_coeff

In [34]:
#Function to get Spectral Gap
def get_spec_gap(G):
    L = laplacian(nx.to_numpy_array(G), normed=True)
    eigenvalues = eigvalsh(L)
    spectral_gap = eigenvalues[1]
    return spectral_gap

In [35]:
#Function to get curvature
def get_Forman_curve(G):
    curvature = {}
    for u, v in G.edges():
        k_u = G.degree[u]
        k_v = G.degree[v]
        curvature[(u, v)] = 4 - (k_u + k_v)

        avg_curvature = np.mean(list(curvature.values()))
        return avg_curvature

In [36]:
#Function to get average betweenness centrality
def get_bet_cent(G):
    bet_cent = nx.betweenness_centrality(G)
    avg_bet = sum(bet_cent.values()) / len(bet_cent)
    return avg_bet

In [37]:
#Overall function to complete all metrics for a specific dataset
def get_metrics(dataset):
    G = make_G(dataset)

    print("Diameter: ", get_diameter(G))
    print("Effective Resistance: ", get_eff_res(G))
    print("Modularity: ", get_modularity(G))
    print("Assortativity: ", get_assort(G))
    print("Clustering Coefficient:", get_clust_coeff(G))
    print("Spectral Gap:", get_spec_gap(G))
    print("Forman Curvature:", get_Forman_curve(G))
    print("Average Betweenness Centrality:", get_bet_cent(G))

In [53]:
def get_metrics_table(dataset, name):
    G = make_G(dataset)

    metrics = {
        "Diameter": get_diameter(G),
        "Effective Resistance": get_eff_res(G),
        "Modularity": get_modularity(G),
        "Assortativity": get_assort(G),
        "Clustering Coefficient": get_clust_coeff(G),
        "Spectral Gap": get_spec_gap(G),
        "Forman Curvature": get_Forman_curve(G),
        "Average Betweenness Centrality": get_bet_cent(G),
    }

    # Convert dictionary to a pandas DataFrame
    df = pd.DataFrame(metrics.items(), columns=["Metric", name])

    # Round to 3 decimal places
    df[name] = df[name].round(5)

    # Display the table
    display(df)

In [54]:
dataset = TUDataset(root="./tmp/", name="REDDIT-BINARY")
get_metrics_table(dataset, name="REDDIT-BINARY")


Unnamed: 0,Metric,REDDIT-BINARY
0,Diameter,8.0
1,Effective Resistance,3.00277
2,Modularity,0.5892
3,Assortativity,-0.40461
4,Clustering Coefficient,0.04106
5,Spectral Gap,0.0
6,Forman Curvature,0.0
7,Average Betweenness Centrality,0.00911


In [55]:
#Complete for all other datasets
datasets = ["REDDIT-BINARY", "IMDB-BINARY", "MUTAG", "ENZYMES", "PROTEINS", "COLLAB"]

data_dict = {name: TUDataset(root=f'./data/{name}', name=name) for name in datasets}

for name, dataset in data_dict.items():
    print(f"Dataset: {name}")
    get_metrics_table(dataset, name=name)

Dataset: REDDIT-BINARY


Unnamed: 0,Metric,REDDIT-BINARY
0,Diameter,8.0
1,Effective Resistance,3.00277
2,Modularity,0.5892
3,Assortativity,-0.40461
4,Clustering Coefficient,0.04106
5,Spectral Gap,0.0
6,Forman Curvature,0.0
7,Average Betweenness Centrality,0.00911


Dataset: IMDB-BINARY


Unnamed: 0,Metric,IMDB-BINARY
0,Diameter,2.0
1,Effective Resistance,0.33333
2,Modularity,0.30175
3,Assortativity,-0.16297
4,Clustering Coefficient,0.90291
5,Spectral Gap,0.17619
6,Forman Curvature,-20.0
7,Average Betweenness Centrality,0.03421


Dataset: MUTAG


Unnamed: 0,Metric,MUTAG
0,Diameter,9.0
1,Effective Resistance,0.82843
2,Modularity,0.45152
3,Assortativity,-0.20635
4,Clustering Coefficient,0.0
5,Spectral Gap,0.06421
6,Forman Curvature,0.0
7,Average Betweenness Centrality,0.17451


Dataset: ENZYMES


Unnamed: 0,Metric,ENZYMES
0,Diameter,12.0
1,Effective Resistance,0.45019
2,Modularity,0.59361
3,Assortativity,0.04587
4,Clustering Coefficient,0.56538
5,Spectral Gap,0.01007
6,Forman Curvature,-4.0
7,Average Betweenness Centrality,0.11369


Dataset: PROTEINS


Unnamed: 0,Metric,PROTEINS
0,Diameter,14.0
1,Effective Resistance,1.72185
2,Modularity,0.65783
3,Assortativity,-0.06979
4,Clustering Coefficient,0.34167
5,Spectral Gap,0.01167
6,Forman Curvature,-2.0
7,Average Betweenness Centrality,0.10912


Dataset: COLLAB


  return float((xy * (M - ab)).sum() / np.sqrt(vara * varb))


Unnamed: 0,Metric,COLLAB
0,Diameter,1.0
1,Effective Resistance,0.04444
2,Modularity,0.0
3,Assortativity,
4,Clustering Coefficient,1.0
5,Spectral Gap,1.02273
6,Forman Curvature,-84.0
7,Average Betweenness Centrality,0.0


In [57]:
from gtr_rewire import rewire_gtr

rewired_MUTAG = rewire_gtr(name='MUTAG', num_edges=30, add_edges=20)
get_metrics(rewired_MUTAG)

Edges succesfully precomputed!
Edges succesfully added!
Diameter:  3
Effective Resistance:  0.41910534208433964
Modularity:  0.1811308349769888
Assortativity:  -0.33714285714283415
Clustering Coefficient: 0.0
Spectral Gap: 0.5513776500346972
Forman Curvature: -5.0
Average Betweenness Centrality: 0.05098039215686274


In [59]:
for name, dataset in data_dict.items():
    print(f"Dataset: {name}")
    try:
        rewired_curData = rewire_gtr(name=name, num_edges=30, add_edges=20)
    except Exception as e:
        print(f"Issue with dataset: {name}")
        
    get_metrics_table(rewired_curData, name=name+" GTR Rewired")

Dataset: REDDIT-BINARY
Issue with dataset: REDDIT-BINARY




Unnamed: 0,Metric,REDDIT-BINARY GTR Rewired
0,Diameter,1.0
1,Effective Resistance,0.04444
2,Modularity,0.00094
3,Assortativity,-0.00051
4,Clustering Coefficient,1.0
5,Spectral Gap,1.02273
6,Forman Curvature,-86.0
7,Average Betweenness Centrality,0.0


Dataset: IMDB-BINARY
Edges succesfully precomputed!
Edges succesfully added!


Unnamed: 0,Metric,IMDB-BINARY GTR Rewired
0,Diameter,2.0
1,Effective Resistance,0.2276
2,Modularity,0.21066
3,Assortativity,-0.09048
4,Clustering Coefficient,0.5986
5,Spectral Gap,0.44786
6,Forman Curvature,-23.0
7,Average Betweenness Centrality,0.02836


Dataset: MUTAG
Edges succesfully precomputed!
Edges succesfully added!


Unnamed: 0,Metric,MUTAG GTR Rewired
0,Diameter,3.0
1,Effective Resistance,0.41911
2,Modularity,0.18113
3,Assortativity,-0.33714
4,Clustering Coefficient,0.0
5,Spectral Gap,0.55138
6,Forman Curvature,-5.0
7,Average Betweenness Centrality,0.05098


Dataset: ENZYMES
Edges succesfully precomputed!
Edges succesfully added!


Unnamed: 0,Metric,ENZYMES GTR Rewired
0,Diameter,4.0
1,Effective Resistance,0.33947
2,Modularity,0.42941
3,Assortativity,0.03763
4,Clustering Coefficient,0.32806
5,Spectral Gap,0.24227
6,Forman Curvature,-7.0
7,Average Betweenness Centrality,0.03724


Dataset: PROTEINS
Edges succesfully precomputed!
Edges succesfully added!


Unnamed: 0,Metric,PROTEINS GTR Rewired
0,Diameter,4.0
1,Effective Resistance,0.69098
2,Modularity,0.45584
3,Assortativity,-0.04714
4,Clustering Coefficient,0.19692
5,Spectral Gap,0.21008
6,Forman Curvature,-5.0
7,Average Betweenness Centrality,0.03856


Dataset: COLLAB
Edges succesfully precomputed!
Edges succesfully added!


Unnamed: 0,Metric,COLLAB GTR Rewired
0,Diameter,1.0
1,Effective Resistance,0.04444
2,Modularity,0.00094
3,Assortativity,-0.00051
4,Clustering Coefficient,1.0
5,Spectral Gap,1.02273
6,Forman Curvature,-86.0
7,Average Betweenness Centrality,0.0
