In [2]:
import sys
print(sys.executable)

/usr/local/bin/python3.12


In [3]:
import torch

In [4]:
import torch_geometric
print(torch.__version__)
print(torch_geometric.__version__)

2.6.0
2.6.1


In [7]:
#Attempt to download reddit-binary graph dataset
from torch_geometric.datasets import TUDataset

# Load the REDDIT-BINARY dataset
red_dataset = TUDataset(root='./data', name='REDDIT-BINARY')

OSError: [Errno 30] Read-only file system: '//data'

In [6]:
#Get other dependencies
import networkx as nx
import numpy as np
from scipy.sparse.csgraph import laplacian
from scipy.linalg import pinv, eigvalsh

redGraph = red_dataset[0]
edge_index = redGraph.edge_index.numpy().T
G_red = nx.Graph()
G_red.add_edges_from(edge_index)

In [7]:
#Get graph diameter
if nx.is_connected(G_red):
    diameter = nx.diameter(G_red)
else:
    diameter = max(nx.diameter(G_red.subgraph(c)) for c in nx.connected_components(G_red))

print("Reddit Graph Diameter:", diameter)

Reddit Graph Diameter: 8


In [8]:
#Function to get diameter

def get_diameter(G):
    if nx.is_connected(G):
        diameter = nx.diameter(G)
    else:
        diameter = max(nx.diameter(G.subgraph(c)) for c in nx.connected_components(G))

    return diameter

In [9]:
#Function to convert dataset to a NetworkX Representation
def make_G(dataset):
    graph = dataset[0]
    edge_index = graph.edge_index.numpy().T
    G = nx.Graph()
    G.add_edges_from(edge_index)

    return G

In [10]:
red_G = make_G(red_dataset)
print(get_diameter(red_G))

8


In [11]:
#Function to get effective resistance
def get_eff_res(G):
    nodes = list(G.nodes())
    u = nodes[0]
    v = nodes[1]

    L = laplacian(nx.to_numpy_array(G), normed=False)
    L_pinv = pinv(L)
    return L_pinv[u, u] + L_pinv[v, v] - 2 * L_pinv[u, v]

In [12]:
print(get_eff_res(red_G))

3.0027688909523844


In [12]:
#Function to get modularity
from networkx.algorithms.community import greedy_modularity_communities

def get_modularity(G):
    communities = list(greedy_modularity_communities(G))
    modularity = nx.algorithms.community.modularity(G, communities)
    return modularity

In [13]:
print(get_modularity(red_G))

0.589201388888889


In [14]:
#Function to get Graph Assortativity
def get_assort(G):
    assortativity = nx.degree_assortativity_coefficient(G)
    return assortativity

In [15]:
print(get_assort(red_G))

-0.4046069146077361


In [16]:
#Function to get clustering coefficient
def get_clust_coeff(G):
    clustering_coeff = nx.average_clustering(G)
    return clustering_coeff

In [17]:
#Function to get Spectral Gap
def get_spec_gap(G):
    L = laplacian(nx.to_numpy_array(G), normed=True)
    eigenvalues = eigvalsh(L)
    spectral_gap = eigenvalues[1]
    return spectral_gap

In [18]:
#Function to get curvature
def get_Forman_curve(G):
    curvature = {}
    for u, v in G.edges():
        k_u = G.degree[u]
        k_v = G.degree[v]
        curvature[(u, v)] = 4 - (k_u + k_v)

        avg_curvature = np.mean(list(curvature.values()))
        return avg_curvature

In [19]:
print("Clustering Coefficient:", get_clust_coeff(red_G))
print("Spectral Gap:", get_spec_gap(red_G))
print("Forman Curvature:", get_Forman_curve(red_G))

Clustering Coefficient: 0.04106324376158408
Spectral Gap: 6.333210574685239e-17
Forman Curvature: 0.0


In [20]:
#Function to get average betweenness centrality
def get_bet_cent(G):
    bet_cent = nx.betweenness_centrality(G)
    avg_bet = sum(bet_cent.values()) / len(bet_cent)
    return avg_bet

In [21]:
print("Average Betweenness Centrality:", get_bet_cent(red_G))

Average Betweenness Centrality: 0.00911383099160548


In [22]:
#Overall function to complete all metrics for a specific dataset
def get_metrics(dataset):
    G = make_G(dataset)

    print("Diameter: ", get_diameter(G))
    print("Effective Resistance: ", get_eff_res(G))
    print("Modularity: ", get_modularity(G))
    print("Assortativity: ", get_assort(G))
    print("Clustering Coefficient:", get_clust_coeff(G))
    print("Spectral Gap:", get_spec_gap(G))
    print("Forman Curvature:", get_Forman_curve(G))
    print("Average Betweenness Centrality:", get_bet_cent(G))

In [23]:
get_metrics(red_dataset)

Diameter:  8
Effective Resistance:  3.0027688909523844
Modularity:  0.589201388888889
Assortativity:  -0.4046069146077361
Clustering Coefficient: 0.04106324376158408
Spectral Gap: 6.333210574685239e-17
Forman Curvature: 0.0
Average Betweenness Centrality: 0.00911383099160548


In [1]:
#Complete for all other datasets
datasets = ["REDDIT-BINARY", "IMDB-BINARY", "MUTAG", "ENZYMES", "PROTEINS", "COLLAB"]

data_dict = {name: TUDataset(root=f'./data/{name}', name=name) for name in datasets}

for name, dataset in data_dict.items():
    print(f"Dataset: {name}")
    get_metrics(dataset)

NameError: name 'TUDataset' is not defined