In [1]:
# Enable IPython's autoreload extension in mode 2, so that edited project
# modules are re-imported before each cell executes.
%load_ext autoreload
%autoreload 2

In [2]:
import warnings
# Suppress every warning for the remainder of the session.
warnings.filterwarnings('ignore')

import sys
# Make modules under ./src importable. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate entry each time.
if "src" not in sys.path:
    sys.path.append("src")

In [3]:
from ogb.nodeproppred                     import PygNodePropPredDataset
from pathlib                              import Path
from sklearn.metrics                      import adjusted_mutual_info_score
from torch_geometric.data                 import Data
from torch_geometric.datasets             import Amazon, Coauthor, Planetoid
from torch_geometric_signed_directed.data import Cora_ml, WikiCS
from training                             import Neuromap, DMoN, MinCut, Ortho, DiffPool, NOCD
from util                                 import *

import json
import numpy   as np
import pandas  as pd
import seaborn as sb
import torch

In [4]:
# Seaborn's "colorblind" palette; the bare trailing expression makes the
# notebook display it.
palette = sb.color_palette("colorblind")
palette

In [5]:
# Use the GPU whenever CUDA is available and fall back to the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [6]:
def run( data
       , methods = None
       , models  = None
       , num_trials      : int  = 25
       , lr_schedule     : bool = False
       , verbose         : bool = False
       , fixed_size_arch : bool = False
       ):
    """
    Fit every clustering method with every architecture on `data`, cache the
    per-trial outcomes as JSON and write a per-dataset CSV summary.

    Parameters
    ----------
    data
        Graph object providing `y`, `num_nodes`, `num_node_features`,
        `num_classes` and `name`; it is handed unchanged to `clusterer.fit`.
    methods
        Clusterer classes to evaluate. `None` (default) means `[Neuromap]`.
    models
        `(architecture name, learning rates)` pairs. `None` (default) means
        lin @ 1e-1, mlp @ 1e-2 and gin / gcn / sage @ 1e-3.
    num_trials
        Number of independent trials per (method, architecture) pair.
    lr_schedule, verbose
        Forwarded unchanged to `clusterer.fit`.
    fixed_size_arch
        If true, use 512 hidden channels and `data.num_classes` outputs;
        otherwise 4*sqrt(n) hidden channels and sqrt(n) outputs (rounded),
        where n is `data.num_nodes`. It also selects the results directory.

    Notes
    -----
    Results are stored under `./results/real-world[-fixed-size-arch]/`, which
    is created on demand. Trials already present in a JSON cache are not
    re-run. A trial whose `fit` returns `None` for the loss or the assignment
    is not cached (presumably a failed run -- verify against `training`), so
    it is attempted again on the next call. The summary statistics cover every
    cached trial, including trials with a number above `num_trials`.
    """
    # Defaults are resolved here instead of in the signature so that no mutable
    # list object is shared between calls.
    if methods is None:
        methods = [Neuromap]
    if models is None:
        models = [ ("lin",  [1e-1])
                 , ("mlp",  [1e-2])
                 , ("gin",  [1e-3])
                 , ("gcn",  [1e-3])
                 , ("sage", [1e-3])
                 ]

    y_true = data.y.cpu().numpy()

    if fixed_size_arch:
        hidden_channels = 512
        out_channels    = data.num_classes
    else:
        hidden_channels = int(round(4*np.sqrt(data.num_nodes)))
        out_channels    = int(round(np.sqrt(data.num_nodes)))

    # Create the output directory up front; otherwise the first write fails on
    # a fresh checkout where ./results does not exist yet.
    results_dir = Path(f"./results/real-world{'-fixed-size-arch' if fixed_size_arch else ''}")
    results_dir.mkdir(parents = True, exist_ok = True)

    res_str = "method,arch,avg ami,std ami,avg m,std m\n"

    for model, lrs in models:
        print(model)
        print("=====")
        for C in methods:
            out = results_dir / f"{C.__name__}-{model}-{data.name}.json"

            # Resume from the cache of an earlier (possibly interrupted) run.
            if out.exists():
                with open(out, "r") as fh:
                    res = json.load(fh)
            else:
                res = dict()

            for trial in range(1, num_trials + 1):
                if str(trial) in res:
                    continue

                clusterer = C(device = device, use_model = model, in_channels = data.num_node_features, hidden_channels = hidden_channels, out_channels = out_channels, num_layers = 2, dropout = 0.5)
                l, s      = clusterer.fit(data = data, epochs = 10000, patience = 100, lrs = lrs, lr_schedule = lr_schedule, num_trials = 1, verbose = verbose)
                if l is None or s is None:
                    continue

                y_pred = get_hard_clusters(s)

                ami = adjusted_mutual_info_score(y_true, y_pred)
                m   = len(set(y_pred))  # number of distinct predicted clusters

                res[str(trial)] = dict(loss = l, y_pred = y_pred, ami = ami, m = m)

                # Persist after every trial so an interruption loses at most one.
                with open(out, "w") as fh:
                    json.dump(res, fh)

            if len(res) > 0:
                amis = [v['ami'] for v in res.values()]
                ms   = [v['m']   for v in res.values()]

                # The starred values belong to the trial with the lowest loss.
                best_result = min(res.values(), key = lambda r: r["loss"])
                avg_ami     = np.average(amis)
                std_ami     = np.std(amis)
                avg_m       = np.average(ms)
                std_m       = np.std(ms)
                print(f"[{C.__name__:10}] <AMI> = {100 * avg_ami:4.1f} ± {100 * std_ami:4.1f}, <|M|> = {avg_m:6.1f} ± {std_m:6.1f}, AMI* = {100 * best_result['ami']:4.1f}, |M|* = {best_result['m']:6}")
                res_str += f"{C.__name__},{model},{100 * avg_ami:.1f},{100 * std_ami:.1f},{avg_m:.1f},{std_m:.1f}\n"

    with open(results_dir / f"{data.name}.csv", "w") as fh:
        fh.write(res_str)

In [7]:
def run_infomap(data, directed, num_trials = 25, fixed_size_arch = False):
    """
    Run two-level Infomap on `data` once per seed, cache each trial as JSON
    and append a summary line to the per-dataset CSV.

    Parameters
    ----------
    data
        Graph object providing `y` and `name`; it is converted with
        `data_to_networkx` when at least one trial still has to be computed.
    directed
        Forwarded to `data_to_networkx`.
    num_trials
        Number of seeded Infomap runs. The seeds are drawn after resetting
        NumPy's global RNG to 47, so trial k always receives the same seed.
    fixed_size_arch
        Only selects the results directory
        (`./results/real-world[-fixed-size-arch]/`); Infomap has no
        architecture.

    Notes
    -----
    The results directory is created on demand. Trials already present in the
    JSON cache are not re-run, and the graph conversion is skipped when no
    trial is missing. The summary covers every cached trial, including trials
    with a number above `num_trials`. The CSV is opened in append mode, so
    each call adds one more "Infomap" line.
    """
    y_true = data.y.cpu().numpy()

    # Re-seeding the global NumPy RNG on every call is kept deliberately: it
    # makes the seed list reproducible and was already a side effect before.
    np.random.seed(47)
    # One seed per requested trial (previously hard-coded to 25, which silently
    # ignored `num_trials`).
    seeds = np.random.randint(0,100000, size = num_trials)

    results_dir = Path(f"./results/real-world{'-fixed-size-arch' if fixed_size_arch else ''}")
    results_dir.mkdir(parents = True, exist_ok = True)

    out = results_dir / f"Infomap-{data.name}.json"

    if out.exists():
        with open(out, "r") as fh:
            res = json.load(fh)
    else:
        res = dict()

    pending = [(trial, seed) for trial, seed in enumerate(seeds, start = 1) if str(trial) not in res]

    if pending:
        # Build the graph only when there is work left to do.
        G          = data_to_networkx(data, directed = directed)
        node_order = sorted(G.nodes)

        for trial, seed in pending:
            im = Infomap(silent = True, two_level = True, num_trials = 1, seed = seed)
            im.add_networkx_graph(G)
            im.run()
            modules = dict(im.modules)

            # Predicted labels in sorted node order, to be compared with y_true.
            y_pred = [modules[u] for u in node_order]
            ami    = adjusted_mutual_info_score(y_true, y_pred)
            m      = len(set(y_pred))

            res[str(trial)] = dict(loss = im.codelength, y_pred = y_pred, ami = ami, m = m)

            # Persist after every trial so an interruption loses at most one.
            with open(out, "w") as fh:
                json.dump(res, fh)

    if len(res) == 0:
        # Nothing was cached or computed (e.g. num_trials = 0): no summary.
        return

    amis = [v['ami'] for v in res.values()]
    ms   = [v['m']   for v in res.values()]

    # The starred values belong to the trial with the lowest codelength.
    best_result = min(res.values(), key = lambda r: r["loss"])
    avg_ami     = np.average(amis)
    std_ami     = np.std(amis)
    avg_m       = np.average(ms)
    std_m       = np.std(ms)
    print(f"[Infomap   ] <AMI> = {100 * avg_ami:4.1f} ± {100 * std_ami:4.1f}, <|M|> = {avg_m:6.1f} ± {std_m:6.1f}, AMI* = {100 * best_result['ami']:4.1f}, |M|* = {best_result['m']:6}")

    res_str = f"Infomap,infomap,{100 * avg_ami:.1f},{100 * std_ami:.1f},{avg_m:.1f},{std_m:.1f}\n"

    with open(results_dir / f"{data.name}.csv", "a") as fh:
        fh.write(res_str)

# Cora

In [8]:
# Load the Planetoid "Cora" dataset and take its first graph.
dataset = Planetoid(root = "data/Planetoid", name = "Cora")
data    = dataset[0]

get_dataset_stats(dataset, data)

# Repackage the graph into a minimal Data object whose `edge_index` is the
# adjacency as a coalesced sparse COO tensor with a weight of 1.0 per edge.
cora = Data()
cora.edge_index  = torch.sparse_coo_tensor(
    indices = data.edge_index,
    values  = torch.ones(data.num_edges, dtype = torch.float32),
    size    = (data.num_nodes, data.num_nodes),
).coalesce()
cora.x           = data.x
cora.y           = data.y
cora.num_classes = dataset.num_classes
cora.name        = "Cora"

print(f"Mixing µ = {get_mixing(cora, directed = False):.2f}")

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...



Dataset: Cora():
Number of graphs: 1
Number of features: 1433
Number of classes: 7

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
Number of edges: 10556
Average node degree: 3.9
Has isolated nodes: False
Has self-loops: False
Is undirected: True
Mixing µ = 0.19


Done!


In [9]:
# Evaluate all neural clustering objectives and then Infomap on Cora, first
# with `fixed_size_arch` off and then with it on.
for fixed_size_arch in (False, True):
    run( data            = cora
       , methods         = [Neuromap, NOCD, DiffPool, MinCut, Ortho, DMoN]
       , num_trials      = 25
       , verbose         = False
       , fixed_size_arch = fixed_size_arch
       )
    print("=====")
    run_infomap( cora
               , directed        = False
               , num_trials      = 25
               , fixed_size_arch = fixed_size_arch
               )

lin
=====
[Neuromap  ] <AMI> = 31.9 ±  7.9, <|M|> =   14.4 ±    3.7, AMI* = 37.5, |M|* =     20
[NOCD      ] <AMI> =  5.1 ±  1.9, <|M|> =   20.5 ±   10.2, AMI* =  4.8, |M|* =     48
[DiffPool  ] <AMI> =  0.4 ±  1.1, <|M|> =    2.6 ±    1.1, AMI* =  5.1, |M|* =      6
[MinCut    ] <AMI> = 26.1 ±  2.5, <|M|> =   51.3 ±    0.9, AMI* = 28.5, |M|* =     52
[Ortho     ] <AMI> =  7.0 ±  0.7, <|M|> =   48.7 ±    1.3, AMI* =  6.5, |M|* =     51
[DMoN      ] <AMI> =  1.2 ±  1.6, <|M|> =   13.6 ±   13.3, AMI* =  0.3, |M|* =      7
mlp
=====
[Neuromap  ] <AMI> = 36.6 ±  2.4, <|M|> =   44.4 ±    2.5, AMI* = 37.5, |M|* =     44
[NOCD      ] <AMI> = 46.1 ±  1.1, <|M|> =   29.3 ±    2.7, AMI* = 46.1, |M|* =     29
[DiffPool  ] <AMI> =  9.0 ±  1.2, <|M|> =   51.8 ±    0.4, AMI* =  9.1, |M|* =     52
[MinCut    ] <AMI> = 34.7 ±  6.6, <|M|> =   51.5 ±    1.6, AMI* = 36.4, |M|* =     52
[Ortho     ] <AMI> =  5.3 ±  0.5, <|M|> =   52.0 ±    0.0, AMI* =  5.7, |M|* =     52
[DMoN      ] <AMI> = 36.6 ±  1.2, 

# CiteSeer

In [10]:
from torch_geometric.datasets import Planetoid

# Load the Planetoid "CiteSeer" dataset and take its first graph.
dataset = Planetoid(root = "data/Planetoid", name = "CiteSeer")
data    = dataset[0]

get_dataset_stats(dataset, data)

# Repackage the graph into a minimal Data object whose `edge_index` is the
# adjacency as a coalesced sparse COO tensor with a weight of 1.0 per edge.
citeseer = Data()
citeseer.edge_index  = torch.sparse_coo_tensor(
    indices = data.edge_index,
    values  = torch.ones(data.num_edges, dtype = torch.float32),
    size    = (data.num_nodes, data.num_nodes),
).coalesce()
citeseer.x           = data.x
citeseer.y           = data.y
citeseer.num_classes = dataset.num_classes
citeseer.name        = "CiteSeer"

print(f"Mixing µ = {get_mixing(citeseer, directed = False):.2f}")

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...



Dataset: CiteSeer():
Number of graphs: 1
Number of features: 3703
Number of classes: 6

Data(x=[3327, 3703], edge_index=[2, 9104], y=[3327], train_mask=[3327], val_mask=[3327], test_mask=[3327])
Number of edges: 9104
Average node degree: 2.7
Has isolated nodes: True
Has self-loops: False
Is undirected: True
Mixing µ = 0.26


Done!


In [11]:
# Evaluate all neural clustering objectives and then Infomap on CiteSeer,
# first with `fixed_size_arch` off and then with it on.
for fixed_size_arch in (False, True):
    run( data            = citeseer
       , methods         = [Neuromap, NOCD, DiffPool, MinCut, Ortho, DMoN]
       , num_trials      = 25
       , verbose         = False
       , fixed_size_arch = fixed_size_arch
       )
    print("=====")
    run_infomap( citeseer
               , directed        = False
               , num_trials      = 25
               , fixed_size_arch = fixed_size_arch
               )

lin
=====
[Neuromap  ] <AMI> = 19.5 ±  1.6, <|M|> =   38.0 ±    3.9, AMI* = 21.9, |M|* =     43
[NOCD      ] <AMI> =  2.4 ±  1.3, <|M|> =   17.5 ±   21.5, AMI* =  4.5, |M|* =     57
[DiffPool  ] <AMI> =  1.1 ±  1.9, <|M|> =    4.6 ±    3.3, AMI* =  2.3, |M|* =     10
[MinCut    ] <AMI> = 16.5 ±  1.3, <|M|> =   54.8 ±    1.9, AMI* = 15.2, |M|* =     58
[Ortho     ] <AMI> =  5.6 ±  0.7, <|M|> =   53.4 ±    1.7, AMI* =  5.1, |M|* =     57
[DMoN      ] <AMI> =  3.2 ±  1.6, <|M|> =   25.1 ±   10.8, AMI* =  3.7, |M|* =     46
mlp
=====
[Neuromap  ] <AMI> = 15.8 ±  1.1, <|M|> =   57.3 ±    0.9, AMI* = 16.8, |M|* =     58
[NOCD      ] <AMI> = 34.8 ±  0.9, <|M|> =   23.6 ±    4.1, AMI* = 35.4, |M|* =     22
[DiffPool  ] <AMI> =  6.7 ±  1.0, <|M|> =   58.0 ±    0.2, AMI* =  6.3, |M|* =     58
[MinCut    ] <AMI> = 20.5 ±  0.9, <|M|> =   58.0 ±    0.0, AMI* = 19.9, |M|* =     58
[Ortho     ] <AMI> =  3.8 ±  0.4, <|M|> =   58.0 ±    0.0, AMI* =  4.0, |M|* =     58
[DMoN      ] <AMI> = 22.5 ±  0.9, 

# PubMed

In [12]:
# Load the Planetoid "PubMed" dataset and take its first graph.
dataset = Planetoid(root = "data/Planetoid", name = "PubMed")
data    = dataset[0]

get_dataset_stats(dataset, data)

# Repackage the graph into a minimal Data object whose `edge_index` is the
# adjacency as a coalesced sparse COO tensor with a weight of 1.0 per edge.
pubmed = Data()
pubmed.edge_index  = torch.sparse_coo_tensor(
    indices = data.edge_index,
    values  = torch.ones(data.num_edges, dtype = torch.float32),
    size    = (data.num_nodes, data.num_nodes),
).coalesce()
pubmed.x           = data.x
pubmed.y           = data.y
pubmed.num_classes = dataset.num_classes
pubmed.name        = "PubMed"

print(f"Mixing µ = {get_mixing(pubmed, directed = False):.2f}")

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.test.index
Processing...



Dataset: PubMed():
Number of graphs: 1
Number of features: 500
Number of classes: 3

Data(x=[19717, 500], edge_index=[2, 88648], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717])
Number of edges: 88648
Average node degree: 4.5
Has isolated nodes: False
Has self-loops: False
Is undirected: True
Mixing µ = 0.20


Done!


In [13]:
# Evaluate all neural clustering objectives and then Infomap on PubMed, first
# with `fixed_size_arch` off and then with it on.
for fixed_size_arch in (False, True):
    run( data            = pubmed
       , methods         = [Neuromap, NOCD, DiffPool, MinCut, Ortho, DMoN]
       , num_trials      = 25
       , verbose         = False
       , fixed_size_arch = fixed_size_arch
       )
    print("=====")
    run_infomap( pubmed
               , directed        = False
               , num_trials      = 25
               , fixed_size_arch = fixed_size_arch
               )

lin
=====
[Neuromap  ] <AMI> = 17.2 ± 12.2, <|M|> =   12.4 ±    8.1, AMI* = 27.7, |M|* =     22
[NOCD      ] <AMI> =  3.7 ±  1.7, <|M|> =   12.1 ±    3.1, AMI* =  6.1, |M|* =      8
[DiffPool  ] <AMI> =  0.0 ±  0.0, <|M|> =    1.0 ±    0.0, AMI* =  0.0, |M|* =      1
[MinCut    ] <AMI> =  2.2 ±  1.0, <|M|> =   40.0 ±    6.1, AMI* =  2.9, |M|* =     50
[Ortho     ] <AMI> = 16.5 ±  0.4, <|M|> =   68.1 ±    5.7, AMI* = 16.4, |M|* =     81
[DMoN      ] <AMI> =  0.7 ±  1.0, <|M|> =    2.5 ±    1.7, AMI* =  3.4, |M|* =      6
mlp
=====
[Neuromap  ] <AMI> = 23.2 ±  1.8, <|M|> =   30.8 ±    3.8, AMI* = 22.9, |M|* =     34
[NOCD      ] <AMI> = 20.5 ±  2.4, <|M|> =   29.8 ±    2.7, AMI* = 22.3, |M|* =     27
[DiffPool  ] <AMI> = 18.9 ±  1.4, <|M|> =   43.6 ±    4.7, AMI* = 17.1, |M|* =     44
[MinCut    ] <AMI> =  5.1 ±  2.4, <|M|> =   25.7 ±    7.4, AMI* =  8.0, |M|* =     35
[Ortho     ] <AMI> = 10.6 ±  0.4, <|M|> =  126.9 ±    4.7, AMI* = 10.9, |M|* =    135
[DMoN      ] <AMI> =  6.7 ±  4.1, 

# Amazon Computers (PC)

In [14]:
# Load the Amazon "Computers" dataset and take its first graph.
dataset = Amazon(root = "data/Amazon", name = "Computers")
data    = dataset[0]

get_dataset_stats(dataset, data)

# Repackage the graph into a minimal Data object whose `edge_index` is the
# adjacency as a coalesced sparse COO tensor with a weight of 1.0 per edge.
pc = Data()
pc.edge_index  = torch.sparse_coo_tensor(
    indices = data.edge_index,
    values  = torch.ones(data.num_edges, dtype = torch.float32),
    size    = (data.num_nodes, data.num_nodes),
).coalesce()
pc.x           = data.x
pc.y           = data.y
pc.num_classes = dataset.num_classes
pc.name        = "PC"

print(f"Mixing µ = {get_mixing(pc, directed = False):.2f}")

Downloading https://github.com/shchur/gnn-benchmark/raw/master/data/npz/amazon_electronics_computers.npz
Processing...
Done!



Dataset: AmazonComputers():
Number of graphs: 1
Number of features: 767
Number of classes: 10

Data(x=[13752, 767], edge_index=[2, 491722], y=[13752])
Number of edges: 491722
Average node degree: 35.8
Has isolated nodes: True
Has self-loops: False
Is undirected: True
Mixing µ = 0.22


In [15]:
# Evaluate all neural clustering objectives and then Infomap on Amazon
# Computers, first with `fixed_size_arch` off and then with it on.
for fixed_size_arch in (False, True):
    run( data            = pc
       , methods         = [Neuromap, NOCD, DiffPool, MinCut, Ortho, DMoN]
       , num_trials      = 25
       , verbose         = False
       , fixed_size_arch = fixed_size_arch
       )
    print("=====")
    run_infomap( pc
               , directed        = False
               , num_trials      = 25
               , fixed_size_arch = fixed_size_arch
               )

lin
=====
[Neuromap  ] <AMI> =  0.0 ±  0.0, <|M|> =    1.0 ±    0.0, AMI* =  0.0, |M|* =      1
[NOCD      ] <AMI> =  1.2 ±  2.7, <|M|> =    1.7 ±    1.0, AMI* =  8.9, |M|* =      3
[DiffPool  ] <AMI> = -0.0 ±  0.0, <|M|> =    1.2 ±    0.5, AMI* =  0.0, |M|* =      1
[MinCut    ] <AMI> = -0.0 ±  0.0, <|M|> =    1.0 ±    0.2, AMI* =  0.0, |M|* =      1
[Ortho     ] <AMI> = 18.8 ±  9.0, <|M|> =    9.5 ±    4.1, AMI* = 25.7, |M|* =     16
[DMoN      ] <AMI> =  3.4 ±  2.9, <|M|> =    2.4 ±    1.0, AMI* =  9.9, |M|* =      4
mlp
=====
[Neuromap  ] <AMI> = 22.4 ±  8.6, <|M|> =    3.0 ±    0.8, AMI* = 40.3, |M|* =      4
[NOCD      ] <AMI> = 45.5 ±  2.5, <|M|> =   13.9 ±    3.9, AMI* = 48.8, |M|* =     12
[DiffPool  ] <AMI> =  1.6 ±  0.4, <|M|> =    2.2 ±    0.5, AMI* =  1.6, |M|* =      3
[MinCut    ] <AMI> =  0.0 ±  0.0, <|M|> =    1.0 ±    0.0, AMI* =  0.0, |M|* =      1
[Ortho     ] <AMI> = 19.8 ±  5.5, <|M|> =    8.8 ±    2.3, AMI* = 26.4, |M|* =     14
[DMoN      ] <AMI> = 15.4 ± 11.6, 

# Amazon Photo

In [16]:
# Load the Amazon "Photo" dataset and take its first graph.
dataset = Amazon(root = "data/Amazon", name = "Photo")
data    = dataset[0]

get_dataset_stats(dataset, data)

# Repackage the graph into a minimal Data object whose `edge_index` is the
# adjacency as a coalesced sparse COO tensor with a weight of 1.0 per edge.
photo = Data()
photo.edge_index  = torch.sparse_coo_tensor(
    indices = data.edge_index,
    values  = torch.ones(data.num_edges, dtype = torch.float32),
    size    = (data.num_nodes, data.num_nodes),
).coalesce()
photo.x           = data.x
photo.y           = data.y
photo.num_classes = dataset.num_classes
photo.name        = "Photo"

print(f"Mixing µ = {get_mixing(photo, directed = False):.2f}")

Downloading https://github.com/shchur/gnn-benchmark/raw/master/data/npz/amazon_electronics_photo.npz
Processing...
Done!



Dataset: AmazonPhoto():
Number of graphs: 1
Number of features: 745
Number of classes: 8

Data(x=[7650, 745], edge_index=[2, 238162], y=[7650])
Number of edges: 238162
Average node degree: 31.1
Has isolated nodes: True
Has self-loops: False
Is undirected: True
Mixing µ = 0.17


In [17]:
# Evaluate all neural clustering objectives and then Infomap on Amazon Photo,
# first with `fixed_size_arch` off and then with it on.
for fixed_size_arch in (False, True):
    run( data            = photo
       , methods         = [Neuromap, NOCD, DiffPool, MinCut, Ortho, DMoN]
       , num_trials      = 25
       , verbose         = False
       , fixed_size_arch = fixed_size_arch
       )
    print("=====")
    run_infomap( photo
               , directed        = False
               , num_trials      = 25
               , fixed_size_arch = fixed_size_arch
               )

lin
=====
[Neuromap  ] <AMI> =  0.0 ±  0.0, <|M|> =    1.0 ±    0.0, AMI* =  0.0, |M|* =      1
[NOCD      ] <AMI> =  0.1 ±  0.3, <|M|> =    1.0 ±    0.2, AMI* =  1.5, |M|* =      2
[DiffPool  ] <AMI> =  0.0 ±  0.0, <|M|> =    1.0 ±    0.0, AMI* =  0.0, |M|* =      1
[MinCut    ] <AMI> =  0.0 ±  0.0, <|M|> =    1.1 ±    0.3, AMI* =  0.0, |M|* =      1
[Ortho     ] <AMI> = 24.7 ± 11.4, <|M|> =    9.4 ±    5.1, AMI* = 32.9, |M|* =     19
[DMoN      ] <AMI> =  3.7 ±  4.0, <|M|> =    2.6 ±    1.4, AMI* =  1.7, |M|* =      5
mlp
=====
[Neuromap  ] <AMI> = 39.8 ± 11.2, <|M|> =    4.0 ±    1.2, AMI* = 59.8, |M|* =      7
[NOCD      ] <AMI> = 60.5 ±  3.4, <|M|> =   11.0 ±    2.5, AMI* = 67.6, |M|* =     10
[DiffPool  ] <AMI> =  3.1 ±  0.9, <|M|> =    2.2 ±    0.4, AMI* =  2.8, |M|* =      3
[MinCut    ] <AMI> =  5.2 ±  9.9, <|M|> =    1.2 ±    0.4, AMI* = 30.6, |M|* =      2
[Ortho     ] <AMI> = 30.6 ±  4.0, <|M|> =   11.2 ±    2.0, AMI* = 38.4, |M|* =     17
[DMoN      ] <AMI> = 43.5 ±  8.9, 

# Coauthor CS

In [18]:
# Load the Coauthor "CS" dataset and take its first graph.
dataset = Coauthor(root = "data/Coauthor", name = "CS")
data    = dataset[0]

get_dataset_stats(dataset, data)

# Repackage the graph into a minimal Data object whose `edge_index` is the
# adjacency as a coalesced sparse COO tensor with a weight of 1.0 per edge.
cs = Data()
cs.edge_index  = torch.sparse_coo_tensor(
    indices = data.edge_index,
    values  = torch.ones(data.num_edges, dtype = torch.float32),
    size    = (data.num_nodes, data.num_nodes),
).coalesce()
cs.x           = data.x
cs.y           = data.y
cs.num_classes = dataset.num_classes
cs.name        = "CS"

print(f"Mixing µ = {get_mixing(cs, directed = False):.2f}")

Downloading https://github.com/shchur/gnn-benchmark/raw/master/data/npz/ms_academic_cs.npz
Processing...
Done!



Dataset: CoauthorCS():
Number of graphs: 1
Number of features: 6805
Number of classes: 15

Data(x=[18333, 6805], edge_index=[2, 163788], y=[18333])
Number of edges: 163788
Average node degree: 8.9
Has isolated nodes: False
Has self-loops: False
Is undirected: True
Mixing µ = 0.19


In [19]:
# Evaluate all neural clustering objectives and then Infomap on Coauthor CS,
# first with `fixed_size_arch` off and then with it on.
for fixed_size_arch in (False, True):
    run( data            = cs
       , methods         = [Neuromap, NOCD, DiffPool, MinCut, Ortho, DMoN]
       , num_trials      = 25
       , verbose         = False
       , fixed_size_arch = fixed_size_arch
       )
    print("=====")
    run_infomap( cs
               , directed        = False
               , num_trials      = 25
               , fixed_size_arch = fixed_size_arch
               )

lin
=====
[Neuromap  ] <AMI> = 76.7 ±  1.4, <|M|> =   50.1 ±    5.2, AMI* = 75.9, |M|* =     54
[NOCD      ] <AMI> = 40.2 ±  9.8, <|M|> =   34.7 ±    8.5, AMI* = 51.0, |M|* =     39
[DiffPool  ] <AMI> = 22.2 ± 14.3, <|M|> =    6.7 ±    6.1, AMI* = 43.4, |M|* =     16
[MinCut    ] <AMI> = 57.5 ±  0.6, <|M|> =   91.0 ±    3.9, AMI* = 57.4, |M|* =    101
[Ortho     ] <AMI> = 45.4 ±  2.0, <|M|> =   97.4 ±    6.6, AMI* = 41.6, |M|* =    111
[DMoN      ] <AMI> = 25.2 ± 10.8, <|M|> =   10.0 ±    6.3, AMI* = 25.8, |M|* =     31
mlp
=====
[Neuromap  ] <AMI> = 78.1 ±  2.0, <|M|> =   22.2 ±    2.4, AMI* = 81.1, |M|* =     24
[NOCD      ] <AMI> = 73.9 ±  1.7, <|M|> =   20.4 ±    3.7, AMI* = 72.2, |M|* =     24
[DiffPool  ] <AMI> = 64.5 ±  4.8, <|M|> =   16.5 ±    2.6, AMI* = 66.5, |M|* =     21
[MinCut    ] <AMI> = 53.9 ±  6.4, <|M|> =   21.2 ±   41.8, AMI* = 44.3, |M|* =    135
[Ortho     ] <AMI> = 56.3 ±  1.7, <|M|> =   48.1 ±    9.1, AMI* = 52.7, |M|* =     68
[DMoN      ] <AMI> = 40.2 ±  6.5, 

# Coauthor Physics

In [20]:
# Load the Coauthor "Physics" dataset and take its first graph.
dataset = Coauthor(root = "data/Coauthor", name = "Physics")
data    = dataset[0]

get_dataset_stats(dataset, data)

# Repackage the graph into a minimal Data object whose `edge_index` is the
# adjacency as a coalesced sparse COO tensor with a weight of 1.0 per edge.
physics = Data()
physics.edge_index  = torch.sparse_coo_tensor(
    indices = data.edge_index,
    values  = torch.ones(data.num_edges, dtype = torch.float32),
    size    = (data.num_nodes, data.num_nodes),
).coalesce()
physics.x           = data.x
physics.y           = data.y
physics.num_classes = dataset.num_classes
physics.name        = "Physics"

print(f"Mixing µ = {get_mixing(physics, directed = False):.2f}")

Downloading https://github.com/shchur/gnn-benchmark/raw/master/data/npz/ms_academic_phy.npz
Processing...
Done!



Dataset: CoauthorPhysics():
Number of graphs: 1
Number of features: 8415
Number of classes: 5

Data(x=[34493, 8415], edge_index=[2, 495924], y=[34493])
Number of edges: 495924
Average node degree: 14.4
Has isolated nodes: False
Has self-loops: False
Is undirected: True
Mixing µ = 0.07


In [21]:
# Evaluate all neural clustering objectives and then Infomap on Coauthor
# Physics, first with `fixed_size_arch` off and then with it on.
for fixed_size_arch in (False, True):
    run( data            = physics
       , methods         = [Neuromap, NOCD, DiffPool, MinCut, Ortho, DMoN]
       , num_trials      = 25
       , verbose         = False
       , fixed_size_arch = fixed_size_arch
       )
    print("=====")
    run_infomap( physics
               , directed        = False
               , num_trials      = 25
               , fixed_size_arch = fixed_size_arch
               )

lin
=====
[Neuromap  ] <AMI> = 56.5 ±  1.2, <|M|> =   34.5 ±    4.5, AMI* = 56.4, |M|* =     36
[NOCD      ] <AMI> = 19.3 ±  7.0, <|M|> =   21.6 ±    2.4, AMI* = 13.3, |M|* =     25
[DiffPool  ] <AMI> = 18.9 ± 15.9, <|M|> =    8.1 ±    5.5, AMI* = 35.7, |M|* =     16
[MinCut    ] <AMI> = 36.8 ±  0.9, <|M|> =   89.6 ±    6.2, AMI* = 35.4, |M|* =    101
[Ortho     ] <AMI> = 26.7 ±  1.5, <|M|> =  149.5 ±    5.4, AMI* = 24.6, |M|* =    161
[DMoN      ] <AMI> =  5.7 ±  7.3, <|M|> =    7.7 ±    6.7, AMI* =  9.5, |M|* =     13
mlp
=====
[Neuromap  ] <AMI> = 55.9 ±  2.2, <|M|> =   35.8 ±    3.3, AMI* = 55.5, |M|* =     40
[NOCD      ] <AMI> = 50.2 ±  1.0, <|M|> =   22.8 ±    2.7, AMI* = 48.8, |M|* =     27
[DiffPool  ] <AMI> = 49.1 ±  4.4, <|M|> =   39.8 ±    6.1, AMI* = 49.3, |M|* =     43
[MinCut    ] <AMI> = 29.4 ±  9.9, <|M|> =  118.6 ±   24.6, AMI* = 76.1, |M|* =      6
[Ortho     ] <AMI> = 32.0 ±  1.1, <|M|> =  101.7 ±   11.2, AMI* = 31.8, |M|* =    129
[DMoN      ] <AMI> = 34.7 ±  3.5, 

# Cora ML

In [22]:
# Load the Cora-ML dataset and take its first graph.
dataset = Cora_ml(root = "data/Cora-ML")
data    = dataset[0]

get_dataset_stats(dataset, data)

# The dataset object is relabelled only after its statistics were printed.
dataset.name = "Cora-ML-Directed"

# Repackage the graph into a minimal Data object whose `edge_index` is the
# adjacency as a coalesced sparse COO tensor with a weight of 1.0 per edge.
cora_ml = Data()
cora_ml.edge_index  = torch.sparse_coo_tensor(
    indices = data.edge_index,
    values  = torch.ones(data.num_edges, dtype = torch.float32),
    size    = (data.num_nodes, data.num_nodes),
).coalesce()
cora_ml.x           = data.x
cora_ml.y           = data.y
cora_ml.num_classes = dataset.num_classes
cora_ml.name        = "Cora-ML"

# NOTE(review): mixing is computed with directed = False although Infomap is
# later run with directed = True on this graph -- confirm this is intended.
print(f"Mixing µ = {get_mixing(cora_ml, directed = False):.2f}")

Downloading https://github.com/SherylHYX/pytorch_geometric_signed_directed/raw/main/datasets/cora_ml.npz
Processing...



Dataset: Cora_ml():
Number of graphs: 1
Number of features: 2879
Number of classes: 7

Data(x=[2995, 2879], edge_index=[2, 8416], y=[2995], edge_weight=[8416], train_mask=[2995, 10], val_mask=[2995, 10], test_mask=[2995, 10], seed_mask=[2995, 10])
Number of edges: 8416
Average node degree: 2.8
Has isolated nodes: False
Has self-loops: False
Is undirected: False
Mixing µ = 0.21


Done!


In [23]:
# Evaluate all neural clustering objectives and then directed Infomap on
# Cora-ML, first with `fixed_size_arch` off and then with it on.
for fixed_size_arch in (False, True):
    run( data            = cora_ml
       , methods         = [Neuromap, NOCD, DiffPool, MinCut, Ortho, DMoN]
       , num_trials      = 25
       , verbose         = False
       , fixed_size_arch = fixed_size_arch
       )
    print("=====")
    run_infomap( cora_ml
               , directed        = True
               , num_trials      = 25
               , fixed_size_arch = fixed_size_arch
               )

lin
=====
[Neuromap  ] <AMI> = 24.4 ± 11.9, <|M|> =   11.4 ±    2.5, AMI* = 39.2, |M|* =     12
[NOCD      ] <AMI> =  3.3 ±  1.1, <|M|> =   24.8 ±    3.9, AMI* =  0.9, |M|* =     21
[DiffPool  ] <AMI> =  0.0 ±  0.1, <|M|> =    1.0 ±    0.2, AMI* =  0.0, |M|* =      1
[MinCut    ] <AMI> = 30.4 ±  1.2, <|M|> =   55.0 ±    0.0, AMI* = 30.8, |M|* =     55
[Ortho     ] <AMI> = 15.6 ±  1.1, <|M|> =   34.5 ±    3.3, AMI* = 14.7, |M|* =     41
[DMoN      ] <AMI> =  0.4 ±  0.8, <|M|> =    4.5 ±    6.2, AMI* =  0.1, |M|* =      4
mlp
=====
[Neuromap  ] <AMI> = 34.5 ±  1.6, <|M|> =   48.3 ±    1.9, AMI* = 34.0, |M|* =     43
[NOCD      ] <AMI> = 46.5 ±  1.4, <|M|> =   27.6 ±    1.7, AMI* = 47.6, |M|* =     28
[DiffPool  ] <AMI> =  9.5 ±  1.2, <|M|> =   55.0 ±    0.0, AMI* =  8.3, |M|* =     55
[MinCut    ] <AMI> = 39.6 ±  1.3, <|M|> =   38.1 ±    5.8, AMI* = 39.8, |M|* =     50
[Ortho     ] <AMI> =  5.3 ±  0.5, <|M|> =   55.0 ±    0.0, AMI* =  5.7, |M|* =     55
[DMoN      ] <AMI> = 36.2 ±  1.2, 

# Wiki CS

In [24]:
# Load the WikiCS dataset and take its first graph.
dataset = WikiCS(root = "data/WikiCS")
data    = dataset[0]

get_dataset_stats(dataset, data)

# The dataset object is relabelled only after its statistics were printed.
dataset.name = "WikiCS-Directed"

# Repackage the graph into a minimal Data object whose `edge_index` is the
# adjacency as a coalesced sparse COO tensor with a weight of 1.0 per edge.
wiki_cs = Data()
wiki_cs.edge_index  = torch.sparse_coo_tensor(
    indices = data.edge_index,
    values  = torch.ones(data.num_edges, dtype = torch.float32),
    size    = (data.num_nodes, data.num_nodes),
).coalesce()
wiki_cs.x           = data.x
wiki_cs.y           = data.y
wiki_cs.num_classes = dataset.num_classes
wiki_cs.name        = "WikiCS"

# NOTE(review): mixing is computed with directed = False although Infomap is
# later run with directed = True on this graph -- confirm this is intended.
print(f"Mixing µ = {get_mixing(wiki_cs, directed = False):.2f}")

Downloading https://github.com/pmernyei/wiki-cs-dataset/raw/master/dataset/data.json
Processing...
Done!



Dataset: WikiCS():
Number of graphs: 1
Number of features: 300
Number of classes: 10

Data(x=[11701, 300], edge_index=[2, 297110], y=[11701], train_mask=[11701, 20], val_mask=[11701, 20], test_mask=[11701], stopping_mask=[11701, 20])
Number of edges: 297110
Average node degree: 25.4
Has isolated nodes: True
Has self-loops: True
Is undirected: False
Mixing µ = 0.34


In [25]:
# Evaluate all neural clustering objectives and then directed Infomap on
# WikiCS, first with `fixed_size_arch` off and then with it on.
for fixed_size_arch in (False, True):
    run( data            = wiki_cs
       , methods         = [Neuromap, NOCD, DiffPool, MinCut, Ortho, DMoN]
       , num_trials      = 25
       , verbose         = False
       , fixed_size_arch = fixed_size_arch
       )
    print("=====")
    run_infomap( wiki_cs
               , directed        = True
               , num_trials      = 25
               , fixed_size_arch = fixed_size_arch
               )

lin
=====
[Neuromap  ] <AMI> = 41.9 ±  2.3, <|M|> =   17.0 ±    1.7, AMI* = 40.5, |M|* =     19
[NOCD      ] <AMI> = 10.8 ±  3.1, <|M|> =   11.2 ±    0.9, AMI* = 13.2, |M|* =     12
[DiffPool  ] <AMI> =  2.4 ±  2.1, <|M|> =    4.4 ±    1.0, AMI* =  3.7, |M|* =      3
[MinCut    ] <AMI> = 16.0 ±  1.4, <|M|> =   48.5 ±    4.9, AMI* = 14.9, |M|* =     42
[Ortho     ] <AMI> = 29.6 ±  0.9, <|M|> =   33.2 ±    2.6, AMI* = 30.3, |M|* =     37
[DMoN      ] <AMI> =  4.8 ±  3.9, <|M|> =   10.9 ±    5.6, AMI* =  5.3, |M|* =      6
mlp
=====
[Neuromap  ] <AMI> = 43.9 ±  2.4, <|M|> =   12.3 ±    1.3, AMI* = 41.6, |M|* =     16
[NOCD      ] <AMI> = 40.9 ±  1.2, <|M|> =   28.3 ±    2.0, AMI* = 41.5, |M|* =     28
[DiffPool  ] <AMI> = 25.8 ±  2.6, <|M|> =   10.0 ±    1.4, AMI* = 24.4, |M|* =      8
[MinCut    ] <AMI> = 28.1 ±  4.2, <|M|> =  101.4 ±   19.2, AMI* = 28.6, |M|* =    108
[Ortho     ] <AMI> = 28.8 ±  0.9, <|M|> =   41.0 ±    6.2, AMI* = 29.5, |M|* =     52
[DMoN      ] <AMI> = 31.5 ±  8.0, 

# ogbn-arxiv

In [26]:
# Load the OGB "ogbn-arxiv" dataset and take its first graph.
dataset = PygNodePropPredDataset(root = "data/ogbn-arxiv", name = "ogbn-arxiv")
data    = dataset[0]

get_dataset_stats(dataset, data)

# Repackage the graph into a minimal Data object whose `edge_index` is the
# adjacency as a coalesced sparse COO tensor with a weight of 1.0 per edge.
arxiv = Data()
arxiv.edge_index  = torch.sparse_coo_tensor(
    indices = data.edge_index,
    values  = torch.ones(data.num_edges, dtype = torch.float32),
    size    = (data.num_nodes, data.num_nodes),
).coalesce()
arxiv.x           = data.x
arxiv.y           = data.y.reshape(-1)  # flatten the labels to one dimension
arxiv.num_classes = dataset.num_classes
arxiv.name        = "arxiv"

# NOTE(review): mixing is computed with directed = False although Infomap is
# later run with directed = True on this graph -- confirm this is intended.
print(f"Mixing µ = {get_mixing(arxiv, directed = False):.2f}")


Dataset: PygNodePropPredDataset():
Number of graphs: 1
Number of features: 128
Number of classes: 40

Data(num_nodes=169343, edge_index=[2, 1166243], x=[169343, 128], node_year=[169343, 1], y=[169343, 1])
Number of edges: 1166243
Average node degree: 6.9
Has isolated nodes: False
Has self-loops: False
Is undirected: False
Mixing µ = 0.35


In [27]:
# Evaluate all neural clustering objectives and then directed Infomap on
# ogbn-arxiv, first with `fixed_size_arch` off and then with it on.
for fixed_size_arch in (False, True):
    run( data            = arxiv
       , methods         = [Neuromap, NOCD, DiffPool, MinCut, Ortho, DMoN]
       , num_trials      = 25
       , verbose         = False
       , fixed_size_arch = fixed_size_arch
       )
    print("=====")
    run_infomap( arxiv
               , directed        = True
               , num_trials      = 25
               , fixed_size_arch = fixed_size_arch
               )

lin
=====
[Neuromap  ] <AMI> =  4.7 ±  7.7, <|M|> =    2.4 ±    1.6, AMI* = 20.6, |M|* =      6
[NOCD      ] <AMI> =  4.2 ±  2.5, <|M|> =   12.9 ±    2.8, AMI* =  6.5, |M|* =     19
CUDA out of memory. Tried to allocate 106.83 GiB. GPU 0 has a total capacity of 24.00 GiB of which 21.47 GiB is free. Of the allocated memory 1.01 GiB is allocated by PyTorch, and 234.45 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
CUDA out of memory. Tried to allocate 106.83 GiB. GPU 0 has a total capacity of 24.00 GiB of which 21.47 GiB is free. Of the allocated memory 1.01 GiB is allocated by PyTorch, and 234.45 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid