In [1]:
import os
import pickle
import networkx as nx
from tqdm import tqdm
import torch
from torch.nn import (
    BatchNorm1d,
    Embedding,
    Linear,
    ModuleList,
    ReLU,
    Sequential,
)
from torch.optim.lr_scheduler import ReduceLROnPlateau

import numpy as np
import math

from torch_geometric.datasets import TUDataset
from torch_geometric.data import Data
from torch_geometric.utils import to_networkx, from_networkx, to_dense_adj
import torch_geometric.transforms as T
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool, global_add_pool #, global_max_pool

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def generate_max_degree_graph(num_nodes: int, topology: str="complete",
                              random_features: str="gaussian", feature_dim: int=1) -> Data:
    """Build one synthetic graph whose label is its maximum node degree.

    Args:
        num_nodes: Size argument handed to the topology builder; must be
            positive. For "regular" it is passed as both the row and the
            column count of the grid builder.
        topology: One of "complete", "path", "cycle", "regular", "tree", "ER".
        random_features: Distribution of the node features; only "gaussian"
            is supported.
        feature_dim: Length of each node's feature vector; must be positive.

    Returns:
        The graph converted with ``from_networkx``, with ``y`` set to a
        one-element tensor holding the largest node degree.

    Raises:
        AssertionError: If an argument fails one of the checks below.
    """
    supported_topologies = ["complete", "path", "cycle", "regular", "tree", "ER"]
    supported_features = ["gaussian"]

    assert num_nodes > 0
    assert topology in supported_topologies, "Error: unknown topology"
    assert random_features in supported_features, "Error: unknown feature distribution"
    assert feature_dim > 0

    # Build the bare networkx graph for the requested topology.
    if topology == "complete":
        skeleton = create_complete_graph(num_nodes)
    elif topology == "path":
        skeleton = create_path_graph(num_nodes)
    elif topology == "cycle":
        skeleton = create_cycle_graph(num_nodes)
    elif topology == "regular":
        skeleton = create_4_regular_grid_graph(num_nodes, num_nodes)
    elif topology == "tree":
        skeleton = create_binary_tree(num_nodes)
    elif topology == "ER":
        skeleton = create_er_graph(num_nodes)

    # Attach random node features drawn from the requested distribution.
    if random_features == "gaussian":
        featured = add_gaussian_node_features(skeleton, feature_dim)

    # Convert to PyTorch Geometric's Data format.
    pyg_graph = from_networkx(featured)

    # The regression target is the largest degree over all nodes.
    largest_degree = max(degree for _, degree in featured.degree())
    pyg_graph.y = torch.tensor([largest_degree])

    return pyg_graph

In [7]:
# Max-degree task on Erdos-Renyi ("ER") graphs: draw 250 node counts from
# [10, 100] (the upper bound 101 is exclusive) and build one graph per count,
# each with 10-dimensional node features.
# NOTE(review): no random seed is set, so every run produces a different dataset.

random_integers = np.random.randint(10, 101, size=250)
graphs = [generate_max_degree_graph(num_nodes=nodes, topology='ER', feature_dim=10) for nodes in random_integers]

  data[key] = torch.tensor(value)


In [228]:
encoding = "LDP"

# Encodings whose transform is the same for every graph; the selected one is
# built once instead of once per loop iteration.
graph_independent_transforms = {
    "RWPE": lambda: T.AddRandomWalkPE(walk_length=16),
    "LDP": lambda: T.LocalDegreeProfile(),
    "SUB": lambda: T.RootedRWSubgraph(walk_length=10),
    "EGO": lambda: T.RootedEgoNets(num_hops=2),
    "VN": lambda: T.VirtualNode(),
}

if encoding == "LAPE":
    transform = None  # depends on each graph's size; built inside the loop
elif encoding in graph_independent_transforms:
    transform = graph_independent_transforms[encoding]()
else:
    # Fail loudly instead of silently reusing a stale `transform` from an
    # earlier run of this cell.
    raise ValueError(f"Unknown encoding: {encoding!r}")

# NOTE: `graphs` is updated in place, so re-running this cell applies the
# transform a second time to already-transformed graphs.
for i in tqdm(range(len(graphs))):
    if encoding == "LAPE":
        # Read the size from `graphs` (the list being transformed); the number
        # of eigenvectors is capped at 8 and reduced by 2.
        num_nodes = graphs[i].num_nodes
        eigvecs = int(min(num_nodes, 8)) - 2
        transform = T.AddLaplacianEigenvectorPE(k=eigvecs)

    graphs[i] = transform(graphs[i])

print("Feature Dimension: ", graphs[0].x.shape[1])

100%|██████████| 250/250 [00:00<00:00, 1287.60it/s]

Feature Dimension:  15





In [229]:
# Evaluate the current `model` on every graph in `graphs` and print the
# mean absolute error returned by `test`.
# NOTE(review): `test`, `model`, `device` and `optimizer` are defined in cells
# that appear further down in this notebook, so this cell fails when the
# notebook is run top to bottom on a fresh kernel.
test_dataset = graphs

test_loader = DataLoader(test_dataset, batch_size=64)

test_mae = test(test_loader, model, device, optimizer)
print(test_mae)

555.1316328125


In [112]:
# NOTE(review): the path names a shortest-path task on tree graphs, while the
# generator cell visible in this notebook builds ER graphs labelled with their
# max degree -- confirm that `graphs` holds what this file name promises.
file_path = "synthetic_data/shortest_path_task/tree_graphs.pkl"

# Create the target directory first so open() does not fail on a fresh checkout.
os.makedirs(os.path.dirname(file_path), exist_ok=True)

with open(file_path, 'wb') as f:
    pickle.dump(graphs, f)

In [6]:
# topologies

def create_complete_graph(num_nodes: int) -> nx.Graph:
    """Return the complete graph on ``num_nodes`` nodes.

    ``nx.complete_graph`` already returns an undirected ``nx.Graph`` by
    default, so no extra conversion (and copy) is needed.
    """
    return nx.complete_graph(num_nodes)

def create_path_graph(num_nodes: int) -> nx.Graph:
    """Return the path graph on ``num_nodes`` nodes built by ``nx.path_graph``."""
    return nx.path_graph(num_nodes)

def create_cycle_graph(num_nodes: int) -> nx.Graph:
    """Return the cycle graph on ``num_nodes`` nodes built by ``nx.cycle_graph``."""
    return nx.cycle_graph(num_nodes)

def create_4_regular_grid_graph(rows: int, cols: int) -> nx.Graph:
    """Return a periodic ``rows`` x ``cols`` grid graph with integer node labels.

    The grid is built with ``periodic=True`` so that it wraps around, which is
    intended to give a 4-regular structure. Nodes are relabelled to integers
    and every node's attribute dict is emptied before the graph is returned.
    """
    torus = nx.convert_node_labels_to_integers(
        nx.grid_2d_graph(rows, cols, periodic=True)
    )
    # Drop any per-node attributes so only features added later remain.
    for _, attributes in torus.nodes(data=True):
        attributes.clear()
    return torus

def create_binary_tree(num_nodes: int) -> nx.Graph:
    """Return a balanced binary tree of height ``ceil(log2(num_nodes + 1)) - 1``.

    NOTE(review): ``nx.balanced_tree`` builds a full tree of the given height,
    so the result presumably has more than ``num_nodes`` nodes whenever
    ``num_nodes + 1`` is not a power of two -- confirm this is intended.
    """
    levels_needed = math.ceil(math.log2(num_nodes + 1))
    return nx.balanced_tree(r=2, h=levels_needed - 1)

def create_er_graph(num_nodes, probability=0.5):
    """Sample an Erdos-Renyi random graph via ``nx.erdos_renyi_graph``.

    Args:
        num_nodes: Passed as ``n`` (the number of nodes).
        probability: Passed as ``p`` (the edge probability); defaults to 0.5.
    """
    return nx.erdos_renyi_graph(n=num_nodes, p=probability)

In [4]:
# node feature distributions

def add_gaussian_node_features(G: nx.Graph, k: int) -> nx.Graph:
    """Attach a ``k``-dimensional standard-normal feature vector to every node.

    Each node receives its own draw from a multivariate normal with zero mean
    and identity covariance, stored under the node attribute ``'x'``.

    Args:
        G: Graph to annotate. It is modified in place.
        k: Dimension of the feature vector.

    Returns:
        The same graph object ``G`` that was passed in.
    """
    mean = np.zeros(k)
    cov = np.eye(k)

    # One independent draw per node; uses NumPy's global random state.
    for node in G.nodes():
        G.nodes[node]['x'] = np.random.multivariate_normal(mean, cov)

    return G

## Train an example model

In [25]:
from typing import Optional
from torch import Tensor
from torch_geometric.utils import scatter

def max_pool(x: Tensor, batch: Optional[Tensor],
                    size: Optional[int] = None) -> Tensor:
    """Max-pool ``x`` along its node axis, optionally per batch index.

    The node axis is the last one for a 1-D tensor and the second-to-last
    otherwise.

    Args:
        x: Values to pool.
        batch: Index tensor passed to ``scatter`` to group entries of ``x``;
            when ``None`` the whole of ``x`` is reduced at once.
        size: Forwarded to ``scatter`` as ``dim_size``; ignored when ``batch``
            is ``None``.

    Returns:
        The pooled maxima. Without ``batch`` the reduced axis is kept (with
        length 1) for inputs of at most two dimensions and dropped otherwise.
    """
    if isinstance(x, Tensor) and x.dim() == 1:
        node_dim = -1
    else:
        node_dim = -2

    if batch is not None:
        return scatter(x, batch, dim=node_dim, dim_size=size, reduce='max')

    keep_axis = x.dim() <= 2
    pooled, _ = x.max(dim=node_dim, keepdim=keep_axis)
    return pooled

In [185]:
class GCN(torch.nn.Module):
    """Stack of GCN layers followed by add-pooling and a linear readout.

    Node features are linearly embedded, passed through ``num_layers``
    ``GCNConv`` layers (applied back to back, with no activation in between),
    pooled per graph with ``global_add_pool`` and mapped to one scalar.

    Args:
        channels: Hidden width used by the embedding and every conv layer.
        num_layers: Number of ``GCNConv`` layers.
        in_channels: Width of the input node features. Defaults to 15, the
            value that was previously hard-coded.
    """

    def __init__(self, channels, num_layers, in_channels=15):
        super().__init__()

        self.node_emb = Linear(in_channels, channels)
        # `pe_norm` and `edge_emb` are not used by forward(); they are kept so
        # that state_dict keys and parameter initialisation order stay the same.
        self.pe_norm = BatchNorm1d(20)
        self.edge_emb = Linear(3, channels)

        self.convs = ModuleList()
        for _ in range(num_layers):
            conv = GCNConv(channels, channels, normalize=True)
            self.convs.append(conv)

        # Readout: a single linear layer producing one value per graph.
        self.mlp = Sequential(
            Linear(channels, 1),
        )

    def forward(self, x, edge_index, edge_attr, batch):
        """Return one prediction per graph.

        Args:
            x: Node features; cast to float, and a trailing axis of length 1
                is squeezed away before the embedding.
            edge_index: Edge index passed to every ``GCNConv`` layer.
            edge_attr: Accepted for interface compatibility; not used.
            batch: Passed to ``global_add_pool`` to group nodes per graph.

        Returns:
            The output of the final linear layer applied to the pooled
            graph embeddings.
        """
        x = x.float()
        x = self.node_emb(x.squeeze(-1))

        for conv in self.convs:
            x = conv(x, edge_index)
        x = global_add_pool(x, batch)
        return self.mlp(x)

In [186]:
def train(train_loader, model, device, optimizer):
    """Run one optimisation pass over ``train_loader``.

    For every batch the mean absolute error between the squeezed model output
    and ``data.y`` is back-propagated and one optimizer step is taken.

    Args:
        train_loader: Iterable of batches that also exposes ``.dataset``.
        model: Called as ``model(x, edge_index, edge_attr, batch)``.
        device: Device each batch is moved to.
        optimizer: Optimizer stepping the model parameters.

    Returns:
        The batch losses weighted by ``num_graphs``, summed and divided by
        ``len(train_loader.dataset)``.
    """
    model.train()

    running_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        prediction = model(batch.x, batch.edge_index, batch.edge_attr, batch.batch)
        residual = prediction.squeeze() - batch.y
        loss = residual.abs().mean()
        loss.backward()
        # Weight by the number of graphs so the final division yields a
        # per-graph average rather than a per-batch one.
        running_loss += loss.item() * batch.num_graphs
        optimizer.step()
    return running_loss / len(train_loader.dataset)


@torch.no_grad()
def test(loader, model, device, optimizer):
    model.eval()

    total_error = 0
    for data in loader:
        data = data.to(device)
        # out = model(data.x, data.pe, data.edge_index, data.edge_attr, data.batch)
        out = model(data.x, data.edge_index, data.edge_attr, data.batch)
        total_error += (out.squeeze() - data.y).abs().sum().item()
    return total_error / len(loader.dataset)

In [208]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# 4-layer GCN with 32 hidden channels, moved to the selected device.
model = GCN(channels=32, num_layers=4).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
# Halve the learning rate of `optimizer` when the monitored metric has not
# improved for 10 scheduler steps, never going below 1e-6.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10,
                              min_lr=0.000001)

In [209]:
# Split `graphs` 50% / 25% / 25% into train / validation / test. The boundaries
# are derived from the dataset size (500 / 750 for 1000 graphs) so that the
# validation and test sets are not empty when fewer graphs were generated.
num_graphs = len(graphs)
train_end = num_graphs // 2
val_end = train_end + num_graphs // 4

train_dataset = graphs[:train_end]
val_dataset = graphs[train_end:val_end]
test_dataset = graphs[val_end:]

# Only the training loader is shuffled.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)
test_loader = DataLoader(test_dataset, batch_size=64)

In [213]:
# Restart optimisation with a smaller learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001, weight_decay=1e-5)
# A scheduler is bound to the optimizer instance it was constructed with, so it
# is rebuilt here; otherwise scheduler.step() would keep adjusting the old,
# no-longer-used optimizer.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10,
                              min_lr=0.000001)

In [214]:
# Train for 100 epochs. After each epoch the model is evaluated on the
# validation and test loaders; only the validation MAE drives the scheduler,
# the test MAE is printed for monitoring.
for epoch in range(1, 101):
    loss = train(train_loader, model, device, optimizer)
    val_mae = test(val_loader, model, device, optimizer)
    test_mae = test(test_loader, model, device, optimizer)
    # ReduceLROnPlateau takes the monitored metric as its step argument.
    scheduler.step(val_mae)
    print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Val: {val_mae:.4f}, '
          f'Test: {test_mae:.4f}')

Epoch: 01, Loss: 0.0067, Val: 0.0049, Test: 0.0049
Epoch: 02, Loss: 0.0033, Val: 0.0042, Test: 0.0043
Epoch: 03, Loss: 0.0036, Val: 0.0044, Test: 0.0045
Epoch: 04, Loss: 0.0026, Val: 0.0029, Test: 0.0029
Epoch: 05, Loss: 0.0029, Val: 0.0029, Test: 0.0029
Epoch: 06, Loss: 0.0041, Val: 0.0031, Test: 0.0031
Epoch: 07, Loss: 0.0037, Val: 0.0043, Test: 0.0044
Epoch: 08, Loss: 0.0033, Val: 0.0032, Test: 0.0031
Epoch: 09, Loss: 0.0029, Val: 0.0048, Test: 0.0048
Epoch: 10, Loss: 0.0034, Val: 0.0036, Test: 0.0036
Epoch: 11, Loss: 0.0030, Val: 0.0038, Test: 0.0038
Epoch: 12, Loss: 0.0028, Val: 0.0029, Test: 0.0029
Epoch: 13, Loss: 0.0029, Val: 0.0052, Test: 0.0054
Epoch: 14, Loss: 0.0039, Val: 0.0044, Test: 0.0043
Epoch: 15, Loss: 0.0031, Val: 0.0031, Test: 0.0031
Epoch: 16, Loss: 0.0029, Val: 0.0030, Test: 0.0030
Epoch: 17, Loss: 0.0028, Val: 0.0030, Test: 0.0031
Epoch: 18, Loss: 0.0030, Val: 0.0040, Test: 0.0040
Epoch: 19, Loss: 0.0029, Val: 0.0031, Test: 0.0031
Epoch: 20, Loss: 0.0029, Val: 0