In [11]:
import os
import pickle
import networkx as nx
from tqdm import tqdm
import torch
import numpy as np

from torch_geometric.datasets import TUDataset
from torch_geometric.data import Data
from torch_geometric.utils import to_networkx, from_networkx, to_dense_adj
import torch_geometric.transforms as T
from torch_geometric.loader import DataLoader

In [30]:
def generate_max_degree_graph(num_nodes: int, topology: str="complete",
                              random_features: str="gaussian", feature_dim: int=1) -> Data:
    """Build one sample for the max-degree task.

    A networkx graph with the requested topology is created, random node
    features are attached, the graph is converted to a PyTorch Geometric
    ``Data`` object, and the largest node degree is stored as the label ``y``.

    Args:
        num_nodes: Number of nodes in the graph; must be positive.
        topology: Name of the graph topology. Only "complete" is accepted.
        random_features: Name of the node-feature distribution. Only
            "gaussian" is accepted.
        feature_dim: Length of each node's feature vector; must be positive.

    Returns:
        The converted ``Data`` object whose ``y`` is a one-element tensor
        holding the maximum node degree of the generated graph.

    Raises:
        AssertionError: If any argument fails the checks above.
    """
    # argument validation (more topologies / distributions still to be implemented)
    assert num_nodes > 0
    assert topology in ("complete",), "Error: unknown topology"
    assert random_features in ("gaussian",), "Error: unknown feature distribution"
    assert feature_dim > 0

    # step 1: bare networkx graph with the requested topology
    if topology == "complete":
        skeleton = create_complete_graph(num_nodes)

    # step 2: attach random node features from the requested distribution
    if random_features == "gaussian":
        featured = add_gaussian_node_features(skeleton, feature_dim)

    # step 3: networkx -> PyTorch Geometric
    data = from_networkx(featured)

    # step 4: the graph-level label is the largest degree over all nodes
    largest_degree = max(degree for _, degree in featured.degree())
    data.y = torch.tensor([largest_degree])

    return data

In [36]:
# max degree task on complete graphs
#
# NOTE: this cell calls generate_max_degree_graph, which in turn needs
# create_complete_graph and add_gaussian_node_features; the cells defining
# those helpers must have been executed before this one.

SEED = 0            # fixed seed so the generated dataset is reproducible
NUM_GRAPHS = 1000   # number of graphs in the dataset
MIN_NODES = 10      # smallest graph size (inclusive)
MAX_NODES = 100     # largest graph size (inclusive)

# Seed NumPy's global RNG: both the graph sizes sampled here and the node
# features drawn via np.random inside add_gaussian_node_features depend on it.
np.random.seed(SEED)

# randint's upper bound is exclusive, hence MAX_NODES + 1
random_integers = np.random.randint(MIN_NODES, MAX_NODES + 1, size=NUM_GRAPHS)

# int(...) hands plain Python ints (not NumPy scalars) to the graph builder
complete_graphs = [generate_max_degree_graph(num_nodes=int(nodes), feature_dim=10) for nodes in random_integers]

In [38]:
file_path = "synthetic_data/max_degree_task/complete_graphs.pkl"

# open(..., 'wb') does not create missing parent directories, so make sure the
# target folder exists first (no-op if it is already there).
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Serialize the whole list of generated graphs into a single pickle file.
with open(file_path, 'wb') as f:
    pickle.dump(complete_graphs, f)

In [28]:
# topologies

def create_complete_graph(num_nodes: int) -> nx.Graph:
    """Return a new complete graph on ``num_nodes`` nodes.

    Args:
        num_nodes: Number of nodes, forwarded to ``nx.complete_graph``.

    Returns:
        A freshly built ``nx.Graph``.
    """
    # nx.complete_graph builds an undirected nx.Graph by default, so no extra
    # .to_undirected() call (which would only deep-copy the graph) is needed.
    return nx.complete_graph(num_nodes)

In [12]:
# node feature distributions

def add_gaussian_node_features(G: nx.Graph, k: int) -> nx.Graph:
    """Attach a random Gaussian feature vector to every node of ``G``.

    Each node gets a length-``k`` NumPy array sampled from a multivariate
    normal with zero mean and identity covariance, stored under the node
    attribute key ``'features'``. Samples come from NumPy's global RNG
    (``np.random``), so seed it beforehand if reproducibility is needed.

    Args:
        G: Graph whose nodes receive features. It is modified in place.
        k: Dimension of each feature vector.

    Returns:
        The same graph object ``G``, now carrying the ``'features'`` attribute
        on every node.
    """
    mean = np.zeros(k)
    cov = np.eye(k)

    node_ids = list(G.nodes())

    # Draw all vectors in one call: row i is the sample for node_ids[i]. This
    # avoids re-decomposing the (loop-invariant) covariance once per node.
    samples = np.random.multivariate_normal(mean, cov, size=len(node_ids))

    for node, feature_vector in zip(node_ids, samples):
        G.nodes[node]['features'] = feature_vector

    return G