In [None]:
#hide
%load_ext autoreload
%autoreload 2

In [None]:
# default_exp data

# Data

> Utilities for retrieving benchmarks and generating random graphs

In [None]:
#export 
from nbdev.showdoc import *
import numpy as np
import networkx as nx
import scipy
import pygsp
from pathlib import Path

## Benchmarks

These datasets were downloaded and preprocessed using https://github.com/shchur/gnn-benchmark. I aim to replace the bundled copies once I work out how to make gnn-benchmark a dependency.

### Citation networks

In [None]:
#export 
def cora():
    "Load the preprocessed Cora dataset; returns the `(A, X, y)` tuple produced by `_preprocessed_dataset`"
    dataset_name = 'cora'
    return _preprocessed_dataset(dataset_name)

def pubmed():
    "Load the preprocessed PubMed dataset; returns the `(A, X, y)` tuple produced by `_preprocessed_dataset`"
    dataset_name = 'pubmed'
    return _preprocessed_dataset(dataset_name)

def citeseer():
    "Load the preprocessed CiteSeer dataset; returns the `(A, X, y)` tuple produced by `_preprocessed_dataset`"
    dataset_name = 'citeseer'
    return _preprocessed_dataset(dataset_name)

### Amazon networks

In [None]:
#export 
def amazon_photo():
    "Load the preprocessed Amazon photo dataset; returns the `(A, X, y)` tuple produced by `_preprocessed_dataset`"
    dataset_name = 'amazon_electronics_photo'
    return _preprocessed_dataset(dataset_name)

def amazon_computers():
    "Load the preprocessed Amazon computers dataset; returns the `(A, X, y)` tuple produced by `_preprocessed_dataset`"
    dataset_name = 'amazon_electronics_computers'
    return _preprocessed_dataset(dataset_name)

### Microsoft networks

In [None]:
#export 
def microsoft_physics():
    "Load the preprocessed Microsoft Academic physics dataset; returns the `(A, X, y)` tuple produced by `_preprocessed_dataset`"
    dataset_name = 'ms_academic_phy'
    return _preprocessed_dataset(dataset_name)
    
def microsoft_cs():
    "Load the preprocessed Microsoft Academic computer-science dataset; returns the `(A, X, y)` tuple produced by `_preprocessed_dataset`"
    dataset_name = 'ms_academic_cs'
    return _preprocessed_dataset(dataset_name)

In [None]:
#export
def _preprocessed_dataset(dataset):
    fname = Path(__file__).parents[1].joinpath(f'data/{dataset}_gnnbench.npz')
    data = np.load(fname, allow_pickle=True)
    A, X, y = data['A'].tolist(), data['X'].tolist(), data['y']
    return A, X, y

## Synthetic

In [None]:
#export
def make_planar_graph(n):
    """
    Makes a planar graph with n nodes

    Draws n random points with `np.random.rand(n, 2)`, triangulates them with
    `scipy.spatial.Delaunay` and connects every pair of points that share a
    triangle.

    n: number of points to sample (the triangulation needs enough points to
        form at least one triangle)

    Returns `(graph, pos)` where `graph` is an `nx.Graph` built from the
    triangle edges and `pos` maps each point index to its 2D coordinates.

    Code adapted from https://stackoverflow.com/questions/26681899/how-to-make-networkx-graph-from-delaunay-preserving-attributes-of-the-input-node
    """
    # Imported here because a bare `import scipy` does not guarantee that the
    # `scipy.spatial` submodule has been loaded.
    from scipy.spatial import Delaunay

    points = np.random.rand(n, 2)
    triangulation = Delaunay(points)
    edges = set()
    # `simplices` replaces the deprecated `vertices` attribute (removed in SciPy 1.11).
    for simplex in triangulation.simplices:
        # the three sides of the triangle, each stored with the smaller index first
        for i, j in ((0, 1), (0, 2), (1, 2)):
            a, b = sorted((int(simplex[i]), int(simplex[j])))
            edges.add((a, b))
    graph = nx.Graph(list(edges))
    pos = dict(zip(range(len(points)), points))
    return graph, pos

# Class versions 

These classes follow a common interface (`generate`, `number_of_edges` and `__str__`), which may be standardised in the future.

In [None]:
#export 
class BAGraph():
    "Graph generator wrapping `nx.barabasi_albert_graph(n, m)`"
    
    def __init__(self, n, m, seed=None):
        # n, m: forwarded unchanged to nx.barabasi_albert_graph
        # seed: optional random seed forwarded to networkx; None leaves the
        #       generator unseeded
        self.n = n
        self.m = m
        self.seed = seed
        
    def generate(self):
        "Return a new graph from `nx.barabasi_albert_graph`"
        # previously this passed an undefined global `seed`, raising NameError
        return nx.barabasi_albert_graph(self.n, self.m, seed=self.seed)
    
    def number_of_edges(self):
        "Number of edges of one freshly generated graph"
        return self.generate().number_of_edges()
    
    def __str__(self):
        return f'BA-{self.n}-{self.m}'
    
class SensorGraph():
    """
    KNN sensor graph built with `pygsp.graphs.Sensor`.

    This relies on the GitHub version of `pygsp.graphs.Sensor`, not the stable
    release (i.e. not the one described in the docs).
    """
    
    def __init__(self, n):
        # n: argument handed to pygsp.graphs.Sensor
        self.n = n
    
    def generate(self):
        "Keep sampling sensor graphs until one is connected, then return it as an `nx.Graph`"
        while True:
            candidate = pygsp.graphs.Sensor(self.n)
            if candidate.is_connected():
                return nx.Graph(candidate.W)
    
    def number_of_edges(self, samples=100):
        "Mean edge count over `samples` freshly generated graphs"
        edge_counts = []
        for _ in range(samples):
            edge_counts.append(self.generate().number_of_edges())
        return np.mean(edge_counts)
    
    def __str__(self):
        return 'Sensor-{}'.format(self.n)
    
class CoraGraph():
    "Fixed graph built once from the `A` matrix returned by `cora()`"
    
    def __init__(self, save_location='/tmp/cora'):
        # save_location is not used by this class; it is kept so existing
        # callers that pass it keep working.
        A, _, _ = cora()
        # networkx 3.0 removed `from_scipy_sparse_matrix`; prefer its
        # replacement when available and fall back on older installations.
        to_graph = getattr(nx, 'from_scipy_sparse_array', None) or nx.from_scipy_sparse_matrix
        self.cora = to_graph(A)
    
    def generate(self):
        "Return the stored graph (the same object on every call)"
        return self.cora
    
    def number_of_edges(self):
        "Number of edges of the stored graph"
        return self.cora.number_of_edges()
    
    def __str__(self):
        return 'Cora'

In [None]:
#hide
from nbdev.export import notebook2script
notebook2script()

Converted 00_graphtools.ipynb.
Converted 01_sampling.ipynb.
Converted 02_metrics.ipynb.
Converted 03_perturb.ipynb.
Converted 04_plotting.ipynb.
Converted 05_data.ipynb.
Converted index.ipynb.
