## 1 - Load Dataset

In [None]:
from rep_graph_sample.dataset import ogb_dataset_to_nx_graph

# Identifier of the dataset handed to the project loader.
dataset_name = 'ogbn-arxiv'

# Build the full graph object that every later cell samples from.
graph = ogb_dataset_to_nx_graph(dataset_name)

# Report the size of the full graph; `:_` groups digits with underscores.
print(
    f'# of Nodes: {graph.number_of_nodes():_}\n'
    f'# of Edges: {graph.number_of_edges():_}'
)

## 2 - Sample Random Subgraph

In [None]:
import random
from tqdm import tqdm
import numpy as np
from rep_graph_sample.graph import SubgraphHandler

# Number of nodes for a randomly drawn initial subgraph. In this cell it is
# only referenced by the disabled random initialisation below.
subgraph_size = 100

# Initial node set for the subgraph.
# Disabled alternative: draw `subgraph_size` distinct nodes at random.
#subgraph_nodes = list(np.random.choice(graph.nodes(), size=(subgraph_size), replace=False))
# Active: start from a node set previously saved to disk.
subgraph_nodes = list(np.load('0.151-60k.npy'))

# Handler that tracks the current node set against the full graph.
subgraph = SubgraphHandler(full_graph=graph, initial_node_set=subgraph_nodes)

# Starting point for the search: KS distance of the initial node set.
print(f'Initial KS Distance: {subgraph.ks_distance():.5}')

## 3 - Metropolis-Hastings

In [None]:
# Number of swap proposals the sampling loop will evaluate.
num_iters = 1_000

# Power applied to the KS-distance ratio when scoring a proposal that made
# the distance worse; also passed to the run logger as `p`.
exponent = 150

In [None]:
from rep_graph_sample.logger import RunHistory

# Run logger bound to the subgraph handler; it is given the acceptance
# exponent as `p` and a save interval of 100 iterations.
logger = RunHistory(
    subgraph_handler=subgraph,
    save_interval=100,
    p=exponent,
)

# Summarise how many saves the configured run length will produce.
print(
    f'Saving every {logger.save_interval:_} iterations '
    f'for a total of {num_iters // logger.save_interval} saves '
    f'over {num_iters:_} iterations.'
)

In [None]:
# Metropolis-Hastings search over node sets of fixed size.
#
# Each iteration proposes swapping one node of the subgraph for one node
# outside it, then compares the KS distance before and after the swap:
#   * proposals that do not increase the distance are always accepted;
#   * worse proposals are accepted with probability `ratio ** exponent`,
#     where `ratio = previous distance / proposed distance` (< 1);
#   * rejected proposals are undone so the subgraph is left unchanged.
# Every iteration is reported to `logger`.
prev_ks_dist = subgraph.ks_distance()

for i in (pbar := tqdm(range(num_iters))):
    # Proposal: pick the node to drop and the node to bring in. Both are
    # chosen before the subgraph is modified, so they are always distinct.
    remove_node = np.random.choice(subgraph.nodes)
    add_node = np.random.choice(subgraph.nodes_not_in_subgraph)

    subgraph.remove(remove_node)
    subgraph.add(add_node)

    # KS distance of the proposed node set.
    new_ks_dist = subgraph.ks_distance()

    # A proposed distance of exactly zero cannot be improved on. Dividing by
    # it would raise ZeroDivisionError (Python floats) or yield inf/nan with
    # a warning (NumPy floats), so treat it as an infinitely good ratio.
    if new_ks_dist == 0:
        ratio = float('inf')
    else:
        ratio = prev_ks_dist / new_ks_dist

    # Short-circuit keeps the random draw limited to worse proposals, so the
    # random stream is consumed only when an acceptance test is needed.
    # bool() keeps `accepted` a plain Python bool even for NumPy scalars.
    accepted = bool(ratio >= 1.0 or ratio ** exponent > random.random())

    if accepted:
        prev_ks_dist = new_ks_dist
    else:
        # Reject: undo the swap to restore the previous node set.
        subgraph.add(remove_node)
        subgraph.remove(add_node)

    pbar.set_description(f'KS Dist: {prev_ks_dist:.5}')
    logger.log(ratio=ratio, accepted=accepted)

In [None]:
import matplotlib.pyplot as plt

# Plot the distances recorded by the run logger.
# NOTE(review): exact figure contents are defined in RunHistory — confirm there.
logger.plot_distances()

# Acceptance ratio as reported by the logger, formatted as a percentage.
print(f'Acceptance Ratio: {logger.acceptance_ratio:.2%}')

In [None]:
# Degree-distribution plot from the run logger (presumably for the saved
# snapshots — confirm in RunHistory).
logger.plot_degree_distributions()

In [None]:
# Sample-similarity plot from the run logger (semantics defined in
# RunHistory — TODO confirm what is being compared).
logger.plot_sample_similarity()

In [None]:
# Acceptance-ratio plot from the run logger (presumably over the course of
# the run — confirm in RunHistory).
logger.plot_acceptance_ratio()

In [None]:
# Histogram of the logger's recorded distances in 20 bins with black bar
# edges. The return value is bound to `_` so the notebook does not echo it.
_ = plt.hist(logger.distances, bins=20, edgecolor='k')