In [1]:
import logging
import tensorflow as tf

from ppnp import PPNP
from ppnp.training import train_model
from ppnp.earlystopping import stopping_args
from ppnp.data.io import load_dataset
from ppnp.propagation import PPRExact, PPRPowerIteration

In [2]:
# Show INFO-level messages from TensorFlow and from Python's root logger,
# prefixing every Python log record with a timestamp.
tf.logging.set_verbosity(tf.logging.INFO)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

# Load dataset

First we need to load the dataset we want to train on. The datasets are stored in the `SparseGraph` format: a class that provides the adjacency, attribute and label matrices in dense (`np.ndarray`) or sparse (`scipy.sparse.csr_matrix`) form, plus some (in principle unnecessary) convenience functions.

The four datasets from the paper (Cora-ML, Citeseer, PubMed and MS Academic) can be found in the directory `data`.

For this example we choose the Cora-ML graph.

In [3]:
# Dataset identifier; it is passed to load_dataset here and to train_model later on.
graph_name = 'cora_ml'
graph = load_dataset(graph_name)
# The return value is not reassigned, yet `graph` is used afterwards, so this
# assumes standardize() modifies `graph` in place — TODO confirm.
# NOTE(review): select_lcc presumably keeps only the largest connected
# component (the displayed result reports a "connected" graph) — confirm.
# Kept as the cell's last expression so the notebook displays the result.
graph.standardize(select_lcc=True)

<Undirected, unweighted and connected SparseGraph with 15962 edges (no self-loops). Data: adj_matrix (2810x2810), attr_matrix (2810x2879), labels (2810), node_names (2810), attr_names (2879), class_names (7)>

# Set up propagation

Next we need to set up the propagation scheme. The paper introduces two of them: the exact PPR propagation used in PPNP and the PPR power-iteration propagation used in APPNP.

Here we use the hyperparameters from the paper (`alpha = 0.1`, and `niter = 10` for the power iteration). Note that MS Academic requires a different value, `alpha = 0.2`.

In [4]:
# Build both propagation objects from the same adjacency matrix and alpha.
# In the visible part of this notebook only prop_appnp is consumed (via
# model_args['propagation']); prop_ppnp is constructed but not used there.
prop_ppnp = PPRExact(graph.adj_matrix, alpha=0.1)
# niter=10: presumably the number of power-iteration steps — confirm against
# PPRPowerIteration.
prop_appnp = PPRPowerIteration(graph.adj_matrix, alpha=0.1, niter=10)

# Choose model hyperparameters

Now we choose the hyperparameters. These are the ones used in the paper for all datasets.

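Note that we pass `prop_appnp` (the power-iteration propagation) here, so the model trained below is APPNP. To train PPNP with the exact propagation instead, pass `prop_ppnp`.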

In [5]:
# Hyperparameters forwarded to train_model together with the model class.
# `propagation` holds the propagation object built in the previous cell;
# here it is the power-iteration variant (prop_appnp).
model_args = dict(
    hiddenunits=[64],
    reg_lambda=5e-3,
    learning_rate=0.01,
    keep_prob=0.5,
    propagation=prop_appnp,
)

# Train model

Now we can train the model.

In [6]:
# Settings for the node split, passed to train_model below. The exact meaning
# of each key is defined by train_model; the fixed seed is presumably what
# makes the split reproducible — confirm.
idx_split_args = dict(
    ntrain_per_class=20,
    nstopping=500,
    nknown=1500,
    seed=2413340114,
)

# Run options, passed positionally to train_model in the next cell.
test = False
save_result = False
print_interval = 20  # the training log shown below reports every 20th step

In [7]:
# Run training. Every argument is positional, so the order here must match
# train_model's signature exactly.
result = train_model(
    graph_name,
    PPNP,
    graph,
    model_args,
    idx_split_args,
    stopping_args,
    test,
    save_result,
    None,  # NOTE(review): presumably the TensorFlow seed (the log prints a generated one) — confirm
    print_interval,
)

2019-02-25 16:13:58: PPNP: {'hiddenunits': [64], 'reg_lambda': 0.005, 'learning_rate': 0.01, 'keep_prob': 0.5, 'propagation': <ppnp.propagation.PPRPowerIteration object at 0x7f8280758b00>}
2019-02-25 16:13:58: Tensorflow seed: 4090832601
2019-02-25 16:14:01: Step 0: Train loss = 2.26, train acc = 61.4, early stopping loss = 2.11, early stopping acc = 41.2 (1.153 sec)
2019-02-25 16:14:02: Step 20: Train loss = 1.50, train acc = 97.1, early stopping loss = 1.66, early stopping acc = 81.4 (0.706 sec)
2019-02-25 16:14:03: Step 40: Train loss = 1.25, train acc = 100.0, early stopping loss = 1.41, early stopping acc = 80.6 (0.615 sec)
2019-02-25 16:14:03: Step 60: Train loss = 0.98, train acc = 100.0, early stopping loss = 1.24, early stopping acc = 82.0 (0.617 sec)
2019-02-25 16:14:04: Step 80: Train loss = 0.91, train acc = 100.0, early stopping loss = 1.16, early stopping acc = 81.6 (0.617 sec)
2019-02-25 16:14:04: Step 100: Train loss = 0.88, train acc = 100.0, early stopping loss = 1.10