In [1]:
import logging
import tensorflow as tf

from ppnp import PPNP
from ppnp.training import train_model
from ppnp.earlystopping import stopping_args
from ppnp.data.io import load_dataset
from ppnp.propagation import PPRExact, PPRPowerIteration

In [2]:
# Let TensorFlow emit its own log messages at INFO level and above.
tf.logging.set_verbosity(tf.logging.INFO)

# Configure Python's root logger: INFO and above, each message prefixed
# with a "YYYY-mm-dd HH:MM:SS" timestamp.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

# Load dataset

First we need to load the dataset we want to train on. The datasets used are in the `SparseGraph` format. This is just a class providing the adjacency, attribute and label matrices in a dense (`np.ndarray`) or sparse (`scipy.sparse.csr_matrix`) matrix format and some (in principle unnecessary) convenience functions.

The four datasets from the paper (Cora-ML, Citeseer, PubMed and MS Academic) can be found in the directory `data`.

For this example we choose the Cora-ML graph.

In [3]:
# Name of the dataset to load; it is also passed to train_model later on.
graph_name = 'cora_ml'
graph = load_dataset(graph_name)
# Standardize the graph. The call is deliberately left as the bare last
# expression so that its return value is shown as the cell output.
# NOTE(review): select_lcc=True presumably restricts the graph to its largest
# connected component (the output reports a connected graph) -- confirm
# against SparseGraph.standardize.
graph.standardize(select_lcc=True)

<Undirected, unweighted and connected SparseGraph with 15962 edges (no self-loops). Data: adj_matrix (2810x2810), attr_matrix (2810x2879), labels (2810), node_names (2810), attr_names (2879), class_names (7)>

# Set up propagation

Next we need to set up the proper propagation scheme. In the paper we've introduced the exact PPR propagation used in PPNP and the PPR power iteration propagation used in APPNP.

Here we use the hyperparameters from the paper. Only MS Academic requires a different value, `alpha = 0.2`.

In [4]:
# PPR `alpha` hyperparameter, defined once so that both propagation schemes
# below always share the same value. 0.1 is the paper's setting for this
# dataset; MS Academic uses 0.2 instead.
alpha = 0.1

# Exact PPR propagation (used by PPNP).
prop_ppnp = PPRExact(graph.adj_matrix, alpha=alpha)
# PPR power iteration propagation (used by APPNP).
prop_appnp = PPRPowerIteration(graph.adj_matrix, alpha=alpha, niter=10)

# Choose model hyperparameters

Now we choose the hyperparameters. These are the ones used in the paper for all datasets.

Note that we pass the APPNP propagation (`prop_appnp`) here. To train with the exact PPNP propagation instead, use `prop_ppnp`.

In [5]:
# Arguments handed to the model via train_model; the training log echoes
# this dict back when training starts.
model_args = {
    'hiddenunits': [64],
    'reg_lambda': 5e-3,
    'learning_rate': 0.01,
    'keep_prob': 0.5,
    # Propagation scheme object built in the previous step (APPNP variant).
    'propagation': prop_appnp,
}

# Train model

Now we can train the model.

In [6]:
# Settings for the data split; forwarded unchanged to train_model.
# The fixed seed keeps the split reproducible across runs.
idx_split_args = {
    'ntrain_per_class': 20,
    'nstopping': 500,
    'nknown': 1500,
    'seed': 2413340114,
}

# Run options, also forwarded to train_model.
test = False         # NOTE(review): presumably toggles test-set evaluation -- confirm
save_result = False
print_interval = 20  # the training log reports every 20 steps

In [7]:
# Train the model with the settings defined above. Every argument is passed
# positionally, so the order below must match train_model's signature.
result = train_model(
    graph_name,
    PPNP,
    graph,
    model_args,
    idx_split_args,
    stopping_args,
    test,
    save_result,
    print_interval,
)

2019-02-25 12:40:21: PPNP: {'hiddenunits': [64], 'reg_lambda': 0.005, 'learning_rate': 0.01, 'keep_prob': 0.5, 'propagation': <ppnp.propagation.PPRPowerIteration object at 0x7f9736a80b70>}
2019-02-25 12:40:21: Tensorflow seed: 3285412691
2019-02-25 12:40:24: Step 0: Train loss = 2.26, train acc = 66.4, early stopping loss = 2.10, early stopping acc = 37.8 (1.180 sec)
2019-02-25 12:40:25: Step 20: Train loss = 1.51, train acc = 97.1, early stopping loss = 1.66, early stopping acc = 80.8 (0.739 sec)
2019-02-25 12:40:26: Step 40: Train loss = 1.26, train acc = 100.0, early stopping loss = 1.39, early stopping acc = 80.2 (0.624 sec)
2019-02-25 12:40:26: Step 60: Train loss = 1.06, train acc = 100.0, early stopping loss = 1.25, early stopping acc = 81.0 (0.629 sec)
2019-02-25 12:40:27: Step 80: Train loss = 0.94, train acc = 99.3, early stopping loss = 1.17, early stopping acc = 80.0 (0.638 sec)
2019-02-25 12:40:28: Step 100: Train loss = 0.80, train acc = 100.0, early stopping loss = 1.10,