In [2]:
import logging
import argparse
import os
import numpy as np

from ppnp.pytorch import PPNP
from ppnp.pytorch.training import train_model
from ppnp.pytorch.earlystopping import stopping_args
from ppnp.pytorch.propagation import PPRExact, PPRPowerIteration
from ppnp.data.io import load_dataset

In [3]:
logging.basicConfig(
        format='%(asctime)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        level=logging.INFO + 2)

# Load dataset

In [4]:
graph_name = 'cora_ml'
graph = load_dataset(graph_name)
graph.standardize(select_lcc=True)

<Undirected, unweighted and connected SparseGraph with 15962 edges (no self-loops). Data: adj_matrix (2810x2810), attr_matrix (2810x2879), labels (2810), node_names (2810), attr_names (2879), class_names (7)>

# Set up data splits

First of all we need to decide whether to use the test or validation set. Be mindful that we can only look at the test set exactly _once_ and then can't change any hyperparameters oder model details, no matter what. Everything else would cause overfitting.

In [5]:
test = True

These are the seeds for the dataset splits used in the paper for test/validation.

In [6]:
test_seeds = [
        2144199730,  794209841, 2985733717, 2282690970, 1901557222,
        2009332812, 2266730407,  635625077, 3538425002,  960893189,
        497096336, 3940842554, 3594628340,  948012117, 3305901371,
        3644534211, 2297033685, 4092258879, 2590091101, 1694925034]
val_seeds = [
        2413340114, 3258769933, 1789234713, 2222151463, 2813247115,
        1920426428, 4272044734, 2092442742, 841404887, 2188879532,
        646784207, 1633698412, 2256863076,  374355442,  289680769,
        4281139389, 4263036964,  900418539,  119332950, 1628837138]

if test:
    seeds = test_seeds
else:
    seeds = val_seeds

Now we can choose the remaining settings for the training/early stopping/validation(test) split. These are the ones chosen in the paper

In [7]:
if graph_name == 'microsoft_academic':
    nknown = 5000
else:
    nknown = 1500
    
idx_split_args = {'ntrain_per_class': 20, 'nstopping': 500, 'nknown': nknown}

# Set up propagation

Next we need to set up the proper pmropagation scheme. In the paper we've introduced the exact PPR propagation used in PPNP and the PPR power iteration propagation used in APPNP.

We use the hyperparameters from the paper.

In [8]:
if graph_name == 'microsoft_academic':
    alpha = 0.2
else:
    alpha = 0.1

prop_ppnp = PPRExact(graph.adj_matrix, alpha=alpha)
prop_appnp = PPRPowerIteration(graph.adj_matrix, alpha=alpha, niter=10)

# Choose model hyperparameters

Now we choose the hyperparameters. These are the ones used in the paper for all datasets.

Note that we choose the propagation for APPNP.

In [9]:
reg_lambda    = 5e-3
learning_rate = 0.01

tf_model_args = {
    'hiddenunits': [64],
    'reg_lambda': reg_lambda,
    'learning_rate': learning_rate,
    'keep_prob': 0.5,
    'propagation': prop_appnp}

pytorch_model_args = {
    'hiddenunits': [64],
    'drop_prob': 0.5,
    'propagation': prop_appnp
}



# Train model

First we set the remaining settings for training.

In [10]:
niter_per_seed = 5
save_result = False
print_interval = 100

We use 20 different seeds for splitting and 5 iterations (different random initializations) per split, so we train 100 times altogether. This will take a while.

In [None]:
# results = []
used_seeds = []
niter_tot = niter_per_seed * len(seeds)
i_tot = 0
for seed in seeds[-40:]:
    idx_split_args['seed'] = seed
    for _ in range(niter_per_seed):
        i_tot += 1
        logging_string = f"Iteration {i_tot} of {niter_tot}"
        logging.log(22, logging_string + "\n                     "+ '-' * len(logging_string))
    
#         # tensorflow
#         result = train_model(
#                 name=graph_name,
#                 model_class=PPNP,
#                 graph=graph, 
#                 build_args=tf_model_args, 
#                 idx_split_args=idx_split_args,
#                 stopping_args=stopping_args, 
#                 test=test, 
#                 save_result=save_result, 
#                 tf_seed=None, 
#                 print_interval=print_interval
#         )

        # pytorch
        model, result = train_model(
            name=graph_name,
            model_class=PPNP,
            graph=graph,
            model_args=pytorch_model_args,
            reg_lambda=reg_lambda,
            learning_rate=learning_rate,
            idx_split_args=idx_split_args,
            stopping_args=stopping_args,
            test=test,
            torch_seed=None,
            print_interval=print_interval
        )

        results.append({})
        results[-1]['stopping_accuracy'] = result['stopping_acc']
#         results[-1]['stopping_f1_score'] = result['stopping']['f1_score']
        results[-1]['valtest_accuracy']  = result['valtest_acc']
#         results[-1]['valtest_f1_score']  = result['valtest']['f1_score']
#         results[-1]['runtime']           = result['runtime']
#         results[-1]['runtime_perepoch']  = result['runtime_perepoch']
        results[-1]['split_seed']        = seed

2019-11-13 15:10:49: Iteration 1 of 100
                     ------------------


{'epoch': 0, 'train_loss': 1.9992010593414307, 'train_acc': 0.14285714285714285, 'stop_loss': 1.958577036857605, 'stop_acc': 0.462}
{'epoch': 100, 'train_loss': 1.6320810317993164, 'train_acc': 0.7285714285714285, 'stop_loss': 1.8021395206451416, 'stop_acc': 0.634}
{'epoch': 200, 'train_loss': 1.2943041324615479, 'train_acc': 0.9214285714285714, 'stop_loss': 1.5530493259429932, 'stop_acc': 0.772}
{'epoch': 300, 'train_loss': 1.079572319984436, 'train_acc': 0.9714285714285714, 'stop_loss': 1.3974164724349976, 'stop_acc': 0.79}
{'epoch': 400, 'train_loss': 0.9509291648864746, 'train_acc': 0.9642857142857143, 'stop_loss': 1.2893569469451904, 'stop_acc': 0.794}
{'epoch': 500, 'train_loss': 0.8628926277160645, 'train_acc': 0.9857142857142858, 'stop_loss': 1.229329228401184, 'stop_acc': 0.794}
{'epoch': 600, 'train_loss': 0.7725332975387573, 'train_acc': 0.9785714285714285, 'stop_loss': 1.1681612730026245, 'stop_acc': 0.798}
{'epoch': 700, 'train_loss': 0.6923733353614807, 'train_acc': 0.992

2019-11-13 15:11:21: Iteration 2 of 100
                     ------------------


{'epoch': 1600, 'train_loss': 0.4465727210044861, 'train_acc': 0.9928571428571429, 'stop_loss': 0.9352177381515503, 'stop_acc': 0.8}
{'epoch': 0, 'train_loss': 1.9993115663528442, 'train_acc': 0.14285714285714285, 'stop_loss': 1.9587469100952148, 'stop_acc': 0.424}
{'epoch': 100, 'train_loss': 1.6304686069488525, 'train_acc': 0.8142857142857143, 'stop_loss': 1.8586913347244263, 'stop_acc': 0.584}


# Evaluation

To evaluate the data we use Pandas and Seaborn (for bootstrapping).

In [15]:
import pandas as pd
# import seaborn as sns

In [19]:
result_df = pd.DataFrame(results)
result_df.head()

result_df.valtest_accuracy.mean()

0.8482856773191874

The standard deviation doesn't really say much about the uncertainty of our results and the standard error of the mean (SEM) assumes a normal distribution. So the best way to get a valid estimate for our results' uncertainty is via bootstrapping.

In [13]:
def calc_uncertainty(values: np.ndarray, n_boot: int = 1000, ci: int = 95) -> dict:
    stats = {}
    stats['mean'] = values.mean()
    boots_series = sns.algorithms.bootstrap(values, func=np.mean, n_boot=n_boot)
    stats['CI'] = sns.utils.ci(boots_series, ci)
    stats['uncertainty'] = np.max(np.abs(stats['CI'] - stats['mean']))
    return stats

In [14]:
stopping_acc = calc_uncertainty(result_df['stopping_accuracy'])
stopping_f1 = calc_uncertainty(result_df['stopping_f1_score'])
valtest_acc = calc_uncertainty(result_df['valtest_accuracy'])
valtest_f1 = calc_uncertainty(result_df['valtest_f1_score'])
runtime = calc_uncertainty(result_df['runtime'])
runtime_perepoch = calc_uncertainty(result_df['runtime_perepoch'])

  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


In [15]:
print("APPNP\n"
      "Early stopping: Accuracy: {:.2f} ± {:.2f}%, "
      "F1 score: {:.4f} ± {:.4f}\n"
      "{}: Accuracy: {:.2f} ± {:.2f}%, "
      "F1 score: {:.4f} ± {:.4f}\n"
      "Runtime: {:.3f} ± {:.3f} sec, per epoch: {:.2f} ± {:.2f}ms"
      .format(
          stopping_acc['mean'] * 100,
          stopping_acc['uncertainty'] * 100,
          stopping_f1['mean'],
          stopping_f1['uncertainty'],
          'Test' if test else 'Validation',
          valtest_acc['mean'] * 100,
          valtest_acc['uncertainty'] * 100,
          valtest_f1['mean'],
          valtest_f1['uncertainty'],
          runtime['mean'],
          runtime['uncertainty'],
          runtime_perepoch['mean'] * 1e3,
          runtime_perepoch['uncertainty'] * 1e3,
      ))

APPNP
Early stopping: Accuracy: 84.17 ± 0.37%, F1 score: 0.8296 ± 0.0041
Test: Accuracy: 85.05 ± 0.23%, F1 score: 0.8431 ± 0.0021
Runtime: 45.661 ± 1.793 sec, per epoch: 32.72 ± 0.04ms
