In [1]:
from netgan.netgan import *
import tensorflow as tf
from netgan import utils
import scipy.sparse as sp
import numpy as np
from matplotlib import pyplot as plt
from sklearn.metrics import roc_auc_score, average_precision_score
import time

%matplotlib inline

  from ._conv import register_converters as _register_converters


#### Load the data

In [2]:
# Load the graph archive. Only the first returned value (_A_obs) is used by the
# cells visible in this notebook.
# NOTE(review): _X_obs / _z_obs are presumably node attributes and labels —
# confirm against utils.load_npz.
_A_obs, _X_obs, _z_obs = utils.load_npz('data/cora_ml.npz')
# Symmetrise the matrix by adding its transpose.
_A_obs = _A_obs + _A_obs.T
# Any entry greater than 1 (e.g. an edge that was stored in both directions
# before the addition) is set back to 1.
_A_obs[_A_obs > 1] = 1
# Restrict rows and columns to the indices returned by the
# largest-connected-component helper.
lcc = utils.largest_connected_components(_A_obs)
_A_obs = _A_obs[lcc,:][:,lcc]
# Number of rows left after the restriction; passed to NetGAN and to the
# score-matrix helper in later cells.
_N = _A_obs.shape[0]

Selecting 1 largest connected components


In [3]:
# Split fractions and a seed value. None of these three names is read by any
# other cell visible in this notebook; the split itself is loaded from a file.
# NOTE(review): presumably these are the settings the stored split was created
# with — confirm. The seed is not applied to any RNG here.
val_share, test_share = 0.1, 0.05
seed = 481516234

#### Load the train, validation, test split from file

In [4]:
# The split file holds a single Python object (indexed by string keys in the
# next cell) wrapped in a 0-d object array, hence the .item() call.
# NumPy >= 1.16.3 defaults to allow_pickle=False and raises ValueError for
# object arrays, so pickling has to be enabled explicitly.
# SECURITY: unpickling can execute arbitrary code — only load split files that
# come from a trusted source.
loader = np.load('pretrained/cora_ml/split.npy', allow_pickle=True).item()

In [5]:
# Pull the five index arrays of the stored split out of the loader in one pass.
# NOTE(review): the *_ones arrays appear to hold edges and the *_zeros arrays
# non-edges (the evaluation cell labels them 1 and 0 respectively) — confirm.
(train_ones,
 val_ones, val_zeros,
 test_ones, test_zeros) = (
    loader[split_key]
    for split_key in ('train_ones', 'val_ones', 'val_zeros', 'test_ones', 'test_zeros')
)

In [6]:
# Build the training adjacency matrix from the (row, col) pairs in train_ones,
# with a weight of 1 per listed pair.
# The shape is pinned to (_N, _N): without it coo_matrix infers the size from
# the largest index present, which would silently produce a smaller matrix if
# the highest-numbered nodes had no training edge (and an out-of-range index
# now fails loudly instead of enlarging the graph).
train_graph = sp.coo_matrix(
    (np.ones(len(train_ones)), (train_ones[:, 0], train_ones[:, 1])),
    shape=(_N, _N),
).tocsr()
# The training graph must be symmetric. The comparison is done in sparse form
# so that no dense N x N copies have to be materialised.
assert (train_graph != train_graph.T).nnz == 0

#### Parameters

In [7]:
# rw_len: walk length, passed to the random walker and to NetGAN, and used
#         later to reshape the sampled walks.
# batch_size: passed to the random walker.
rw_len, batch_size = 16, 128

In [8]:
# Random-walk sampler over the training graph.
# NOTE(review): p and q look like node2vec-style bias parameters (1/1 would
# then mean unbiased walks) — confirm in utils.RandomWalker.
walker = utils.RandomWalker(
    train_graph,
    rw_len,
    p=1,
    q=1,
    batch_size=batch_size,
)

#### An example random walk

In [9]:
# Show one batch from the walk generator. The next() builtin is the idiomatic
# way to advance an iterator; calling __next__() directly is equivalent.
next(walker.walk())

array([[2571, 2273, 1456, ..., 2739, 2742, 2724],
       [1808, 1974, 1422, ..., 2013, 2014,  933],
       [2001, 1808, 1137, ..., 1467, 2451, 1467],
       ...,
       [ 733, 2011, 1126, ..., 1809,  784, 1112],
       [ 278,  425,  426, ...,  493,  418,  446],
       [ 419,  372,  271, ..., 1701,  266,  444]])

#### Create our NetGAN model

In [10]:
# Instantiate the model, one keyword per line so each hyper-parameter is easy
# to find and diff.
# NOTE(review): gpu_id=3 is hard-coded — presumably a device index; confirm it
# exists on the machine this notebook runs on.
netgan = NetGAN(
    _N,
    rw_len,
    walk_generator=walker.walk,
    gpu_id=3,
    use_gumbel=True,
    disc_iters=3,
    W_down_discriminator_size=128,
    W_down_generator_size=128,
    l2_penalty_generator=1e-7,
    l2_penalty_discriminator=5e-5,
    generator_layers=[40],
    discriminator_layers=[30],
    temp_start=5,
    temperature_decay=0.99998,
    learning_rate=0.0003,
    legacy_generator=True,
)

#### Load pretrained model

In [11]:
# Restore the pretrained checkpoint into the model's TensorFlow session.
checkpoint_path = "pretrained/cora_ml/pretrained_lp.ckpt"
saver = tf.train.Saver()
saver.restore(netgan.session, checkpoint_path)

INFO:tensorflow:Restoring parameters from pretrained/cora_ml/pretrained_lp.ckpt


#### Generate random walks from the trained model

In [12]:
# Sampling handle that is evaluated repeatedly in the sampling loop.
# NOTE(review): the first argument is presumably the number of walks produced
# per evaluation — confirm in NetGAN.generate_discrete.
sample_many = netgan.generate_discrete(
    10000,
    reuse=True,
    legacy=True,
)

In [13]:
# Empty accumulator for the sampled walk batches.
samples = list()

In [14]:
# Draw the walk batches.
# - `samples` is rebuilt from scratch here, so re-running this cell cannot
#   append a second set of batches on top of an earlier run.
# - The loop counter has a real name: `_` is used by IPython for the last
#   output and should not be assigned and read as an ordinary variable.
n_sample_batches = 10000   # number of evaluations of the sampling op
report_every = 1000        # print a progress line every this many batches
samples = []
for batch_no in range(1, n_sample_batches + 1):
    if batch_no % report_every == 0:
        print(batch_no)
    # NOTE(review): netgan.tau is presumably the Gumbel-softmax temperature
    # placeholder; 0.5 is fed for every evaluation — confirm.
    samples.append(sample_many.eval({netgan.tau: 0.5}))

1000
2000
3000
4000
5000
6000
7000
8000
9000
10000


#### Assemble score matrix from the random walks

In [15]:
# Stack all sampled batches and flatten them so that each row is one walk of
# rw_len entries.
walk_batches = np.array(samples)
rws = walk_batches.reshape(-1, rw_len)
# Aggregate the walks into a score matrix and convert it to CSR.
scores_matrix = utils.score_matrix_from_random_walks(rws, _N).tocsr()

#### Compute the link prediction performance (ROC AUC and average precision) on the held-out test edges and non-edges (more relevant for the VAL criterion)

In [16]:
# Ground truth: 1 for every row of test_ones, 0 for every row of test_zeros.
positive_labels = np.ones(len(test_ones))
negative_labels = np.zeros(len(test_zeros))
test_labels = np.concatenate((positive_labels, negative_labels))

# Scores read from the score matrix at the same (row, col) positions, in the
# same order as the labels above.
positive_scores = scores_matrix[tuple(test_ones.T)].A1
negative_scores = scores_matrix[tuple(test_zeros.T)].A1
test_scores = np.concatenate((positive_scores, negative_scores))

In [24]:
# Area under the ROC curve for the held-out pairs (displayed as the cell output).
roc_auc_score(y_true=test_labels, y_score=test_scores)

0.9509499999999999

In [25]:
# Average precision for the held-out pairs (displayed as the cell output).
average_precision_score(y_true=test_labels, y_score=test_scores)

0.951926106225528