In [1]:
from __future__ import division
from __future__ import print_function

import argparse
import time

import numpy as np
import pandas as pd
import scipy.sparse as sp
import torch
from torch import optim
import networkx as nx

from gae.model import GCNModelVAE
from gae.optimizer import loss_function
from gae.utils import mask_test_edges, preprocess_graph, get_roc_score

In [2]:
# Command-line options for the experiment, declared as
# (flag, type, default, help) rows and registered in one pass.
_OPTION_SPECS = (
    ('--model', str, 'gcn_vae', "models used"),
    ('--seed', int, 42, 'Random seed.'),
    ('--epochs', int, 200, 'Number of epochs to train.'),
    ('--hidden1', int, 32, 'Number of units in hidden layer 1.'),
    ('--hidden2', int, 16, 'Number of units in hidden layer 2.'),
    ('--lr', float, 0.01, 'Initial learning rate.'),
    ('--dropout', float, 0., 'Dropout rate (1 - keep probability).'),
    ('--dataset-str', str, 'polbooks', 'type of dataset.'),
)

parser = argparse.ArgumentParser()
for _flag, _kind, _default, _help in _OPTION_SPECS:
    parser.add_argument(_flag, type=_kind, default=_default, help=_help)

# parse_known_args returns unrecognised argv entries as a second value
# instead of aborting on them; only the parsed namespace is kept.
args, _ = parser.parse_known_args()

In [3]:
def load_data(adj_name):
    """Build the undirected adjacency matrix and identity features of a dataset.

    Parameters
    ----------
    adj_name : str
        ``'polbooks'`` reads the edge list from ``datasets/polbooks.csv``
        (parsed without a header, first row discarded, values cast to int).
        ``'Citeseer'`` takes ``edge_index`` of the first graph returned by
        ``Planetoid('./datasets', adj_name)``.

    Returns
    -------
    adj : scipy.sparse.csr_matrix
        Square matrix of size ``nodes_numbers`` holding 1.0 at ``[u, v]`` and
        ``[v, u]`` for every listed edge ``(u, v)``; self-loops are dropped.
    features : torch.Tensor
        Identity matrix of size ``nodes_numbers`` (one-hot node features).

    Raises
    ------
    ValueError
        If ``adj_name`` is not one of the supported dataset names.
    """
    if adj_name == 'polbooks':
        nodes_numbers = 105
        edges = pd.read_csv('datasets/polbooks.csv', header=None).iloc[1:,]
        raw_edges = edges.astype('int')

    elif adj_name == 'Citeseer':
        nodes_numbers = 3327
        # NOTE(review): `Planetoid` is not imported in the notebook's import
        # cell, so this branch raises NameError on a fresh kernel. Presumably
        # it is torch_geometric.datasets.Planetoid -- confirm and import it.
        datasets = Planetoid('./datasets', adj_name)
        edges = datasets[0].edge_index
        raw_edges = pd.DataFrame([[edges[0,i].item(), edges[1,i].item()] for i in range(edges.shape[1])])
    else:
        # Fail loudly here; previously this only printed a message and then
        # crashed below with an unrelated UnboundLocalError on `raw_edges`.
        raise ValueError(
            "Unknown dataset {!r}; expected 'polbooks' or 'Citeseer'.".format(adj_name)
        )

    drop_self_loop = raw_edges[raw_edges[0] != raw_edges[1]]
    src = drop_self_loop.iloc[:, 0].values
    dst = drop_self_loop.iloc[:, 1].values

    # Fill both directions at once so the matrix is symmetric.
    graph_np = np.zeros((nodes_numbers, nodes_numbers))
    graph_np[src, dst] = 1
    graph_np[dst, src] = 1

    # Build the sparse matrix directly from the dense array. This avoids the
    # round-trip through nx.from_numpy_matrix, which NetworkX 3.0 removed.
    adj = sp.csr_matrix(graph_np)

    features = torch.eye(nodes_numbers)

    return adj, features

In [4]:
def gae_for(args):
    """Train a GCNModelVAE on one edge split and score it on the test edges.

    The dataset named by ``args.dataset_str`` is loaded, ``mask_test_edges``
    supplies a training adjacency plus positive/negative test edge sets, and
    the model is optimised with Adam for ``args.epochs`` iterations. The
    ``mu`` output of the final forward pass is then scored by ``get_roc_score``.

    Parameters
    ----------
    args : namespace
        Must provide ``dataset_str``, ``hidden1``, ``hidden2``, ``dropout``,
        ``lr`` and ``epochs``.

    Returns
    -------
    tuple
        ``(ap_score, roc_score)`` -- average precision first, ROC score second.
    """
    print("Using {} dataset".format(args.dataset_str))
    full_adj, features = load_data(args.dataset_str)

    n_nodes, feat_dim = features.shape

    # Diagonal-free copy of the complete graph, kept for the final scoring.
    diagonal = sp.dia_matrix((full_adj.diagonal()[np.newaxis, :], [0]), shape=full_adj.shape)
    adj_orig = full_adj - diagonal
    adj_orig.eliminate_zeros()

    adj_train, train_edges, test_edges, test_edges_false = mask_test_edges(full_adj)

    # Model input and the dense reconstruction target (training graph + identity).
    adj_norm = preprocess_graph(adj_train)
    adj_label = torch.FloatTensor((adj_train + sp.eye(adj_train.shape[0])).toarray())

    # Weighting terms handed to the loss, derived from how many entries of the
    # training adjacency are non-edges versus edges.
    total_entries = adj_train.shape[0] * adj_train.shape[0]
    edge_entries = adj_train.sum()
    pos_weight = torch.tensor(float(total_entries - edge_entries) / edge_entries)
    norm = total_entries / float((total_entries - edge_entries) * 2)

    model = GCNModelVAE(feat_dim, args.hidden1, args.hidden2, args.dropout)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    hidden_emb = None
    for epoch in range(args.epochs):
        started = time.time()
        model.train()
        optimizer.zero_grad()
        recovered, mu, logvar = model(features, adj_norm)

        loss = loss_function(preds=recovered, labels=adj_label,
                             mu=mu, logvar=logvar, n_nodes=n_nodes,
                             norm=norm, pos_weight=pos_weight)
        loss.backward()
        cur_loss = loss.item()
        optimizer.step()

        # Embedding taken from this epoch's forward pass (before the step).
        hidden_emb = mu.data.numpy()

        print("Epoch: {:04d} train_loss= {:.5f} time= {:.5f}".format(
            epoch + 1, cur_loss, time.time() - started))

    print("Optimization Finished!")

    roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false)
    print('Test ROC score: ' + str(roc_score))
    print('Test AP score: ' + str(ap_score))

    return ap_score, roc_score

In [5]:
if __name__ == '__main__':
    # Repeat the full train/evaluate cycle several times and report the mean
    # and standard deviation of both metrics across the repetitions.
    n_runs = 10
    all_ap = []
    all_auc = []
    for i in range(n_runs):
        # Apply --seed, which was previously parsed but never used. Offsetting
        # by the run index keeps the experiment reproducible without making
        # all repetitions identical.
        # NOTE(review): assumes the edge split and model initialisation draw
        # from the global NumPy / PyTorch generators -- confirm in gae.utils
        # and gae.model.
        run_seed = args.seed + i
        np.random.seed(run_seed)
        torch.manual_seed(run_seed)

        ap, auc = gae_for(args)
        all_ap.append(ap)
        all_auc.append(auc)

    print('AP MEAN : ', np.array(all_ap).mean())
    print('AP STD : ', np.array(all_ap).std())

    print('AUC MEAN : ', np.array(all_auc).mean())
    print('AUC STD : ', np.array(all_auc).std())

Using polbooks dataset
Epoch: 0001 train_loss= 1.77142 time= 0.04296
Epoch: 0002 train_loss= 1.65424 time= 0.00107
Epoch: 0003 train_loss= 1.85873 time= 0.00107
Epoch: 0004 train_loss= 1.62341 time= 0.00107
Epoch: 0005 train_loss= 1.57411 time= 0.00200
Epoch: 0006 train_loss= 1.52874 time= 0.00204
Epoch: 0007 train_loss= 1.51486 time= 0.00235
Epoch: 0008 train_loss= 1.49407 time= 0.00224
Epoch: 0009 train_loss= 1.55759 time= 0.00222
Epoch: 0010 train_loss= 1.45567 time= 0.00106
Epoch: 0011 train_loss= 1.43761 time= 0.00106
Epoch: 0012 train_loss= 1.39828 time= 0.00105
Epoch: 0013 train_loss= 1.15970 time= 0.00156
Epoch: 0014 train_loss= 1.17070 time= 0.00206
Epoch: 0015 train_loss= 1.20022 time= 0.00105
Epoch: 0016 train_loss= 1.01860 time= 0.00107
Epoch: 0017 train_loss= 1.03915 time= 0.00109
Epoch: 0018 train_loss= 0.94577 time= 0.00216
Epoch: 0019 train_loss= 0.93422 time= 0.00200
Epoch: 0020 train_loss= 0.89886 time= 0.00106
Epoch: 0021 train_loss= 0.86579 time= 0.00177
Epoch: 0022

Epoch: 0195 train_loss= 0.59984 time= 0.00301
Epoch: 0196 train_loss= 0.58871 time= 0.00199
Epoch: 0197 train_loss= 0.59327 time= 0.00135
Epoch: 0198 train_loss= 0.59389 time= 0.00130
Epoch: 0199 train_loss= 0.59235 time= 0.00131
Epoch: 0200 train_loss= 0.59891 time= 0.00241
Optimization Finished!
Test ROC score: 0.918904958677686
Test AP score: 0.9086319009690254
Using polbooks dataset
Epoch: 0001 train_loss= 1.66383 time= 0.00202
Epoch: 0002 train_loss= 1.50841 time= 0.00398
Epoch: 0003 train_loss= 1.75443 time= 0.00195
Epoch: 0004 train_loss= 1.64654 time= 0.00198
Epoch: 0005 train_loss= 1.65697 time= 0.00200
Epoch: 0006 train_loss= 1.52783 time= 0.00199
Epoch: 0007 train_loss= 1.49580 time= 0.00300
Epoch: 0008 train_loss= 1.43397 time= 0.00201
Epoch: 0009 train_loss= 1.48888 time= 0.00298
Epoch: 0010 train_loss= 1.40833 time= 0.00210
Epoch: 0011 train_loss= 1.39778 time= 0.00195
Epoch: 0012 train_loss= 1.29344 time= 0.00202
Epoch: 0013 train_loss= 1.16581 time= 0.00232
Epoch: 0014 

Epoch: 0199 train_loss= 0.58890 time= 0.00300
Epoch: 0200 train_loss= 0.59938 time= 0.00299
Optimization Finished!
Test ROC score: 0.9411157024793388
Test AP score: 0.9135818394942621
Using polbooks dataset
Epoch: 0001 train_loss= 1.68846 time= 0.00274
Epoch: 0002 train_loss= 1.66059 time= 0.00204
Epoch: 0003 train_loss= 1.63218 time= 0.00302
Epoch: 0004 train_loss= 1.79003 time= 0.00162
Epoch: 0005 train_loss= 1.40813 time= 0.00199
Epoch: 0006 train_loss= 1.55263 time= 0.00297
Epoch: 0007 train_loss= 1.57635 time= 0.00202
Epoch: 0008 train_loss= 1.53779 time= 0.00194
Epoch: 0009 train_loss= 1.37354 time= 0.00205
Epoch: 0010 train_loss= 1.35591 time= 0.00302
Epoch: 0011 train_loss= 1.18425 time= 0.00201
Epoch: 0012 train_loss= 1.16757 time= 0.00292
Epoch: 0013 train_loss= 1.08686 time= 0.00206
Epoch: 0014 train_loss= 1.04741 time= 0.00201
Epoch: 0015 train_loss= 0.97907 time= 0.00292
Epoch: 0016 train_loss= 0.98122 time= 0.00200
Epoch: 0017 train_loss= 0.92168 time= 0.00200
Epoch: 0018

Epoch: 0001 train_loss= 1.80039 time= 0.00200
Epoch: 0002 train_loss= 1.61251 time= 0.00304
Epoch: 0003 train_loss= 1.84389 time= 0.00199
Epoch: 0004 train_loss= 1.51484 time= 0.00156
Epoch: 0005 train_loss= 1.59984 time= 0.00156
Epoch: 0006 train_loss= 1.50519 time= 0.00146
Epoch: 0007 train_loss= 1.43685 time= 0.00143
Epoch: 0008 train_loss= 1.37027 time= 0.00222
Epoch: 0009 train_loss= 1.33728 time= 0.00299
Epoch: 0010 train_loss= 1.36732 time= 0.00182
Epoch: 0011 train_loss= 1.16535 time= 0.00184
Epoch: 0012 train_loss= 1.19700 time= 0.00088
Epoch: 0013 train_loss= 1.10455 time= 0.00103
Epoch: 0014 train_loss= 1.11515 time= 0.00120
Epoch: 0015 train_loss= 1.07655 time= 0.00231
Epoch: 0016 train_loss= 1.00363 time= 0.00157
Epoch: 0017 train_loss= 0.93413 time= 0.00195
Epoch: 0018 train_loss= 0.92440 time= 0.00169
Epoch: 0019 train_loss= 0.90115 time= 0.00207
Epoch: 0020 train_loss= 0.89046 time= 0.00300
Epoch: 0021 train_loss= 0.84458 time= 0.00293
Epoch: 0022 train_loss= 0.82822 ti

Epoch: 0009 train_loss= 1.47272 time= 0.00198
Epoch: 0010 train_loss= 1.41860 time= 0.00207
Epoch: 0011 train_loss= 1.37456 time= 0.00100
Epoch: 0012 train_loss= 1.30199 time= 0.00295
Epoch: 0013 train_loss= 1.26581 time= 0.00257
Epoch: 0014 train_loss= 1.16862 time= 0.00242
Epoch: 0015 train_loss= 1.18605 time= 0.00210
Epoch: 0016 train_loss= 1.06607 time= 0.00298
Epoch: 0017 train_loss= 1.00875 time= 0.00258
Epoch: 0018 train_loss= 0.97798 time= 0.00178
Epoch: 0019 train_loss= 0.93763 time= 0.00267
Epoch: 0020 train_loss= 0.89056 time= 0.00158
Epoch: 0021 train_loss= 0.85501 time= 0.00206
Epoch: 0022 train_loss= 0.85000 time= 0.00193
Epoch: 0023 train_loss= 0.83300 time= 0.00193
Epoch: 0024 train_loss= 0.81422 time= 0.00300
Epoch: 0025 train_loss= 0.81871 time= 0.00306
Epoch: 0026 train_loss= 0.79861 time= 0.00303
Epoch: 0027 train_loss= 0.80335 time= 0.00267
Epoch: 0028 train_loss= 0.78527 time= 0.00197
Epoch: 0029 train_loss= 0.78268 time= 0.00201
Epoch: 0030 train_loss= 0.77794 ti

Epoch: 0014 train_loss= 1.14184 time= 0.00300
Epoch: 0015 train_loss= 0.98651 time= 0.00300
Epoch: 0016 train_loss= 1.02816 time= 0.00166
Epoch: 0017 train_loss= 0.97108 time= 0.00217
Epoch: 0018 train_loss= 0.91966 time= 0.00178
Epoch: 0019 train_loss= 0.89898 time= 0.00300
Epoch: 0020 train_loss= 0.85135 time= 0.00300
Epoch: 0021 train_loss= 0.84570 time= 0.00200
Epoch: 0022 train_loss= 0.83620 time= 0.00200
Epoch: 0023 train_loss= 0.81054 time= 0.00159
Epoch: 0024 train_loss= 0.80390 time= 0.00220
Epoch: 0025 train_loss= 0.81548 time= 0.00183
Epoch: 0026 train_loss= 0.80795 time= 0.00200
Epoch: 0027 train_loss= 0.79633 time= 0.00121
Epoch: 0028 train_loss= 0.80713 time= 0.00194
Epoch: 0029 train_loss= 0.79750 time= 0.00200
Epoch: 0030 train_loss= 0.78121 time= 0.00200
Epoch: 0031 train_loss= 0.77329 time= 0.00169
Epoch: 0032 train_loss= 0.78862 time= 0.00300
Epoch: 0033 train_loss= 0.77202 time= 0.00155
Epoch: 0034 train_loss= 0.77455 time= 0.00300
Epoch: 0035 train_loss= 0.75511 ti

Epoch: 0023 train_loss= 0.84349 time= 0.00399
Epoch: 0024 train_loss= 0.84879 time= 0.00200
Epoch: 0025 train_loss= 0.83017 time= 0.00200
Epoch: 0026 train_loss= 0.82626 time= 0.00201
Epoch: 0027 train_loss= 0.82109 time= 0.00094
Epoch: 0028 train_loss= 0.82590 time= 0.00190
Epoch: 0029 train_loss= 0.82665 time= 0.00247
Epoch: 0030 train_loss= 0.82365 time= 0.00214
Epoch: 0031 train_loss= 0.82050 time= 0.00186
Epoch: 0032 train_loss= 0.81596 time= 0.00200
Epoch: 0033 train_loss= 0.80057 time= 0.00174
Epoch: 0034 train_loss= 0.81658 time= 0.00145
Epoch: 0035 train_loss= 0.81627 time= 0.00114
Epoch: 0036 train_loss= 0.80817 time= 0.00296
Epoch: 0037 train_loss= 0.80252 time= 0.00202
Epoch: 0038 train_loss= 0.78318 time= 0.00200
Epoch: 0039 train_loss= 0.76521 time= 0.00200
Epoch: 0040 train_loss= 0.76336 time= 0.00139
Epoch: 0041 train_loss= 0.74172 time= 0.00099
Epoch: 0042 train_loss= 0.72822 time= 0.00200
Epoch: 0043 train_loss= 0.69716 time= 0.00156
Epoch: 0044 train_loss= 0.70073 ti

Epoch: 0025 train_loss= 0.80425 time= 0.00109
Epoch: 0026 train_loss= 0.80753 time= 0.00194
Epoch: 0027 train_loss= 0.80465 time= 0.00207
Epoch: 0028 train_loss= 0.80879 time= 0.00107
Epoch: 0029 train_loss= 0.79750 time= 0.00200
Epoch: 0030 train_loss= 0.80068 time= 0.00201
Epoch: 0031 train_loss= 0.78863 time= 0.00301
Epoch: 0032 train_loss= 0.78937 time= 0.00129
Epoch: 0033 train_loss= 0.78399 time= 0.00107
Epoch: 0034 train_loss= 0.77297 time= 0.00100
Epoch: 0035 train_loss= 0.74971 time= 0.00205
Epoch: 0036 train_loss= 0.74194 time= 0.00205
Epoch: 0037 train_loss= 0.71926 time= 0.00168
Epoch: 0038 train_loss= 0.71178 time= 0.00164
Epoch: 0039 train_loss= 0.68210 time= 0.00200
Epoch: 0040 train_loss= 0.67985 time= 0.00157
Epoch: 0041 train_loss= 0.67489 time= 0.00164
Epoch: 0042 train_loss= 0.68343 time= 0.00200
Epoch: 0043 train_loss= 0.69390 time= 0.00198
Epoch: 0044 train_loss= 0.71246 time= 0.00140
Epoch: 0045 train_loss= 0.70247 time= 0.00154
Epoch: 0046 train_loss= 0.69510 ti

Epoch: 0042 train_loss= 0.69326 time= 0.00200
Epoch: 0043 train_loss= 0.67634 time= 0.00251
Epoch: 0044 train_loss= 0.68146 time= 0.00200
Epoch: 0045 train_loss= 0.67810 time= 0.00200
Epoch: 0046 train_loss= 0.66872 time= 0.00225
Epoch: 0047 train_loss= 0.68098 time= 0.00301
Epoch: 0048 train_loss= 0.69263 time= 0.00299
Epoch: 0049 train_loss= 0.68779 time= 0.00200
Epoch: 0050 train_loss= 0.69604 time= 0.00134
Epoch: 0051 train_loss= 0.67673 time= 0.00300
Epoch: 0052 train_loss= 0.67105 time= 0.00200
Epoch: 0053 train_loss= 0.66028 time= 0.00300
Epoch: 0054 train_loss= 0.65988 time= 0.00200
Epoch: 0055 train_loss= 0.65631 time= 0.00300
Epoch: 0056 train_loss= 0.64475 time= 0.00179
Epoch: 0057 train_loss= 0.65932 time= 0.00169
Epoch: 0058 train_loss= 0.65511 time= 0.00187
Epoch: 0059 train_loss= 0.66025 time= 0.00227
Epoch: 0060 train_loss= 0.65709 time= 0.00255
Epoch: 0061 train_loss= 0.65922 time= 0.00299
Epoch: 0062 train_loss= 0.66599 time= 0.00306
Epoch: 0063 train_loss= 0.66867 ti

Epoch: 0057 train_loss= 0.68668 time= 0.00275
Epoch: 0058 train_loss= 0.68215 time= 0.00200
Epoch: 0059 train_loss= 0.65801 time= 0.00300
Epoch: 0060 train_loss= 0.65676 time= 0.00200
Epoch: 0061 train_loss= 0.66387 time= 0.00235
Epoch: 0062 train_loss= 0.66484 time= 0.00267
Epoch: 0063 train_loss= 0.65848 time= 0.00233
Epoch: 0064 train_loss= 0.65737 time= 0.00199
Epoch: 0065 train_loss= 0.66040 time= 0.00147
Epoch: 0066 train_loss= 0.65259 time= 0.00141
Epoch: 0067 train_loss= 0.66918 time= 0.00100
Epoch: 0068 train_loss= 0.65612 time= 0.00184
Epoch: 0069 train_loss= 0.64453 time= 0.00258
Epoch: 0070 train_loss= 0.65039 time= 0.00209
Epoch: 0071 train_loss= 0.65601 time= 0.00191
Epoch: 0072 train_loss= 0.66258 time= 0.00300
Epoch: 0073 train_loss= 0.64189 time= 0.00200
Epoch: 0074 train_loss= 0.65346 time= 0.00200
Epoch: 0075 train_loss= 0.65340 time= 0.00201
Epoch: 0076 train_loss= 0.64726 time= 0.00254
Epoch: 0077 train_loss= 0.64965 time= 0.00246
Epoch: 0078 train_loss= 0.64673 ti