In [1]:
from __future__ import division
from __future__ import print_function

import argparse
import time

import numpy as np
import pandas as pd
import scipy.sparse as sp
import torch
from torch import optim
import networkx as nx

from gae.model import GCNModelVAE
from gae.optimizer import loss_function
from gae.utils import mask_test_edges, preprocess_graph, get_roc_score

In [2]:
parser = argparse.ArgumentParser()
parser.add_argument('--model', type=str, default='gcn_vae', help="models used")
parser.add_argument('--seed', type=int, default=42, help='Random seed.')
parser.add_argument('--epochs', type=int, default=200, help='Number of epochs to train.')
parser.add_argument('--hidden1', type=int, default=32, help='Number of units in hidden layer 1.')
parser.add_argument('--hidden2', type=int, default=16, help='Number of units in hidden layer 2.')
parser.add_argument('--lr', type=float, default=0.01, help='Initial learning rate.')
parser.add_argument('--dropout', type=float, default=0., help='Dropout rate (1 - keep probability).')
parser.add_argument('--dataset-str', type=str, default='polblogs', help='type of dataset.')

args,_ = parser.parse_known_args()

In [3]:
def load_data(adj_name):
    if adj_name == 'polblogs':
        nodes_numbers = 183
        edges = pd.read_csv('datasets/texas.csv', header=None).iloc[1:,]
        raw_edges = edges.astype('int')
        
    elif adj_name == 'Citeseer':
        nodes_numbers = 3327
        datasets = Planetoid('./datasets', adj_name)
        edges = datasets[0].edge_index
        raw_edges = pd.DataFrame([[edges[0,i].item(), edges[1,i].item()] for i in range(edges.shape[1])])
    else:
        print("Dataset is not exist!")
    
    drop_self_loop = raw_edges[raw_edges[0]!=raw_edges[1]]
    
    graph_np = np.zeros((nodes_numbers, nodes_numbers))
    
    for i in range(drop_self_loop.shape[0]):
        graph_np[drop_self_loop.iloc[i,0], drop_self_loop.iloc[i,1]]=1
        graph_np[drop_self_loop.iloc[i,1], drop_self_loop.iloc[i,0]]=1
    
    adj = nx.adjacency_matrix(nx.from_numpy_matrix(graph_np))
    
    features = torch.eye(nodes_numbers)
    
    return adj, features

In [4]:
def gae_for(args):
    print("Using {} dataset".format(args.dataset_str))
    adj, features = load_data(args.dataset_str)
    
    n_nodes, feat_dim = features.shape

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    adj_train, train_edges, test_edges, test_edges_false = mask_test_edges(adj)
    adj = adj_train

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    # adj_label = sparse_to_tuple(adj_label)
    adj_label = torch.FloatTensor(adj_label.toarray())

    pos_weight = torch.tensor(float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum())
    norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    model = GCNModelVAE(feat_dim, args.hidden1, args.hidden2, args.dropout)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    hidden_emb = None
    for epoch in range(args.epochs):
        t = time.time()
        model.train()
        optimizer.zero_grad()
        recovered, mu, logvar = model(features, adj_norm)
        
        loss = loss_function(preds=recovered, labels=adj_label,
                             mu=mu, logvar=logvar, n_nodes=n_nodes,
                             norm=norm, pos_weight=pos_weight)
        loss.backward()
        cur_loss = loss.item()
        optimizer.step()

        hidden_emb = mu.data.numpy()
        #roc_curr, ap_curr = get_roc_score(hidden_emb, adj_orig, val_edges, val_edges_false)

        print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(cur_loss),
              "time=", "{:.5f}".format(time.time() - t)
              )

    print("Optimization Finished!")

    roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false)
    print('Test ROC score: ' + str(roc_score))
    print('Test AP score: ' + str(ap_score))
    
    return ap_score, roc_score

In [5]:
if __name__ == '__main__':
    all_ap = []
    all_auc = []
    for i in range(10):
        ap, auc = gae_for(args)
        all_ap.append(ap)
        all_auc.append(auc)
    
    print('AP MEAN : ', np.array(all_ap).mean())
    print('AP STD : ', np.array(all_ap).std())

    print('AUC MEAN : ', np.array(all_auc).mean())
    print('AUC STD : ', np.array(all_auc).std())

Using polblogs dataset
Epoch: 0001 train_loss= 1.60924 time= 0.03599
Epoch: 0002 train_loss= 1.63536 time= 0.00430
Epoch: 0003 train_loss= 1.56988 time= 0.00302
Epoch: 0004 train_loss= 1.59822 time= 0.00303
Epoch: 0005 train_loss= 1.45042 time= 0.00301
Epoch: 0006 train_loss= 1.39400 time= 0.00100
Epoch: 0007 train_loss= 1.54374 time= 0.00105
Epoch: 0008 train_loss= 1.45636 time= 0.00199
Epoch: 0009 train_loss= 1.38767 time= 0.00205
Epoch: 0010 train_loss= 1.24642 time= 0.00194
Epoch: 0011 train_loss= 1.28676 time= 0.00265
Epoch: 0012 train_loss= 1.12536 time= 0.00257
Epoch: 0013 train_loss= 1.11109 time= 0.00256
Epoch: 0014 train_loss= 1.12636 time= 0.00204
Epoch: 0015 train_loss= 1.04773 time= 0.00206
Epoch: 0016 train_loss= 1.00705 time= 0.00200
Epoch: 0017 train_loss= 1.03448 time= 0.00130
Epoch: 0018 train_loss= 0.94550 time= 0.00305
Epoch: 0019 train_loss= 0.90584 time= 0.00435
Epoch: 0020 train_loss= 0.91416 time= 0.00165
Epoch: 0021 train_loss= 0.80711 time= 0.00203
Epoch: 0022

Epoch: 0008 train_loss= 1.37762 time= 0.00399
Epoch: 0009 train_loss= 1.51714 time= 0.00308
Epoch: 0010 train_loss= 1.44353 time= 0.00300
Epoch: 0011 train_loss= 1.39570 time= 0.00199
Epoch: 0012 train_loss= 1.30736 time= 0.00208
Epoch: 0013 train_loss= 1.20638 time= 0.00301
Epoch: 0014 train_loss= 1.21831 time= 0.00397
Epoch: 0015 train_loss= 1.08812 time= 0.00302
Epoch: 0016 train_loss= 1.05751 time= 0.00300
Epoch: 0017 train_loss= 1.06520 time= 0.00300
Epoch: 0018 train_loss= 1.01648 time= 0.00398
Epoch: 0019 train_loss= 0.97842 time= 0.00300
Epoch: 0020 train_loss= 0.99892 time= 0.00303
Epoch: 0021 train_loss= 0.88864 time= 0.00298
Epoch: 0022 train_loss= 0.92001 time= 0.00300
Epoch: 0023 train_loss= 0.82685 time= 0.00293
Epoch: 0024 train_loss= 0.84750 time= 0.00298
Epoch: 0025 train_loss= 0.80470 time= 0.00302
Epoch: 0026 train_loss= 0.77320 time= 0.00300
Epoch: 0027 train_loss= 0.75959 time= 0.00300
Epoch: 0028 train_loss= 0.76408 time= 0.00294
Epoch: 0029 train_loss= 0.72542 ti

Epoch: 0032 train_loss= 0.72708 time= 0.00370
Epoch: 0033 train_loss= 0.72672 time= 0.00304
Epoch: 0034 train_loss= 0.73185 time= 0.00211
Epoch: 0035 train_loss= 0.71724 time= 0.00299
Epoch: 0036 train_loss= 0.73454 time= 0.00300
Epoch: 0037 train_loss= 0.72569 time= 0.00301
Epoch: 0038 train_loss= 0.71733 time= 0.00328
Epoch: 0039 train_loss= 0.71469 time= 0.00371
Epoch: 0040 train_loss= 0.71752 time= 0.00259
Epoch: 0041 train_loss= 0.70780 time= 0.00176
Epoch: 0042 train_loss= 0.71354 time= 0.00293
Epoch: 0043 train_loss= 0.70529 time= 0.00305
Epoch: 0044 train_loss= 0.71386 time= 0.00399
Epoch: 0045 train_loss= 0.69246 time= 0.00295
Epoch: 0046 train_loss= 0.70764 time= 0.00221
Epoch: 0047 train_loss= 0.69938 time= 0.00322
Epoch: 0048 train_loss= 0.69497 time= 0.00305
Epoch: 0049 train_loss= 0.68205 time= 0.00302
Epoch: 0050 train_loss= 0.69553 time= 0.00398
Epoch: 0051 train_loss= 0.69350 time= 0.00301
Epoch: 0052 train_loss= 0.68468 time= 0.00302
Epoch: 0053 train_loss= 0.67683 ti

Epoch: 0069 train_loss= 0.62348 time= 0.00424
Epoch: 0070 train_loss= 0.61553 time= 0.00394
Epoch: 0071 train_loss= 0.61476 time= 0.00500
Epoch: 0072 train_loss= 0.61336 time= 0.00400
Epoch: 0073 train_loss= 0.60833 time= 0.00200
Epoch: 0074 train_loss= 0.61626 time= 0.00405
Epoch: 0075 train_loss= 0.60917 time= 0.00295
Epoch: 0076 train_loss= 0.60997 time= 0.00407
Epoch: 0077 train_loss= 0.60365 time= 0.00300
Epoch: 0078 train_loss= 0.60693 time= 0.00304
Epoch: 0079 train_loss= 0.60799 time= 0.00303
Epoch: 0080 train_loss= 0.59439 time= 0.00299
Epoch: 0081 train_loss= 0.59995 time= 0.00300
Epoch: 0082 train_loss= 0.59669 time= 0.00199
Epoch: 0083 train_loss= 0.59165 time= 0.00295
Epoch: 0084 train_loss= 0.59602 time= 0.00207
Epoch: 0085 train_loss= 0.59155 time= 0.00298
Epoch: 0086 train_loss= 0.58689 time= 0.00302
Epoch: 0087 train_loss= 0.58778 time= 0.00229
Epoch: 0088 train_loss= 0.58073 time= 0.00200
Epoch: 0089 train_loss= 0.58473 time= 0.00355
Epoch: 0090 train_loss= 0.58303 ti

Epoch: 0096 train_loss= 0.58835 time= 0.00470
Epoch: 0097 train_loss= 0.59262 time= 0.00301
Epoch: 0098 train_loss= 0.58740 time= 0.00202
Epoch: 0099 train_loss= 0.58225 time= 0.00201
Epoch: 0100 train_loss= 0.57566 time= 0.00238
Epoch: 0101 train_loss= 0.58143 time= 0.00465
Epoch: 0102 train_loss= 0.57594 time= 0.00300
Epoch: 0103 train_loss= 0.58021 time= 0.00201
Epoch: 0104 train_loss= 0.56867 time= 0.00299
Epoch: 0105 train_loss= 0.57409 time= 0.00295
Epoch: 0106 train_loss= 0.57165 time= 0.00505
Epoch: 0107 train_loss= 0.56822 time= 0.00305
Epoch: 0108 train_loss= 0.56947 time= 0.00301
Epoch: 0109 train_loss= 0.57090 time= 0.00301
Epoch: 0110 train_loss= 0.56250 time= 0.00295
Epoch: 0111 train_loss= 0.57133 time= 0.00299
Epoch: 0112 train_loss= 0.56842 time= 0.00302
Epoch: 0113 train_loss= 0.56707 time= 0.00294
Epoch: 0114 train_loss= 0.56212 time= 0.00306
Epoch: 0115 train_loss= 0.56296 time= 0.00301
Epoch: 0116 train_loss= 0.56023 time= 0.00397
Epoch: 0117 train_loss= 0.56236 ti

Epoch: 0122 train_loss= 0.55262 time= 0.00333
Epoch: 0123 train_loss= 0.54886 time= 0.00307
Epoch: 0124 train_loss= 0.54649 time= 0.00300
Epoch: 0125 train_loss= 0.54309 time= 0.00199
Epoch: 0126 train_loss= 0.55140 time= 0.00300
Epoch: 0127 train_loss= 0.54467 time= 0.00301
Epoch: 0128 train_loss= 0.54767 time= 0.00294
Epoch: 0129 train_loss= 0.54487 time= 0.00404
Epoch: 0130 train_loss= 0.54813 time= 0.00299
Epoch: 0131 train_loss= 0.55245 time= 0.00303
Epoch: 0132 train_loss= 0.54199 time= 0.00299
Epoch: 0133 train_loss= 0.53664 time= 0.00403
Epoch: 0134 train_loss= 0.54900 time= 0.00295
Epoch: 0135 train_loss= 0.53842 time= 0.00248
Epoch: 0136 train_loss= 0.54513 time= 0.00300
Epoch: 0137 train_loss= 0.54369 time= 0.00300
Epoch: 0138 train_loss= 0.54205 time= 0.00302
Epoch: 0139 train_loss= 0.53866 time= 0.00218
Epoch: 0140 train_loss= 0.53743 time= 0.00300
Epoch: 0141 train_loss= 0.54025 time= 0.00300
Epoch: 0142 train_loss= 0.54152 time= 0.00300
Epoch: 0143 train_loss= 0.54066 ti

Epoch: 0154 train_loss= 0.54353 time= 0.00505
Epoch: 0155 train_loss= 0.54349 time= 0.00301
Epoch: 0156 train_loss= 0.54388 time= 0.00250
Epoch: 0157 train_loss= 0.53986 time= 0.00302
Epoch: 0158 train_loss= 0.54253 time= 0.00300
Epoch: 0159 train_loss= 0.53927 time= 0.00207
Epoch: 0160 train_loss= 0.53917 time= 0.00449
Epoch: 0161 train_loss= 0.54916 time= 0.00300
Epoch: 0162 train_loss= 0.54182 time= 0.00305
Epoch: 0163 train_loss= 0.53825 time= 0.00295
Epoch: 0164 train_loss= 0.54439 time= 0.00436
Epoch: 0165 train_loss= 0.53459 time= 0.00269
Epoch: 0166 train_loss= 0.53445 time= 0.00305
Epoch: 0167 train_loss= 0.53734 time= 0.00292
Epoch: 0168 train_loss= 0.53977 time= 0.00399
Epoch: 0169 train_loss= 0.54608 time= 0.00334
Epoch: 0170 train_loss= 0.53840 time= 0.00272
Epoch: 0171 train_loss= 0.54474 time= 0.00302
Epoch: 0172 train_loss= 0.53320 time= 0.00193
Epoch: 0173 train_loss= 0.53619 time= 0.00307
Epoch: 0174 train_loss= 0.54088 time= 0.00192
Epoch: 0175 train_loss= 0.53274 ti

Epoch: 0183 train_loss= 0.53219 time= 0.00501
Epoch: 0184 train_loss= 0.53403 time= 0.00255
Epoch: 0185 train_loss= 0.52793 time= 0.00266
Epoch: 0186 train_loss= 0.52726 time= 0.00248
Epoch: 0187 train_loss= 0.52878 time= 0.00173
Epoch: 0188 train_loss= 0.53329 time= 0.00305
Epoch: 0189 train_loss= 0.53192 time= 0.00300
Epoch: 0190 train_loss= 0.52817 time= 0.00228
Epoch: 0191 train_loss= 0.52566 time= 0.00256
Epoch: 0192 train_loss= 0.52957 time= 0.00300
Epoch: 0193 train_loss= 0.53174 time= 0.00393
Epoch: 0194 train_loss= 0.52930 time= 0.00328
Epoch: 0195 train_loss= 0.53174 time= 0.00379
Epoch: 0196 train_loss= 0.52682 time= 0.00300
Epoch: 0197 train_loss= 0.52734 time= 0.00300
Epoch: 0198 train_loss= 0.52876 time= 0.00292
Epoch: 0199 train_loss= 0.53252 time= 0.00230
Epoch: 0200 train_loss= 0.52637 time= 0.00304
Optimization Finished!
Test ROC score: 0.3635116598079561
Test AP score: 0.5467406980993652
Using polblogs dataset
Epoch: 0001 train_loss= 1.63944 time= 0.00397
Epoch: 0002

Epoch: 0005 train_loss= 1.51797 time= 0.00106
Epoch: 0006 train_loss= 1.46255 time= 0.00300
Epoch: 0007 train_loss= 1.66449 time= 0.00205
Epoch: 0008 train_loss= 1.41155 time= 0.00282
Epoch: 0009 train_loss= 1.46662 time= 0.00282
Epoch: 0010 train_loss= 1.28934 time= 0.00280
Epoch: 0011 train_loss= 1.24896 time= 0.00475
Epoch: 0012 train_loss= 1.23690 time= 0.00306
Epoch: 0013 train_loss= 1.14944 time= 0.00208
Epoch: 0014 train_loss= 1.06792 time= 0.00188
Epoch: 0015 train_loss= 1.00442 time= 0.00173
Epoch: 0016 train_loss= 1.01464 time= 0.00213
Epoch: 0017 train_loss= 0.99896 time= 0.00232
Epoch: 0018 train_loss= 0.92229 time= 0.00239
Epoch: 0019 train_loss= 0.89515 time= 0.00236
Epoch: 0020 train_loss= 0.83884 time= 0.00302
Epoch: 0021 train_loss= 0.86296 time= 0.00300
Epoch: 0022 train_loss= 0.83811 time= 0.00202
Epoch: 0023 train_loss= 0.82985 time= 0.00300
Epoch: 0024 train_loss= 0.77361 time= 0.00300
Epoch: 0025 train_loss= 0.77233 time= 0.00200
Epoch: 0026 train_loss= 0.76915 ti

Epoch: 0186 train_loss= 0.53583 time= 0.00305
Epoch: 0187 train_loss= 0.53312 time= 0.00388
Epoch: 0188 train_loss= 0.53753 time= 0.00499
Epoch: 0189 train_loss= 0.53663 time= 0.00307
Epoch: 0190 train_loss= 0.53968 time= 0.00171
Epoch: 0191 train_loss= 0.53234 time= 0.00304
Epoch: 0192 train_loss= 0.53472 time= 0.00425
Epoch: 0193 train_loss= 0.53597 time= 0.00201
Epoch: 0194 train_loss= 0.53676 time= 0.00207
Epoch: 0195 train_loss= 0.53249 time= 0.00155
Epoch: 0196 train_loss= 0.53909 time= 0.00199
Epoch: 0197 train_loss= 0.53525 time= 0.00277
Epoch: 0198 train_loss= 0.53502 time= 0.00364
Epoch: 0199 train_loss= 0.53910 time= 0.00201
Epoch: 0200 train_loss= 0.53391 time= 0.00300
Optimization Finished!
Test ROC score: 0.51440329218107
Test AP score: 0.6304209140618424
AP MEAN :  0.5568642481615018
AP STD :  0.06321265608815232
AUC MEAN :  0.39423868312757204
AUC STD :  0.08450243166714433
