In [1]:
from __future__ import division
from __future__ import print_function

import argparse
import time

import numpy as np
import pandas as pd
import scipy.sparse as sp
import torch
from torch import optim
import networkx as nx

from gae.model import GCNModelVAE
from gae.optimizer import loss_function
from gae.utils import mask_test_edges, preprocess_graph, get_roc_score

In [2]:
parser = argparse.ArgumentParser()
parser.add_argument('--model', type=str, default='gcn_vae', help="models used")
parser.add_argument('--seed', type=int, default=42, help='Random seed.')
parser.add_argument('--epochs', type=int, default=200, help='Number of epochs to train.')
parser.add_argument('--hidden1', type=int, default=32, help='Number of units in hidden layer 1.')
parser.add_argument('--hidden2', type=int, default=16, help='Number of units in hidden layer 2.')
parser.add_argument('--lr', type=float, default=0.01, help='Initial learning rate.')
parser.add_argument('--dropout', type=float, default=0., help='Dropout rate (1 - keep probability).')
parser.add_argument('--dataset-str', type=str, default='wisconsin', help='type of dataset.')

args,_ = parser.parse_known_args()

In [3]:
def load_data(adj_name):
    if adj_name == 'wisconsin':
        nodes_numbers = 251
        edges = pd.read_csv('datasets/wisconsin.csv', header=None).iloc[1:,]
        raw_edges = edges.astype('int')
        
    elif adj_name == 'Citeseer':
        nodes_numbers = 3327
        datasets = Planetoid('./datasets', adj_name)
        edges = datasets[0].edge_index
        raw_edges = pd.DataFrame([[edges[0,i].item(), edges[1,i].item()] for i in range(edges.shape[1])])
    else:
        print("Dataset is not exist!")
    
    drop_self_loop = raw_edges[raw_edges[0]!=raw_edges[1]]
    
    graph_np = np.zeros((nodes_numbers, nodes_numbers))
    
    for i in range(drop_self_loop.shape[0]):
        graph_np[drop_self_loop.iloc[i,0], drop_self_loop.iloc[i,1]]=1
        graph_np[drop_self_loop.iloc[i,1], drop_self_loop.iloc[i,0]]=1
    
    adj = nx.adjacency_matrix(nx.from_numpy_matrix(graph_np))
    
    features = torch.eye(nodes_numbers)
    
    return adj, features

In [4]:
def gae_for(args):
    print("Using {} dataset".format(args.dataset_str))
    adj, features = load_data(args.dataset_str)
    
    n_nodes, feat_dim = features.shape

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    adj_train, train_edges, test_edges, test_edges_false = mask_test_edges(adj)
    adj = adj_train

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    # adj_label = sparse_to_tuple(adj_label)
    adj_label = torch.FloatTensor(adj_label.toarray())

    pos_weight = torch.tensor(float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum())
    norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    model = GCNModelVAE(feat_dim, args.hidden1, args.hidden2, args.dropout)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    hidden_emb = None
    for epoch in range(args.epochs):
        t = time.time()
        model.train()
        optimizer.zero_grad()
        recovered, mu, logvar = model(features, adj_norm)
        
        loss = loss_function(preds=recovered, labels=adj_label,
                             mu=mu, logvar=logvar, n_nodes=n_nodes,
                             norm=norm, pos_weight=pos_weight)
        loss.backward()
        cur_loss = loss.item()
        optimizer.step()

        hidden_emb = mu.data.numpy()
        #roc_curr, ap_curr = get_roc_score(hidden_emb, adj_orig, val_edges, val_edges_false)

        print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(cur_loss),
              "time=", "{:.5f}".format(time.time() - t)
              )

    print("Optimization Finished!")

    roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false)
    print('Test ROC score: ' + str(roc_score))
    print('Test AP score: ' + str(ap_score))
    
    return ap_score, roc_score

In [5]:
if __name__ == '__main__':
    all_ap = []
    all_auc = []
    for i in range(10):
        ap, auc = gae_for(args)
        all_ap.append(ap)
        all_auc.append(auc)
    
    print('AP MEAN : ', np.array(all_ap).mean())
    print('AP STD : ', np.array(all_ap).std())

    print('AUC MEAN : ', np.array(all_auc).mean())
    print('AUC STD : ', np.array(all_auc).std())

Using wisconsin dataset
Epoch: 0001 train_loss= 1.71923 time= 0.03508
Epoch: 0002 train_loss= 1.80727 time= 0.00100
Epoch: 0003 train_loss= 1.76777 time= 0.00305
Epoch: 0004 train_loss= 1.55391 time= 0.00391
Epoch: 0005 train_loss= 1.64058 time= 0.00171
Epoch: 0006 train_loss= 1.54663 time= 0.00106
Epoch: 0007 train_loss= 1.57055 time= 0.00106
Epoch: 0008 train_loss= 1.47047 time= 0.00274
Epoch: 0009 train_loss= 1.49876 time= 0.00211
Epoch: 0010 train_loss= 1.34524 time= 0.00262
Epoch: 0011 train_loss= 1.38088 time= 0.00107
Epoch: 0012 train_loss= 1.32342 time= 0.00107
Epoch: 0013 train_loss= 1.19297 time= 0.00121
Epoch: 0014 train_loss= 1.14971 time= 0.00338
Epoch: 0015 train_loss= 1.17098 time= 0.00184
Epoch: 0016 train_loss= 1.06026 time= 0.00107
Epoch: 0017 train_loss= 1.01928 time= 0.00107
Epoch: 0018 train_loss= 0.94176 time= 0.00108
Epoch: 0019 train_loss= 0.91691 time= 0.00308
Epoch: 0020 train_loss= 0.91385 time= 0.00139
Epoch: 0021 train_loss= 0.84433 time= 0.00317
Epoch: 002

Epoch: 0011 train_loss= 1.27796 time= 0.00400
Epoch: 0012 train_loss= 1.25475 time= 0.00410
Epoch: 0013 train_loss= 1.33252 time= 0.00300
Epoch: 0014 train_loss= 1.10707 time= 0.00400
Epoch: 0015 train_loss= 1.09424 time= 0.00400
Epoch: 0016 train_loss= 0.99701 time= 0.00400
Epoch: 0017 train_loss= 1.00802 time= 0.00300
Epoch: 0018 train_loss= 0.96859 time= 0.00400
Epoch: 0019 train_loss= 0.94026 time= 0.00400
Epoch: 0020 train_loss= 0.88406 time= 0.00400
Epoch: 0021 train_loss= 0.88021 time= 0.00400
Epoch: 0022 train_loss= 0.82449 time= 0.00400
Epoch: 0023 train_loss= 0.80584 time= 0.00300
Epoch: 0024 train_loss= 0.78778 time= 0.00400
Epoch: 0025 train_loss= 0.76200 time= 0.00246
Epoch: 0026 train_loss= 0.75714 time= 0.00300
Epoch: 0027 train_loss= 0.73231 time= 0.00290
Epoch: 0028 train_loss= 0.73206 time= 0.00313
Epoch: 0029 train_loss= 0.73702 time= 0.00432
Epoch: 0030 train_loss= 0.72378 time= 0.00300
Epoch: 0031 train_loss= 0.72266 time= 0.00400
Epoch: 0032 train_loss= 0.72258 ti

Epoch: 0001 train_loss= 1.77494 time= 0.00343
Epoch: 0002 train_loss= 1.71793 time= 0.00422
Epoch: 0003 train_loss= 1.65417 time= 0.00381
Epoch: 0004 train_loss= 1.66158 time= 0.00300
Epoch: 0005 train_loss= 1.57081 time= 0.00300
Epoch: 0006 train_loss= 1.50774 time= 0.00405
Epoch: 0007 train_loss= 1.48067 time= 0.00302
Epoch: 0008 train_loss= 1.49829 time= 0.00306
Epoch: 0009 train_loss= 1.42822 time= 0.00390
Epoch: 0010 train_loss= 1.38502 time= 0.00402
Epoch: 0011 train_loss= 1.36068 time= 0.00423
Epoch: 0012 train_loss= 1.24986 time= 0.00377
Epoch: 0013 train_loss= 1.26194 time= 0.00294
Epoch: 0014 train_loss= 1.21746 time= 0.00383
Epoch: 0015 train_loss= 1.05819 time= 0.00293
Epoch: 0016 train_loss= 1.08818 time= 0.00300
Epoch: 0017 train_loss= 1.02015 time= 0.00292
Epoch: 0018 train_loss= 0.95709 time= 0.00500
Epoch: 0019 train_loss= 0.92669 time= 0.00412
Epoch: 0020 train_loss= 0.90485 time= 0.00387
Epoch: 0021 train_loss= 0.88964 time= 0.00400
Epoch: 0022 train_loss= 0.86135 ti

Epoch: 0194 train_loss= 0.51549 time= 0.00400
Epoch: 0195 train_loss= 0.51168 time= 0.00412
Epoch: 0196 train_loss= 0.51479 time= 0.00388
Epoch: 0197 train_loss= 0.51062 time= 0.00400
Epoch: 0198 train_loss= 0.51119 time= 0.00400
Epoch: 0199 train_loss= 0.51175 time= 0.00400
Epoch: 0200 train_loss= 0.51006 time= 0.00388
Optimization Finished!
Test ROC score: 0.7135802469135802
Test AP score: 0.7669401021934097
Using wisconsin dataset
Epoch: 0001 train_loss= 1.80092 time= 0.00347
Epoch: 0002 train_loss= 1.58862 time= 0.00256
Epoch: 0003 train_loss= 1.65252 time= 0.00300
Epoch: 0004 train_loss= 1.63913 time= 0.00280
Epoch: 0005 train_loss= 1.63734 time= 0.00399
Epoch: 0006 train_loss= 1.49347 time= 0.00400
Epoch: 0007 train_loss= 1.49249 time= 0.00260
Epoch: 0008 train_loss= 1.43516 time= 0.00200
Epoch: 0009 train_loss= 1.47979 time= 0.00304
Epoch: 0010 train_loss= 1.37063 time= 0.00300
Epoch: 0011 train_loss= 1.31474 time= 0.00300
Epoch: 0012 train_loss= 1.18378 time= 0.00300
Epoch: 001

Epoch: 0176 train_loss= 0.51040 time= 0.00471
Epoch: 0177 train_loss= 0.50816 time= 0.00267
Epoch: 0178 train_loss= 0.50612 time= 0.00500
Epoch: 0179 train_loss= 0.50761 time= 0.00400
Epoch: 0180 train_loss= 0.50680 time= 0.00280
Epoch: 0181 train_loss= 0.50847 time= 0.00399
Epoch: 0182 train_loss= 0.50833 time= 0.00500
Epoch: 0183 train_loss= 0.50723 time= 0.00371
Epoch: 0184 train_loss= 0.50422 time= 0.00300
Epoch: 0185 train_loss= 0.50520 time= 0.00295
Epoch: 0186 train_loss= 0.50795 time= 0.00399
Epoch: 0187 train_loss= 0.50257 time= 0.00401
Epoch: 0188 train_loss= 0.50515 time= 0.00356
Epoch: 0189 train_loss= 0.50448 time= 0.00274
Epoch: 0190 train_loss= 0.50744 time= 0.00297
Epoch: 0191 train_loss= 0.50833 time= 0.00353
Epoch: 0192 train_loss= 0.50774 time= 0.00294
Epoch: 0193 train_loss= 0.50159 time= 0.00300
Epoch: 0194 train_loss= 0.50931 time= 0.00281
Epoch: 0195 train_loss= 0.50529 time= 0.00288
Epoch: 0196 train_loss= 0.50629 time= 0.00400
Epoch: 0197 train_loss= 0.50713 ti

Epoch: 0160 train_loss= 0.52183 time= 0.00465
Epoch: 0161 train_loss= 0.52538 time= 0.00357
Epoch: 0162 train_loss= 0.52260 time= 0.00493
Epoch: 0163 train_loss= 0.52114 time= 0.00305
Epoch: 0164 train_loss= 0.52304 time= 0.00447
Epoch: 0165 train_loss= 0.52207 time= 0.00546
Epoch: 0166 train_loss= 0.51888 time= 0.00359
Epoch: 0167 train_loss= 0.51894 time= 0.00440
Epoch: 0168 train_loss= 0.51959 time= 0.00500
Epoch: 0169 train_loss= 0.51785 time= 0.00406
Epoch: 0170 train_loss= 0.52059 time= 0.00353
Epoch: 0171 train_loss= 0.52404 time= 0.00398
Epoch: 0172 train_loss= 0.51930 time= 0.00495
Epoch: 0173 train_loss= 0.51728 time= 0.00431
Epoch: 0174 train_loss= 0.52000 time= 0.00302
Epoch: 0175 train_loss= 0.51883 time= 0.00109
Epoch: 0176 train_loss= 0.51836 time= 0.00107
Epoch: 0177 train_loss= 0.51930 time= 0.00239
Epoch: 0178 train_loss= 0.51419 time= 0.00197
Epoch: 0179 train_loss= 0.52012 time= 0.00107
Epoch: 0180 train_loss= 0.51849 time= 0.00107
Epoch: 0181 train_loss= 0.51844 ti

Epoch: 0143 train_loss= 0.52032 time= 0.00300
Epoch: 0144 train_loss= 0.52216 time= 0.00300
Epoch: 0145 train_loss= 0.51867 time= 0.00400
Epoch: 0146 train_loss= 0.51596 time= 0.00400
Epoch: 0147 train_loss= 0.52065 time= 0.00535
Epoch: 0148 train_loss= 0.51987 time= 0.00400
Epoch: 0149 train_loss= 0.51720 time= 0.00372
Epoch: 0150 train_loss= 0.51794 time= 0.00499
Epoch: 0151 train_loss= 0.52216 time= 0.00300
Epoch: 0152 train_loss= 0.51978 time= 0.00200
Epoch: 0153 train_loss= 0.51781 time= 0.00300
Epoch: 0154 train_loss= 0.52059 time= 0.00426
Epoch: 0155 train_loss= 0.51630 time= 0.00474
Epoch: 0156 train_loss= 0.51660 time= 0.00401
Epoch: 0157 train_loss= 0.52054 time= 0.00299
Epoch: 0158 train_loss= 0.51950 time= 0.00337
Epoch: 0159 train_loss= 0.51758 time= 0.00399
Epoch: 0160 train_loss= 0.51676 time= 0.00300
Epoch: 0161 train_loss= 0.51857 time= 0.00301
Epoch: 0162 train_loss= 0.51549 time= 0.00394
Epoch: 0163 train_loss= 0.51515 time= 0.00600
Epoch: 0164 train_loss= 0.51379 ti

Epoch: 0168 train_loss= 0.51110 time= 0.00400
Epoch: 0169 train_loss= 0.50792 time= 0.00332
Epoch: 0170 train_loss= 0.50796 time= 0.00400
Epoch: 0171 train_loss= 0.50888 time= 0.00400
Epoch: 0172 train_loss= 0.50852 time= 0.00300
Epoch: 0173 train_loss= 0.50519 time= 0.00428
Epoch: 0174 train_loss= 0.50917 time= 0.00267
Epoch: 0175 train_loss= 0.50743 time= 0.00327
Epoch: 0176 train_loss= 0.50831 time= 0.00372
Epoch: 0177 train_loss= 0.50799 time= 0.00400
Epoch: 0178 train_loss= 0.51045 time= 0.00300
Epoch: 0179 train_loss= 0.51064 time= 0.00260
Epoch: 0180 train_loss= 0.50451 time= 0.00295
Epoch: 0181 train_loss= 0.50529 time= 0.00200
Epoch: 0182 train_loss= 0.50848 time= 0.00200
Epoch: 0183 train_loss= 0.50448 time= 0.00300
Epoch: 0184 train_loss= 0.50571 time= 0.00400
Epoch: 0185 train_loss= 0.50738 time= 0.00300
Epoch: 0186 train_loss= 0.50466 time= 0.00297
Epoch: 0187 train_loss= 0.50572 time= 0.00400
Epoch: 0188 train_loss= 0.50707 time= 0.00400
Epoch: 0189 train_loss= 0.50401 ti

Epoch: 0155 train_loss= 0.51519 time= 0.00492
Epoch: 0156 train_loss= 0.51656 time= 0.00298
Epoch: 0157 train_loss= 0.51666 time= 0.00419
Epoch: 0158 train_loss= 0.51638 time= 0.00400
Epoch: 0159 train_loss= 0.51230 time= 0.00285
Epoch: 0160 train_loss= 0.51783 time= 0.00280
Epoch: 0161 train_loss= 0.51378 time= 0.00300
Epoch: 0162 train_loss= 0.51479 time= 0.00267
Epoch: 0163 train_loss= 0.51128 time= 0.00201
Epoch: 0164 train_loss= 0.51192 time= 0.00200
Epoch: 0165 train_loss= 0.51296 time= 0.00296
Epoch: 0166 train_loss= 0.51392 time= 0.00405
Epoch: 0167 train_loss= 0.51043 time= 0.00401
Epoch: 0168 train_loss= 0.51051 time= 0.00324
Epoch: 0169 train_loss= 0.50903 time= 0.00375
Epoch: 0170 train_loss= 0.51167 time= 0.00454
Epoch: 0171 train_loss= 0.51312 time= 0.00304
Epoch: 0172 train_loss= 0.51374 time= 0.00401
Epoch: 0173 train_loss= 0.51132 time= 0.00400
Epoch: 0174 train_loss= 0.51349 time= 0.00395
Epoch: 0175 train_loss= 0.50993 time= 0.00307
Epoch: 0176 train_loss= 0.50949 ti

Epoch: 0145 train_loss= 0.51604 time= 0.00284
Epoch: 0146 train_loss= 0.51613 time= 0.00381
Epoch: 0147 train_loss= 0.51514 time= 0.00300
Epoch: 0148 train_loss= 0.51964 time= 0.00300
Epoch: 0149 train_loss= 0.51665 time= 0.00290
Epoch: 0150 train_loss= 0.51815 time= 0.00299
Epoch: 0151 train_loss= 0.51967 time= 0.00400
Epoch: 0152 train_loss= 0.51541 time= 0.00344
Epoch: 0153 train_loss= 0.51181 time= 0.00399
Epoch: 0154 train_loss= 0.51430 time= 0.00300
Epoch: 0155 train_loss= 0.51504 time= 0.00455
Epoch: 0156 train_loss= 0.51362 time= 0.00251
Epoch: 0157 train_loss= 0.51145 time= 0.00300
Epoch: 0158 train_loss= 0.51397 time= 0.00200
Epoch: 0159 train_loss= 0.51247 time= 0.00300
Epoch: 0160 train_loss= 0.51562 time= 0.00287
Epoch: 0161 train_loss= 0.50990 time= 0.00274
Epoch: 0162 train_loss= 0.51336 time= 0.00363
Epoch: 0163 train_loss= 0.51271 time= 0.00289
Epoch: 0164 train_loss= 0.51323 time= 0.00300
Epoch: 0165 train_loss= 0.51190 time= 0.00327
Epoch: 0166 train_loss= 0.51252 ti

Epoch: 0134 train_loss= 0.52764 time= 0.00523
Epoch: 0135 train_loss= 0.52292 time= 0.00316
Epoch: 0136 train_loss= 0.52125 time= 0.00407
Epoch: 0137 train_loss= 0.52064 time= 0.00405
Epoch: 0138 train_loss= 0.52276 time= 0.00401
Epoch: 0139 train_loss= 0.52072 time= 0.00334
Epoch: 0140 train_loss= 0.52095 time= 0.00301
Epoch: 0141 train_loss= 0.52027 time= 0.00300
Epoch: 0142 train_loss= 0.51789 time= 0.00306
Epoch: 0143 train_loss= 0.52189 time= 0.00394
Epoch: 0144 train_loss= 0.51781 time= 0.00407
Epoch: 0145 train_loss= 0.52298 time= 0.00259
Epoch: 0146 train_loss= 0.51863 time= 0.00300
Epoch: 0147 train_loss= 0.51609 time= 0.00326
Epoch: 0148 train_loss= 0.51672 time= 0.00299
Epoch: 0149 train_loss= 0.51839 time= 0.00111
Epoch: 0150 train_loss= 0.51945 time= 0.00366
Epoch: 0151 train_loss= 0.51853 time= 0.00108
Epoch: 0152 train_loss= 0.51600 time= 0.00604
Epoch: 0153 train_loss= 0.51740 time= 0.00099
Epoch: 0154 train_loss= 0.51485 time= 0.00108
Epoch: 0155 train_loss= 0.51516 ti