In [1]:
from __future__ import division
from __future__ import print_function

import argparse
import time

import numpy as np
import pandas as pd
import scipy.sparse as sp
import torch
from torch import optim
import networkx as nx

from gae.model import GCNModelVAE
from gae.optimizer import loss_function
from gae.utils import mask_test_edges, preprocess_graph, get_roc_score

In [2]:
# Experiment hyper-parameters, exposed as command-line style options so the
# same cell works both in a notebook and when exported to a script.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--model',
    default='gcn_vae',
    type=str,
    help="models used",
)
parser.add_argument(
    '--seed',
    default=42,
    type=int,
    help='Random seed.',
)
parser.add_argument(
    '--epochs',
    default=200,
    type=int,
    help='Number of epochs to train.',
)
parser.add_argument(
    '--hidden1',
    default=32,
    type=int,
    help='Number of units in hidden layer 1.',
)
parser.add_argument(
    '--hidden2',
    default=16,
    type=int,
    help='Number of units in hidden layer 2.',
)
parser.add_argument(
    '--lr',
    default=0.01,
    type=float,
    help='Initial learning rate.',
)
parser.add_argument(
    '--dropout',
    default=0.,
    type=float,
    help='Dropout rate (1 - keep probability).',
)
parser.add_argument(
    '--dataset-str',
    default='email',
    type=str,
    help='type of dataset.',
)

# parse_known_args() returns unrecognised argv entries separately instead of
# aborting, so extra arguments on the process command line are tolerated.
args, _ = parser.parse_known_args()

In [3]:
def _build_symmetric_adjacency(edge_array, n_nodes):
    """Turn an (E, 2) array of node-id pairs into a binary symmetric CSR matrix.

    Self-loops are dropped, every remaining edge is stored in both directions,
    and repeated edges collapse to a single entry of 1.0.

    Raises:
        ValueError: if any endpoint lies outside ``[0, n_nodes)``.
    """
    edge_array = np.asarray(edge_array, dtype=np.int64).reshape(-1, 2)
    src, dst = edge_array[:, 0], edge_array[:, 1]

    not_self_loop = src != dst
    src, dst = src[not_self_loop], dst[not_self_loop]

    if src.size:
        lowest = min(src.min(), dst.min())
        highest = max(src.max(), dst.max())
        if lowest < 0 or highest >= n_nodes:
            raise ValueError(
                "Edge list references node ids in [{}, {}] but the graph has "
                "only {} nodes.".format(lowest, highest, n_nodes)
            )

    # Store each edge in both directions so the matrix is symmetric.
    rows = np.concatenate([src, dst])
    cols = np.concatenate([dst, src])
    adj = sp.coo_matrix(
        (np.ones(rows.size), (rows, cols)), shape=(n_nodes, n_nodes)
    ).tocsr()
    # COO -> CSR conversion sums duplicate coordinates; reset to a 0/1 matrix.
    adj.data[:] = 1.0
    return adj


def load_data(adj_name):
    """Load a graph by name and return its adjacency matrix and node features.

    Args:
        adj_name: ``'email'`` (edge list read from ``datasets/email.csv``) or
            ``'Citeseer'`` (loaded through ``Planetoid``).

    Returns:
        A tuple ``(adj, features)`` where ``adj`` is a symmetric, binary
        ``scipy.sparse`` CSR matrix without self-loops and ``features`` is a
        ``torch`` identity matrix with one row per node.

    Raises:
        ValueError: if ``adj_name`` is not a supported dataset, or if the edge
            list references a node id outside the expected node range.
    """
    if adj_name == 'email':
        nodes_numbers = 986
        # The file is read without a header and its first row is discarded
        # (presumably a header line -- confirm against datasets/email.csv).
        edges = pd.read_csv('datasets/email.csv', header=None).iloc[1:,]
        edge_array = edges.astype('int')[[0, 1]].to_numpy()

    elif adj_name == 'Citeseer':
        nodes_numbers = 3327
        # NOTE(review): `Planetoid` is not imported in any visible cell, so this
        # branch raises NameError as written. It is presumably
        # torch_geometric.datasets.Planetoid -- add that import to the first
        # cell once confirmed.
        datasets = Planetoid('./datasets', adj_name)
        edges = datasets[0].edge_index
        edge_array = np.array(
            [[edges[0, i].item(), edges[1, i].item()] for i in range(edges.shape[1])]
        )
    else:
        # Fail fast: previously this only printed a message and then crashed
        # further down on an undefined local variable.
        raise ValueError(
            "Dataset {!r} does not exist; expected 'email' or 'Citeseer'.".format(adj_name)
        )

    # Built directly with scipy.sparse: avoids an O(n^2) dense intermediate and
    # nx.from_numpy_matrix, which was removed in NetworkX 3.0.
    adj = _build_symmetric_adjacency(edge_array, nodes_numbers)

    # No node attributes are loaded; each node gets a one-hot identity feature.
    features = torch.eye(nodes_numbers)

    return adj, features

In [4]:
def gae_for(args):
    """Run one full train-and-evaluate cycle of the graph VAE.

    Loads the dataset named by ``args.dataset_str``, splits its edges with
    ``mask_test_edges``, trains a ``GCNModelVAE`` for ``args.epochs`` epochs
    with Adam, and scores the final embedding with ``get_roc_score`` on the
    held-out test edges.

    Args:
        args: namespace providing ``dataset_str``, ``hidden1``, ``hidden2``,
            ``dropout``, ``lr`` and ``epochs``.

    Returns:
        ``(ap_score, roc_score)`` as returned by ``get_roc_score`` (note the
        order: AP first, ROC second).
    """
    print("Using {} dataset".format(args.dataset_str))
    adj, features = load_data(args.dataset_str)
    n_nodes, feat_dim = features.shape

    # Copy of the full graph with its diagonal removed; used only for scoring.
    diagonal = sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj_orig = adj - diagonal
    adj_orig.eliminate_zeros()

    # From here on only the training graph returned by the split is used.
    adj_train, _, test_edges, test_edges_false = mask_test_edges(adj)

    adj_norm = preprocess_graph(adj_train)
    # Reconstruction target: training adjacency plus the identity, as a dense tensor.
    adj_label = torch.FloatTensor((adj_train + sp.eye(adj_train.shape[0])).toarray())

    # Weighting terms derived from the ratio of absent to present entries.
    n_entries = adj_train.shape[0] * adj_train.shape[0]
    n_present = adj_train.sum()
    n_absent = n_entries - n_present
    pos_weight = torch.tensor(float(n_absent) / n_present)
    norm = n_entries / float(n_absent * 2)

    model = GCNModelVAE(feat_dim, args.hidden1, args.hidden2, args.dropout)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    hidden_emb = None
    for epoch_number in range(1, args.epochs + 1):
        epoch_start = time.time()

        model.train()
        optimizer.zero_grad()
        recovered, mu, logvar = model(features, adj_norm)
        loss = loss_function(
            preds=recovered,
            labels=adj_label,
            mu=mu,
            logvar=logvar,
            n_nodes=n_nodes,
            norm=norm,
            pos_weight=pos_weight,
        )
        loss.backward()
        cur_loss = loss.item()
        optimizer.step()

        # Keep the latest mean embedding; the last one is what gets evaluated.
        hidden_emb = mu.data.numpy()

        elapsed = time.time() - epoch_start
        print("Epoch:", '%04d' % epoch_number,
              "train_loss=", "{:.5f}".format(cur_loss),
              "time=", "{:.5f}".format(elapsed))

    print("Optimization Finished!")

    roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false)
    print('Test ROC score: ' + str(roc_score))
    print('Test AP score: ' + str(ap_score))

    return ap_score, roc_score

In [5]:
if __name__ == '__main__':
    # Repeat the whole experiment 10 times and summarise the spread of scores.
    all_ap, all_auc = [], []
    for i in range(10):
        ap, auc = gae_for(args)
        all_ap.append(ap)
        all_auc.append(auc)

    print('AP MEAN : ', np.mean(np.array(all_ap)))
    print('AP STD : ', np.std(np.array(all_ap)))

    print('AUC MEAN : ', np.mean(np.array(all_auc)))
    print('AUC STD : ', np.std(np.array(all_auc)))

Using email dataset
Epoch: 0001 train_loss= 1.77569 time= 0.05073
Epoch: 0002 train_loss= 1.70873 time= 0.03447
Epoch: 0003 train_loss= 1.67483 time= 0.02757
Epoch: 0004 train_loss= 1.67190 time= 0.02034
Epoch: 0005 train_loss= 1.63603 time= 0.03990
Epoch: 0006 train_loss= 1.54513 time= 0.02599
Epoch: 0007 train_loss= 1.48212 time= 0.02918
Epoch: 0008 train_loss= 1.43510 time= 0.02402
Epoch: 0009 train_loss= 1.38395 time= 0.03099
Epoch: 0010 train_loss= 1.32574 time= 0.02700
Epoch: 0011 train_loss= 1.26280 time= 0.02598
Epoch: 0012 train_loss= 1.19015 time= 0.03000
Epoch: 0013 train_loss= 1.16169 time= 0.03102
Epoch: 0014 train_loss= 1.07121 time= 0.02798
Epoch: 0015 train_loss= 1.02884 time= 0.03001
Epoch: 0016 train_loss= 0.99220 time= 0.02926
Epoch: 0017 train_loss= 0.94895 time= 0.03000
Epoch: 0018 train_loss= 0.90730 time= 0.02706
Epoch: 0019 train_loss= 0.87004 time= 0.03162
Epoch: 0020 train_loss= 0.84865 time= 0.02605
Epoch: 0021 train_loss= 0.81930 time= 0.02697
Epoch: 0022 tr

Epoch: 0181 train_loss= 0.54510 time= 0.03100
Epoch: 0182 train_loss= 0.54510 time= 0.02900
Epoch: 0183 train_loss= 0.54491 time= 0.03053
Epoch: 0184 train_loss= 0.54242 time= 0.02947
Epoch: 0185 train_loss= 0.54348 time= 0.03052
Epoch: 0186 train_loss= 0.54331 time= 0.03100
Epoch: 0187 train_loss= 0.54155 time= 0.03066
Epoch: 0188 train_loss= 0.54189 time= 0.03000
Epoch: 0189 train_loss= 0.54110 time= 0.03219
Epoch: 0190 train_loss= 0.54149 time= 0.03148
Epoch: 0191 train_loss= 0.54005 time= 0.02999
Epoch: 0192 train_loss= 0.54054 time= 0.03400
Epoch: 0193 train_loss= 0.54059 time= 0.03101
Epoch: 0194 train_loss= 0.54025 time= 0.03000
Epoch: 0195 train_loss= 0.54045 time= 0.02341
Epoch: 0196 train_loss= 0.54010 time= 0.01697
Epoch: 0197 train_loss= 0.53878 time= 0.03312
Epoch: 0198 train_loss= 0.53826 time= 0.01717
Epoch: 0199 train_loss= 0.53899 time= 0.03339
Epoch: 0200 train_loss= 0.53832 time= 0.03176
Optimization Finished!
Test ROC score: 0.8994934934220832
Test AP score: 0.90871

Epoch: 0161 train_loss= 0.54286 time= 0.02696
Epoch: 0162 train_loss= 0.54126 time= 0.02760
Epoch: 0163 train_loss= 0.54028 time= 0.03004
Epoch: 0164 train_loss= 0.54026 time= 0.02826
Epoch: 0165 train_loss= 0.54088 time= 0.03076
Epoch: 0166 train_loss= 0.53972 time= 0.03003
Epoch: 0167 train_loss= 0.53992 time= 0.03092
Epoch: 0168 train_loss= 0.53951 time= 0.03007
Epoch: 0169 train_loss= 0.53872 time= 0.02998
Epoch: 0170 train_loss= 0.53799 time= 0.02700
Epoch: 0171 train_loss= 0.53733 time= 0.02903
Epoch: 0172 train_loss= 0.53950 time= 0.02999
Epoch: 0173 train_loss= 0.53724 time= 0.03355
Epoch: 0174 train_loss= 0.53765 time= 0.03500
Epoch: 0175 train_loss= 0.53508 time= 0.03100
Epoch: 0176 train_loss= 0.53514 time= 0.03000
Epoch: 0177 train_loss= 0.53539 time= 0.03069
Epoch: 0178 train_loss= 0.53457 time= 0.03002
Epoch: 0179 train_loss= 0.53318 time= 0.03093
Epoch: 0180 train_loss= 0.53375 time= 0.02800
Epoch: 0181 train_loss= 0.53360 time= 0.03205
Epoch: 0182 train_loss= 0.53171 ti

Epoch: 0140 train_loss= 0.54833 time= 0.03099
Epoch: 0141 train_loss= 0.54717 time= 0.02902
Epoch: 0142 train_loss= 0.54722 time= 0.03100
Epoch: 0143 train_loss= 0.54658 time= 0.03012
Epoch: 0144 train_loss= 0.54458 time= 0.03307
Epoch: 0145 train_loss= 0.54547 time= 0.03100
Epoch: 0146 train_loss= 0.54476 time= 0.02600
Epoch: 0147 train_loss= 0.54380 time= 0.03096
Epoch: 0148 train_loss= 0.54295 time= 0.03299
Epoch: 0149 train_loss= 0.54326 time= 0.03100
Epoch: 0150 train_loss= 0.54415 time= 0.02857
Epoch: 0151 train_loss= 0.54421 time= 0.03037
Epoch: 0152 train_loss= 0.54377 time= 0.03000
Epoch: 0153 train_loss= 0.54290 time= 0.02825
Epoch: 0154 train_loss= 0.54176 time= 0.03035
Epoch: 0155 train_loss= 0.54173 time= 0.02900
Epoch: 0156 train_loss= 0.54172 time= 0.02901
Epoch: 0157 train_loss= 0.54116 time= 0.03000
Epoch: 0158 train_loss= 0.54232 time= 0.02901
Epoch: 0159 train_loss= 0.54141 time= 0.03094
Epoch: 0160 train_loss= 0.54104 time= 0.03406
Epoch: 0161 train_loss= 0.54058 ti

Epoch: 0116 train_loss= 0.56675 time= 0.03009
Epoch: 0117 train_loss= 0.56612 time= 0.02969
Epoch: 0118 train_loss= 0.56562 time= 0.02974
Epoch: 0119 train_loss= 0.56353 time= 0.02899
Epoch: 0120 train_loss= 0.56375 time= 0.03094
Epoch: 0121 train_loss= 0.56346 time= 0.02806
Epoch: 0122 train_loss= 0.56335 time= 0.03000
Epoch: 0123 train_loss= 0.56327 time= 0.03002
Epoch: 0124 train_loss= 0.56359 time= 0.03199
Epoch: 0125 train_loss= 0.56209 time= 0.02900
Epoch: 0126 train_loss= 0.56315 time= 0.03095
Epoch: 0127 train_loss= 0.56146 time= 0.02905
Epoch: 0128 train_loss= 0.56243 time= 0.03400
Epoch: 0129 train_loss= 0.56208 time= 0.03101
Epoch: 0130 train_loss= 0.56071 time= 0.02902
Epoch: 0131 train_loss= 0.56163 time= 0.02998
Epoch: 0132 train_loss= 0.56092 time= 0.02592
Epoch: 0133 train_loss= 0.56032 time= 0.02683
Epoch: 0134 train_loss= 0.56025 time= 0.03000
Epoch: 0135 train_loss= 0.56053 time= 0.03000
Epoch: 0136 train_loss= 0.55865 time= 0.02800
Epoch: 0137 train_loss= 0.55900 ti

Epoch: 0096 train_loss= 0.57329 time= 0.03098
Epoch: 0097 train_loss= 0.57065 time= 0.03101
Epoch: 0098 train_loss= 0.57043 time= 0.02999
Epoch: 0099 train_loss= 0.56942 time= 0.02685
Epoch: 0100 train_loss= 0.56890 time= 0.03181
Epoch: 0101 train_loss= 0.56835 time= 0.01799
Epoch: 0102 train_loss= 0.56848 time= 0.03213
Epoch: 0103 train_loss= 0.56848 time= 0.03441
Epoch: 0104 train_loss= 0.56789 time= 0.02900
Epoch: 0105 train_loss= 0.56521 time= 0.01966
Epoch: 0106 train_loss= 0.56358 time= 0.01826
Epoch: 0107 train_loss= 0.56446 time= 0.03379
Epoch: 0108 train_loss= 0.56158 time= 0.03370
Epoch: 0109 train_loss= 0.55924 time= 0.02819
Epoch: 0110 train_loss= 0.55795 time= 0.02998
Epoch: 0111 train_loss= 0.55944 time= 0.03100
Epoch: 0112 train_loss= 0.55517 time= 0.03000
Epoch: 0113 train_loss= 0.55603 time= 0.03038
Epoch: 0114 train_loss= 0.55403 time= 0.02560
Epoch: 0115 train_loss= 0.55369 time= 0.02904
Epoch: 0116 train_loss= 0.55226 time= 0.03097
Epoch: 0117 train_loss= 0.55086 ti

Epoch: 0073 train_loss= 0.62310 time= 0.03101
Epoch: 0074 train_loss= 0.62262 time= 0.03200
Epoch: 0075 train_loss= 0.61872 time= 0.02678
Epoch: 0076 train_loss= 0.61821 time= 0.02699
Epoch: 0077 train_loss= 0.61719 time= 0.03000
Epoch: 0078 train_loss= 0.61772 time= 0.03000
Epoch: 0079 train_loss= 0.61702 time= 0.03102
Epoch: 0080 train_loss= 0.61320 time= 0.03000
Epoch: 0081 train_loss= 0.61425 time= 0.03002
Epoch: 0082 train_loss= 0.61320 time= 0.02998
Epoch: 0083 train_loss= 0.61552 time= 0.02996
Epoch: 0084 train_loss= 0.61084 time= 0.02806
Epoch: 0085 train_loss= 0.61114 time= 0.02800
Epoch: 0086 train_loss= 0.60758 time= 0.02893
Epoch: 0087 train_loss= 0.60658 time= 0.02900
Epoch: 0088 train_loss= 0.60466 time= 0.03100
Epoch: 0089 train_loss= 0.60265 time= 0.03000
Epoch: 0090 train_loss= 0.60275 time= 0.02560
Epoch: 0091 train_loss= 0.60022 time= 0.03015
Epoch: 0092 train_loss= 0.59492 time= 0.02700
Epoch: 0093 train_loss= 0.59385 time= 0.03238
Epoch: 0094 train_loss= 0.59144 ti

Epoch: 0052 train_loss= 0.64688 time= 0.03100
Epoch: 0053 train_loss= 0.64116 time= 0.02900
Epoch: 0054 train_loss= 0.63898 time= 0.03087
Epoch: 0055 train_loss= 0.64185 time= 0.02846
Epoch: 0056 train_loss= 0.64350 time= 0.03000
Epoch: 0057 train_loss= 0.63934 time= 0.02949
Epoch: 0058 train_loss= 0.63953 time= 0.03115
Epoch: 0059 train_loss= 0.63654 time= 0.02885
Epoch: 0060 train_loss= 0.63543 time= 0.03200
Epoch: 0061 train_loss= 0.63151 time= 0.03065
Epoch: 0062 train_loss= 0.63087 time= 0.03000
Epoch: 0063 train_loss= 0.63388 time= 0.02800
Epoch: 0064 train_loss= 0.63035 time= 0.03000
Epoch: 0065 train_loss= 0.62989 time= 0.02900
Epoch: 0066 train_loss= 0.62845 time= 0.02800
Epoch: 0067 train_loss= 0.62877 time= 0.03000
Epoch: 0068 train_loss= 0.62648 time= 0.02901
Epoch: 0069 train_loss= 0.62375 time= 0.03300
Epoch: 0070 train_loss= 0.62461 time= 0.03132
Epoch: 0071 train_loss= 0.62465 time= 0.03368
Epoch: 0072 train_loss= 0.62495 time= 0.03305
Epoch: 0073 train_loss= 0.62273 ti

Epoch: 0028 train_loss= 0.72552 time= 0.02701
Epoch: 0029 train_loss= 0.71490 time= 0.02982
Epoch: 0030 train_loss= 0.70865 time= 0.02713
Epoch: 0031 train_loss= 0.70633 time= 0.03200
Epoch: 0032 train_loss= 0.69599 time= 0.03016
Epoch: 0033 train_loss= 0.68921 time= 0.02989
Epoch: 0034 train_loss= 0.68670 time= 0.02991
Epoch: 0035 train_loss= 0.68207 time= 0.03139
Epoch: 0036 train_loss= 0.68214 time= 0.03100
Epoch: 0037 train_loss= 0.67697 time= 0.02900
Epoch: 0038 train_loss= 0.67342 time= 0.03700
Epoch: 0039 train_loss= 0.67063 time= 0.02900
Epoch: 0040 train_loss= 0.66531 time= 0.03133
Epoch: 0041 train_loss= 0.67037 time= 0.03179
Epoch: 0042 train_loss= 0.66501 time= 0.03000
Epoch: 0043 train_loss= 0.66574 time= 0.02900
Epoch: 0044 train_loss= 0.65862 time= 0.03000
Epoch: 0045 train_loss= 0.66049 time= 0.02700
Epoch: 0046 train_loss= 0.66013 time= 0.02700
Epoch: 0047 train_loss= 0.65595 time= 0.02473
Epoch: 0048 train_loss= 0.65228 time= 0.02528
Epoch: 0049 train_loss= 0.64873 ti

Epoch: 0008 train_loss= 1.48573 time= 0.03372
Epoch: 0009 train_loss= 1.40667 time= 0.02411
Epoch: 0010 train_loss= 1.39552 time= 0.03303
Epoch: 0011 train_loss= 1.33469 time= 0.02924
Epoch: 0012 train_loss= 1.24477 time= 0.01756
Epoch: 0013 train_loss= 1.18284 time= 0.01964
Epoch: 0014 train_loss= 1.14498 time= 0.02183
Epoch: 0015 train_loss= 1.07965 time= 0.03239
Epoch: 0016 train_loss= 1.03540 time= 0.02962
Epoch: 0017 train_loss= 1.00378 time= 0.02217
Epoch: 0018 train_loss= 0.96235 time= 0.03016
Epoch: 0019 train_loss= 0.92968 time= 0.02058
Epoch: 0020 train_loss= 0.90113 time= 0.01986
Epoch: 0021 train_loss= 0.83857 time= 0.02014
Epoch: 0022 train_loss= 0.82263 time= 0.02962
Epoch: 0023 train_loss= 0.79857 time= 0.03018
Epoch: 0024 train_loss= 0.77300 time= 0.01855
Epoch: 0025 train_loss= 0.74587 time= 0.03297
Epoch: 0026 train_loss= 0.74002 time= 0.03497
Epoch: 0027 train_loss= 0.73169 time= 0.02603
Epoch: 0028 train_loss= 0.72691 time= 0.02704
Epoch: 0029 train_loss= 0.70868 ti

Epoch: 0189 train_loss= 0.52717 time= 0.02549
Epoch: 0190 train_loss= 0.52719 time= 0.02576
Epoch: 0191 train_loss= 0.52522 time= 0.01962
Epoch: 0192 train_loss= 0.52597 time= 0.03406
Epoch: 0193 train_loss= 0.52573 time= 0.02822
Epoch: 0194 train_loss= 0.52494 time= 0.01849
Epoch: 0195 train_loss= 0.52405 time= 0.02324
Epoch: 0196 train_loss= 0.52449 time= 0.01868
Epoch: 0197 train_loss= 0.52445 time= 0.02979
Epoch: 0198 train_loss= 0.52342 time= 0.03058
Epoch: 0199 train_loss= 0.52357 time= 0.03141
Epoch: 0200 train_loss= 0.52259 time= 0.03090
Optimization Finished!
Test ROC score: 0.9006097154351133
Test AP score: 0.9021544029534668
Using email dataset
Epoch: 0001 train_loss= 1.71458 time= 0.03065
Epoch: 0002 train_loss= 1.67078 time= 0.02937
Epoch: 0003 train_loss= 1.67956 time= 0.02748
Epoch: 0004 train_loss= 1.65107 time= 0.02792
Epoch: 0005 train_loss= 1.57573 time= 0.02797
Epoch: 0006 train_loss= 1.53547 time= 0.03247
Epoch: 0007 train_loss= 1.52713 time= 0.03888
Epoch: 0008 tr

Epoch: 0171 train_loss= 0.54622 time= 0.03085
Epoch: 0172 train_loss= 0.54602 time= 0.02699
Epoch: 0173 train_loss= 0.54496 time= 0.02500
Epoch: 0174 train_loss= 0.54401 time= 0.02695
Epoch: 0175 train_loss= 0.54203 time= 0.02537
Epoch: 0176 train_loss= 0.54314 time= 0.02708
Epoch: 0177 train_loss= 0.54301 time= 0.03069
Epoch: 0178 train_loss= 0.54356 time= 0.03400
Epoch: 0179 train_loss= 0.54308 time= 0.03150
Epoch: 0180 train_loss= 0.54182 time= 0.02900
Epoch: 0181 train_loss= 0.54252 time= 0.02945
Epoch: 0182 train_loss= 0.54198 time= 0.02877
Epoch: 0183 train_loss= 0.54216 time= 0.02900
Epoch: 0184 train_loss= 0.54124 time= 0.02500
Epoch: 0185 train_loss= 0.54100 time= 0.02451
Epoch: 0186 train_loss= 0.54099 time= 0.03100
Epoch: 0187 train_loss= 0.54004 time= 0.02900
Epoch: 0188 train_loss= 0.54141 time= 0.03000
Epoch: 0189 train_loss= 0.54027 time= 0.02901
Epoch: 0190 train_loss= 0.54069 time= 0.03000
Epoch: 0191 train_loss= 0.54038 time= 0.03000
Epoch: 0192 train_loss= 0.54156 ti