In [1]:
from __future__ import division
from __future__ import print_function

import argparse
import time

import numpy as np
import scipy.sparse as sp
import torch
from torch import optim
import torch.nn as nn
import torch.nn.functional as F
import networkx as nx

from utils import *

In [2]:
# Experiment options. Each one has a default, so the notebook runs with no
# command-line input at all.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--seed',
    type=int,
    default=42,
    help='Random seed.',
)
parser.add_argument(
    '--dataset',
    type=str,
    default='email',
    help='type of dataset.',
)
parser.add_argument(
    '--hops',
    type=int,
    default=20,
    help='number of hops',
)

# parse_known_args (rather than parse_args) returns unrecognised argv entries
# as a second value instead of exiting, so extra arguments supplied by the
# process launcher are tolerated; that leftover list is discarded into `_`.
args, _ = parser.parse_known_args()

In [3]:
def _hop_attention_features(features, adj_norm, hop):
    """Blend 0..hop propagation steps of ``features`` with per-row softmax weights.

    Builds the sequence ``features, adj_norm @ features, ...`` (``hop + 1``
    terms), scores each term by its row-wise cosine similarity with the
    un-propagated ``features``, soft-maxes those scores across the terms for
    every row, and returns the correspondingly weighted sum of the terms.

    Args:
        features: 2-D tensor with one row per node.
        adj_norm: matrix passed as the first argument of ``torch.spmm``
            (presumably a sparse normalised adjacency -- confirm against
            ``normalize_adj`` in ``utils``).
        hop: number of propagation steps.

    Returns:
        Tensor shaped like ``features``.
    """
    # propagated[k] holds the features after k multiplications by adj_norm.
    propagated = [features]
    for _ in range(hop):
        propagated.append(torch.spmm(adj_norm, propagated[-1]))

    # Row-wise cosine similarity between the raw features and every step; the
    # 1e-10 keeps all-zero rows from dividing by zero.
    norm_fea = torch.norm(features, 2, 1).add(1e-10)
    scores = []
    for step in propagated:
        norm_cur = torch.norm(step, 2, 1).add(1e-10)
        cos = torch.div((features * step).sum(1), norm_fea)
        cos = torch.div(cos, norm_cur)
        scores.append(cos.unsqueeze(-1))
    weight = F.softmax(torch.cat(scores, dim=1), dim=1)

    # Weighted sum over steps, one broadcast multiply per step. Accumulating
    # in step order matches a node-by-node summation element for element while
    # avoiding n_nodes * (hop + 1) separate tensor operations.
    blended = 0.
    for j, step in enumerate(propagated):
        blended = blended + weight[:, j].unsqueeze(-1) * step
    return blended


def run(args):
    """Run one link-prediction evaluation and return its scores.

    Loads the dataset named by ``args.dataset``, splits off test edges with
    ``mask_test_edges``, and for each ``r`` in a dataset-dependent list builds
    node embeddings from ``normalize_adj(adj, r)`` via hop-weighted
    propagation. The element-wise maximum over all ``r`` is kept, every node
    pair is scored with the sigmoid of the embedding inner product, and the
    result is handed to ``get_roc_score``.

    Args:
        args: object exposing ``dataset`` (str) and ``hops`` (int).

    Returns:
        The ``(roc_score, ap_score)`` pair returned by ``get_roc_score``.
    """
    print("Using {} dataset".format(args.dataset))

    adj, features = load_data(args.dataset)

    # Copy of the full graph with its diagonal subtracted out; passed to the
    # scorer together with the held-out edges.
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    # From here on only the training graph is used for propagation.
    adj_train, test_edges, test_edges_false = mask_test_edges(adj)
    adj = adj_train

    hop = args.hops
    if args.dataset == 'pubmed':
        r_list = [0.3, 0.4, 0.5]
    else:
        r_list = [0, 0.1, 0.2, 0.3, 0.4, 0.5]

    # Running element-wise maximum of the embeddings over every r.
    input_features = None
    for r in r_list:
        adj_norm = normalize_adj(adj, r)
        input_feas = _hop_attention_features(features, adj_norm, hop)
        if input_features is None:
            input_features = input_feas
        else:
            input_features = torch.maximum(input_features, input_feas)

    # Dense all-pairs edge scores.
    sim = torch.sigmoid(torch.mm(input_features, input_features.T))

    roc_score, ap_score = get_roc_score(sim.numpy(), adj_orig, test_edges, test_edges_false)
    print(f'AUC: {roc_score:.4f}, AP: {ap_score:.4f}')

    return roc_score, ap_score

In [4]:
if __name__ == '__main__':
    # set_seed comes from utils and is called once, before the first
    # repetition (presumably it seeds the RNGs -- confirm in utils).
    set_seed(args.seed)

    # Per-repetition scores, kept in run order.
    all_ap, all_auc = [], []
    for i in range(10):
        auc, ap = run(args)
        all_auc.append(auc)
        all_ap.append(ap)

    # Mean and standard deviation of each metric over the ten repetitions.
    print('############################# SUM UP ###########################')
    print("AP MEAN : ", np.mean(all_ap))
    print("AP STD : ", np.std(all_ap))
    print("AUC MEAN : ", np.mean(all_auc))
    print("AUC STD : ", np.std(all_auc))

Using email dataset
AUC: 0.9170, AP: 0.9001
Using email dataset
AUC: 0.9220, AP: 0.9078
Using email dataset
AUC: 0.9196, AP: 0.9017
Using email dataset
AUC: 0.9120, AP: 0.8970
Using email dataset
AUC: 0.9174, AP: 0.9015
Using email dataset
AUC: 0.9210, AP: 0.9108
Using email dataset
AUC: 0.9211, AP: 0.9088
Using email dataset
AUC: 0.9149, AP: 0.9013
Using email dataset
AUC: 0.9184, AP: 0.8944
Using email dataset
AUC: 0.9092, AP: 0.8941
############################# SUM UP ###########################
AP MEAN :  0.9017744755572012
AP STD :  0.005532580332027531
AUC MEAN :  0.917276957207483
AUC STD :  0.003949125267972038
