In [1]:
from __future__ import division
from __future__ import print_function

import argparse
import time

import networkx as nx
import numpy as np
import pandas as pd
import scipy.sparse as sp
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim

from utils import *

In [2]:
# Command-line style configuration for the experiment. Each option has a
# default, so the cell also works inside a notebook kernel.
parser = argparse.ArgumentParser()
parser.add_argument("--seed", default=42, type=int, help="Random seed.")
parser.add_argument("--dataset", default="karate", type=str, help="type of dataset.")
parser.add_argument("--hops", default=20, type=int, help="number of hops")

# parse_known_args() tolerates unrecognised argv entries (e.g. the flags the
# Jupyter kernel is launched with) instead of exiting; they are discarded.
args, _ = parser.parse_known_args()

In [9]:
def _node_adaptive_smooth(features, adj_norm, hop):
    """Blend the 0..hop propagation steps of ``features`` with per-node weights.

    Args:
        features: dense 2-D tensor of node features (one row per node).
        adj_norm: normalized adjacency accepted by ``torch.spmm``.
        hop: number of propagation steps to compute.

    Returns:
        Tensor shaped like ``features``: for every node, the softmax-weighted
        sum of its raw and propagated feature rows.
    """
    # hop_feats[k] is adj_norm applied k times to the raw features.
    hop_feats = [features]
    for _ in range(hop):
        hop_feats.append(torch.spmm(adj_norm, hop_feats[-1]))

    # Cosine similarity between each node's raw row and its k-step row.
    # The 1e-10 offset keeps all-zero rows from dividing by zero.
    raw_norm = torch.norm(features, 2, 1).add(1e-10)
    similarities = []
    for fea in hop_feats:
        cur_norm = torch.norm(fea, 2, 1).add(1e-10)
        cosine = torch.div(torch.div((features * fea).sum(1), raw_norm), cur_norm)
        similarities.append(cosine.unsqueeze(-1))

    # One weight per (node, step); softmax runs over the step axis.
    weight = F.softmax(torch.cat(similarities, dim=1), dim=1)

    # Accumulate step by step (same order as a per-node loop would), but
    # broadcast each step's weight column over all nodes at once.
    smoothed = 0.
    for k, fea in enumerate(hop_feats):
        smoothed = smoothed + weight[:, k:k + 1] * fea
    return smoothed


def run(args):
    """Reconstruct the graph from smoothed node features and score it.

    ``load_data``, ``mask_test_edges``, ``normalize_adj`` and ``get_roc_score``
    come from the ``utils`` star import; their exact contracts are not visible
    in this notebook.

    Args:
        args: namespace providing ``dataset`` and ``hops``; ``seed`` is applied
            to NumPy and torch when present.

    Returns:
        Tuple ``(sim, adj_orig)``: ``sim`` is the NumPy array of sigmoid
        inner-product scores between all node pairs, and ``adj_orig`` is the
        loaded adjacency with its diagonal removed.
    """
    print("Using {} dataset".format(args.dataset))

    # Apply the seed so any random steps inside the helpers are repeatable
    # (presumably the edge split in mask_test_edges -- verify against utils).
    seed = getattr(args, 'seed', None)
    if seed is not None:
        np.random.seed(seed)
        torch.manual_seed(seed)

    adj, features = load_data(args.dataset)

    # Ground-truth adjacency without self-loops.
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    # Pass the configured dataset name instead of a hard-coded 'karate'.
    adj_train, test_edges, test_edges_false = mask_test_edges(adj, adj_name=args.dataset)
    adj = adj_train

    hop = args.hops
    if args.dataset == 'pubmed':
        r_list = [0.3, 0.4, 0.5]
    else:
        r_list = [0, 0.1, 0.2, 0.3, 0.4, 0.5]

    # Combine the smoothed features from every r by element-wise maximum.
    input_features = None
    for r in r_list:
        adj_norm = normalize_adj(adj, r)
        input_feas = _node_adaptive_smooth(features, adj_norm, hop)
        if input_features is None:
            input_features = input_feas
        else:
            input_features = torch.max(input_features, input_feas)

    sim = torch.sigmoid(torch.mm(input_features, input_features.T))
    sim_np = sim.numpy()

    roc_score, ap_score = get_roc_score(sim_np, adj_orig, test_edges, test_edges_false)
    print(f'AUC: {roc_score:.4f}, AP: {ap_score:.4f}')

    return sim_np, adj_orig

In [10]:
# Entry point: later cells read `adj_rec` (the pairwise score matrix) and
# `adj` (the diagonal-free adjacency) returned by run().
if __name__ == "__main__":
    (adj_rec, adj) = run(args)

Using karate dataset
AUC: 1.0000, AP: 1.0000


In [13]:
# `copy` is not needed by this cell itself but is kept because a later cell
# calls copy.deepcopy.
import copy
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Self-pairs are not candidate edges: zero the diagonal of the score matrix.
# This writes into adj_rec in place, as the per-index loop it replaces did.
np.fill_diagonal(adj_rec, 0)
s = adj_rec.reshape(-1)

# Flatten the ground truth the same way; reshape(-1) works for any graph size
# rather than assuming the 34 nodes of the karate dataset.
adj_true = adj.toarray()
true_edges = adj_true.reshape(-1)

# Sweep the decision threshold and report how the binarised reconstruction
# compares with the true adjacency.
for gama in [0.51, 0.511, 0.512, 0.513, 0.514, 0.515, 0.516, 0.517, 0.518, 0.519, 0.52]:
    print("gama : ",gama)
    # Single comparison instead of two in-place passes; keeps s's float dtype
    # so the printed edge count keeps its format.
    predict_edges = (s > gama).astype(s.dtype)
    print("edges： ",predict_edges.sum())
    print("ACC： ",accuracy_score(true_edges, predict_edges))
    # NOTE: the "AP" label below is macro-averaged precision, not average precision.
    print("AP： ",precision_score(true_edges, predict_edges, average='macro'))
    print("RECALL： ",recall_score(true_edges, predict_edges, average='macro'))
    print("F1 SCORE： ",f1_score(true_edges, predict_edges, average='macro'))

gama :  0.51
edges：  744.0
ACC：  0.4913494809688581
AP：  0.6048387096774194
RECALL：  0.706
F1 SCORE：  0.4651180358829084
gama :  0.511
edges：  574.0
ACC：  0.6384083044982699
AP：  0.6358885017421603
RECALL：  0.7909999999999999
F1 SCORE：  0.5815873785567082
gama :  0.512
edges：  356.0
ACC：  0.8269896193771626
AP：  0.7191011235955056
RECALL：  0.9
F1 SCORE：  0.7491319444444444
gama :  0.513
edges：  224.0
ACC：  0.9307958477508651
AP：  0.8316025444512569
RECALL：  0.9437692307692308
F1 SCORE：  0.8740329083578511
gama :  0.514
edges：  170.0
ACC：  0.9394463667820069
AP：  0.8622718052738336
RECALL：  0.8892564102564102
F1 SCORE：  0.8750146732650022
gama :  0.515
edges：  126.0
ACC：  0.9325259515570934
AP：  0.8785483125288951
RECALL：  0.8149230769230769
F1 SCORE：  0.8424903049994759
gama :  0.516
edges：  104.0
ACC：  0.9307958477508651
AP：  0.9013234863995321
RECALL：  0.7814615384615384
F1 SCORE：  0.8266606687659319
gama :  0.517
edges：  74.0
ACC：  0.9152249134948097
AP：  0.9043562971474246
RECALL： 

In [14]:
# Re-evaluate at a single chosen threshold and keep `predict_edges` for the
# export cell below.
gama = 0.514
predict_edges = s.copy()
above_threshold = predict_edges > gama
at_or_below_threshold = predict_edges <= gama
predict_edges[above_threshold] = 1
predict_edges[at_or_below_threshold] = 0

print("edges： ", predict_edges.sum())
print("ACC： ", accuracy_score(true_edges, predict_edges))
# The three remaining metrics are all macro-averaged over the two classes.
for label, metric in (("AP： ", precision_score),
                      ("RECALL： ", recall_score),
                      ("F1 SCORE： ", f1_score)):
    print(label, metric(true_edges, predict_edges, average='macro'))

edges：  170.0
ACC：  0.9394463667820069
AP：  0.8622718052738336
RECALL：  0.8892564102564102
F1 SCORE：  0.8750146732650022


In [15]:
# Restore the flat 0/1 prediction vector to the square layout of the score
# matrix (no hard-coded 34) and write it as a space-separated text file named
# after the configured dataset.
predict_graph = predict_edges.reshape(adj_rec.shape)
pd.DataFrame(predict_graph).to_csv('NAFS_recon_{}.txt'.format(args.dataset), header=None, index=False, sep=' ')