In [1]:
import sys
sys.path.insert(0, '../src/')

import warnings
warnings.filterwarnings('ignore')

import numpy as np
import scipy.sparse as sp
from scipy.sparse import load_npz

import torch

import utils

In [2]:
# Load the graph stored as a scipy sparse matrix in .npz format; it is passed below
# as the adjacency matrix to utils.train_val_test_split_adjacency.
_A_obs = load_npz('../data/datasets/CORA_ML.npz')

In [3]:
# Split configuration handed to utils.train_val_test_split_adjacency.
# NOTE(review): the shares are presumably fractions of the edges held out for
# validation / testing — confirm against the helper in ../src/utils.
val_share = 0.1
test_share = 0.05
seed = 481516234

# The *_ones / *_zeros results are used below as arrays of (row, col) index pairs.
train_ones, val_ones, val_zeros, test_ones, test_zeros = utils.train_val_test_split_adjacency(
    _A_obs,
    val_share,
    test_share,
    seed,
    undirected=True,
    connected=True,
    asserts=False,
)

# Build the training adjacency matrix with one unit-weight entry per training pair.
# The shape is passed explicitly: without it scipy infers the size from the largest
# index present, which would silently shrink the matrix if the highest-numbered
# nodes had no training edge.
train_graph = sp.csr_matrix(
    (np.ones(len(train_ones)), (train_ones[:, 0], train_ones[:, 1])),
    shape=_A_obs.shape,
)
# Symmetry check done sparsely, avoiding two dense N x N temporaries.
assert (train_graph != train_graph.T).nnz == 0

In [4]:
# Show the sparse-matrix summary (shape, dtype, number of stored elements) as a sanity check.
train_graph

<2810x2810 sparse matrix of type '<class 'numpy.float64'>'
	with 13566 stored elements in Compressed Sparse Row format>

In [5]:
def preferential_attachment(train_graph):
    """Score every node pair by the product of the two nodes' degrees.

    Args:
        train_graph: Square adjacency matrix of the training graph, either a
            scipy sparse matrix/array or a dense 2-D array. Any numeric dtype
            is accepted (integer input is promoted to float).

    Returns:
        Dense ``np.ndarray`` of shape (N, N) whose entry (i, j) is
        ``deg(i) * deg(j)``, normalised so that all entries sum to 1. If the
        graph has no edges the matrix is all zeros instead of NaN.
    """
    # Row sums come back as an (N, 1) np.matrix for sparse matrices but as a 1-D
    # array for dense input; flatten to 1-D floats so both cases behave the same
    # and the in-place division below never hits an integer-dtype cast error.
    degrees = np.asarray(train_graph.sum(axis=-1), dtype=float).ravel()
    scores_matrix = np.outer(degrees, degrees)

    total = scores_matrix.sum()
    if total != 0:  # an edgeless graph would otherwise produce 0/0 = NaN
        scores_matrix /= total
    return scores_matrix

In [6]:
# Score all node pairs with the preferential-attachment heuristic.
# NOTE(review): `scores_matrix` is reassigned by the later heuristic cells, and the only
# evaluation cell visible here runs after the Jaccard cell, so this result is not
# evaluated by the cells shown.
scores_matrix = preferential_attachment(train_graph)

In [8]:
def common_neigbors(train_graph):
    """Score every node pair by the number of neighbours the two nodes share.

    The function name keeps its existing spelling because callers use it.

    Args:
        train_graph: Square adjacency matrix of the training graph, either a
            scipy sparse matrix or a dense 2-D array. Any numeric dtype is
            accepted (integer input is promoted to float).

    Returns:
        Dense ``np.ndarray`` of shape (N, N) whose entry (i, j) is
        ``(A @ A.T)[i, j]`` (the shared-neighbour count for a 0/1 adjacency
        matrix), normalised so that all entries sum to 1. If the graph has no
        edges the matrix is all zeros instead of NaN.
    """
    # Convert once to float CSR: integer data would make the in-place division
    # fail, and dense input has no `.toarray()`.
    adjacency = sp.csr_matrix(train_graph, dtype=float)
    scores_matrix = (adjacency @ adjacency.T).toarray()

    total = scores_matrix.sum()
    if total != 0:  # an edgeless graph would otherwise produce 0/0 = NaN
        scores_matrix /= total
    return scores_matrix

In [9]:
# Score all node pairs with the common-neighbours heuristic (replaces the previous `scores_matrix`).
# NOTE(review): this result is itself overwritten by the Jaccard cell before the only
# evaluation cell visible here runs.
scores_matrix = common_neigbors(train_graph)

In [11]:
def jaccard_index(train_graph):
    """Score every node pair by the Jaccard index of the two nodes' neighbourhoods.

    For a 0/1 adjacency matrix ``A`` the score of (i, j) is
    ``|N(i) & N(j)| / |N(i) | N(j)|``, computed as
    ``common / (deg(i) + deg(j) - common)`` with ``common = (A @ A.T)[i, j]``
    and ``deg`` the row sums of ``A``.

    Args:
        train_graph: Square adjacency matrix of the training graph, either a
            scipy sparse matrix or a dense 2-D array. The set interpretation
            assumes 0/1 entries.

    Returns:
        Dense ``np.ndarray`` of shape (N, N) with the pairwise scores. Pairs
        whose union is empty (e.g. two isolated nodes) get a score of 0
        instead of NaN.
    """
    adjacency = sp.csr_matrix(train_graph, dtype=float)
    degrees = np.asarray(adjacency.sum(axis=-1)).ravel()

    # Rows are compared with rows, hence A @ A.T. This equals A @ A for symmetric
    # graphs, stays correct for non-symmetric input, and matches common_neigbors.
    common = (adjacency @ adjacency.T).toarray()
    union = degrees[:, None] + degrees[None, :] - common

    # Divide only where the union is non-empty so 0/0 never produces NaN.
    scores_matrix = np.zeros_like(common)
    np.divide(common, union, out=scores_matrix, where=union != 0)
    return scores_matrix

In [12]:
# Score all node pairs with the Jaccard index (replaces the previous `scores_matrix`).
scores_matrix = jaccard_index(train_graph)

In [13]:
# Evaluate the current `scores_matrix` on the held-out test pairs; when the cells run
# top to bottom this is the Jaccard score matrix from the previous cell.
# NOTE(review): the call returns a pair of floats — presumably (ROC-AUC, average
# precision); confirm against utils.link_prediction_performance.
utils.link_prediction_performance(scores_matrix, test_ones, test_zeros)

(0.82569375, 0.7981150986784133)