In [1]:
import networkx as nx 
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors

from segk import segk
from utils import read_edgelist

In [2]:
nodes, edgelist = read_edgelist('datasets/aan/release/2012/networks/author-collaboration-network.txt', delimiter=' ==> ')

E_segk_sp = segk(nodes, edgelist, radius=2, dim=20, kernel='shortest_path')
E_segk_wl = segk(nodes, edgelist, radius=2, dim=20, kernel='weisfeiler_lehman')

acl_fellows = {"Wu, Dekai", "Moore, Robert C.", "Mercer, Robert E.", "McKeown, Kathleen R.", "Matsumoto, Yuji", "Marcus, Mitchell P.", "Manning, Christopher D.", "Karttunen, Lauri", "Kaplan, Ronald M.", "Joshi, Aravind K.", "Johnson, Mark", "Hovy, Eduard", "Hirschberg, Julia", "Hajicov&aacute;, Eva", "Collins, Michael John", "Charniak, Eugene", "Calzolari, Nicoletta", "Webber, Bonnie Lynn", "Steedman, Mark", "Sproat, Richard W.", "Roth, Dan", "Ng, Hwee Tou", "Yarowsky, David", "Dagan, Ido", "Sidner, Candace L.", "Lin, Dekang", "Tsujii, Jun'ichi", "Palmer, Martha", "Mooney, Raymond J.", "Marcu, Daniel", "Knight, Kevin", "Daelemans, Walter", "Wiebe, Janyce", "Roukos, Salim", "Church, Kenneth Ward", "Cardie, Claire", "Weischedel, Ralph M.", "Wang, Haifeng", "Walker, Marilyn A.", "Dorr, Bonnie Jean", "Shieber, Stuart M.", "Pereira, Fernando", "Litman, Diane J.", "Lee, Lillian", "Grishman, Ralph", "Barzilay, Regina"}

In [3]:
name2idx = dict()
idx2name = dict()
acl_fellows_matched = set()
for i,node in enumerate(nodes):
    name2idx[node] = i
    idx2name[i] = node
    if node in acl_fellows:
        acl_fellows_matched.add(node)

algorithms = ["SEGK-SP", "SEGK-WL"]
embeddings = [E_segk_sp, E_segk_wl]

lim = 100

for j, embedding_matrix in enumerate(embeddings):
    nbrs = NearestNeighbors(n_neighbors=lim).fit(embedding_matrix)
    distances, indices = nbrs.kneighbors(embedding_matrix)

    retrieved = list()
    precision = list()
    for acl_fellow in acl_fellows_matched:
        matches = 0
        idx = indices[name2idx[acl_fellow],:]
        for i in range(idx.size):
            if idx2name[idx[i]] != acl_fellow and idx2name[idx[i]] in acl_fellows:
                matches += 1
        
        retrieved.append(matches)
        precision.append(matches/lim)
    
    print("\n"+algorithms[j])
    print("Total acl fellows retrieved:", np.sum(retrieved))
    print("Average acl fellows retrieved:", np.mean(retrieved))
    print("Average precision @ "+str(lim)+": "+str(np.mean(precision)))


SEGK-SP
Total acl fellows retrieved: 623
Average acl fellows retrieved: 13.543478260869565
Average precision @ 100: 0.13543478260869565

SEGK-WL
Total acl fellows retrieved: 394
Average acl fellows retrieved: 8.565217391304348
Average precision @ 100: 0.08565217391304347
