In [1]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import combinations

from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import
cmap = plt.get_cmap('coolwarm')

In [2]:
!cat ../experiments/lp/Facebook-1*.unseen.log > ../results/UnseenFacebookLPGreedySearch.csv
!cat ../experiments/lp/CA-AstroPh-1*.unseen.log > ../results/UnseenCa-AstroPhLPGreedySearch.csv

In [3]:
def load_experiment_data(path):
    header = ['Task', 'Embeddings', 'Graph', 'Labels', 'NumExperiments']
    names = header + [str(x) for x in range(25)]
    df = pd.read_csv(path, sep=' ', header=None, names=names)
    df["ExperimentMean"] = df.iloc[:, -25:].mean(axis=1)
    df["ExperimentStdDev"] = df.iloc[:, -25:].std(axis=1)
    return df

In [4]:
# Facebook graph evaluation
fb_df = load_experiment_data('../results/UnseenFacebookLPGreedySearch.csv')
fb_df.sort_values(by="ExperimentMean", ascending=False)

Unnamed: 0,Task,Embeddings,Graph,Labels,NumExperiments,0,1,2,3,4,...,17,18,19,20,21,22,23,24,ExperimentMean,ExperimentStdDev
5,[LINK],emb/Facebook-1-K2-D32-E250-C6-M1.emb,graph/sampled//Facebook-2-C.edgelist,labels/Facebook-2-K2.json,25,0.819559,0.815464,0.815735,0.821159,0.816538,...,0.820257,0.818869,0.815151,0.811959,0.818552,0.820715,0.820664,0.816376,0.818021,0.002621
1,[LINK],emb/Facebook-1-K2-D128-E250-C6-M2.emb,graph/sampled//Facebook-2-C.edgelist,labels/Facebook-2-K2.json,25,0.807767,0.798788,0.801927,0.808009,0.808125,...,0.806624,0.804409,0.806612,0.802823,0.804366,0.801875,0.802208,0.805847,0.804611,0.00292
8,[LINK],emb/Facebook-1-K2-D32-E50-C6-M2.emb,graph/sampled//Facebook-2-C.edgelist,labels/Facebook-2-K2.json,25,0.806187,0.805295,0.798919,0.806066,0.800924,...,0.80731,0.803426,0.802854,0.801372,0.805438,0.80365,0.801319,0.802505,0.804094,0.002687
10,[LINK],emb/Facebook-1-K2-D64-E250-C6-M2.emb,graph/sampled//Facebook-2-C.edgelist,labels/Facebook-2-K2.json,25,0.803617,0.80391,0.79996,0.802137,0.800906,...,0.800377,0.7987,0.799133,0.806809,0.806333,0.799407,0.801325,0.801994,0.802304,0.002571
4,[LINK],emb/Facebook-1-K2-D32-E250-C4-M2.emb,graph/sampled//Facebook-2-C.edgelist,labels/Facebook-2-K2.json,25,0.795582,0.794625,0.797457,0.797548,0.79833,...,0.795908,0.794132,0.80025,0.800239,0.801395,0.794783,0.79963,0.800035,0.798341,0.002793
3,[LINK],emb/Facebook-1-K2-D32-E250-C2-M2.emb,graph/sampled//Facebook-2-C.edgelist,labels/Facebook-2-K2.json,25,0.795256,0.795127,0.794392,0.793793,0.792083,...,0.795236,0.797933,0.797027,0.793179,0.796097,0.799377,0.797207,0.792046,0.795435,0.002189
7,[LINK],emb/Facebook-1-K2-D32-E250-C8-M2.emb,graph/sampled//Facebook-2-C.edgelist,labels/Facebook-2-K2.json,25,0.788741,0.789876,0.790667,0.785789,0.787577,...,0.790643,0.789246,0.789757,0.790149,0.788572,0.787905,0.786068,0.790317,0.788465,0.001852
2,[LINK],emb/Facebook-1-K2-D32-E250-C10-M2.emb,graph/sampled//Facebook-2-C.edgelist,labels/Facebook-2-K2.json,25,0.789184,0.787734,0.784402,0.781759,0.789202,...,0.790754,0.782293,0.783399,0.790537,0.787415,0.781769,0.791863,0.787507,0.787087,0.003869
6,[LINK],emb/Facebook-1-K2-D32-E250-C6-M2.emb,graph/sampled//Facebook-2-C.edgelist,labels/Facebook-2-K2.json,25,0.791377,0.788957,0.785795,0.786937,0.792009,...,0.785389,0.786274,0.784702,0.782515,0.779311,0.783338,0.7951,0.785947,0.787034,0.003423
9,[LINK],emb/Facebook-1-K2-D32-E500-C6-M2.emb,graph/sampled//Facebook-2-C.edgelist,labels/Facebook-2-K2.json,25,0.786094,0.782485,0.783943,0.779938,0.784796,...,0.787512,0.783386,0.78574,0.782359,0.785669,0.789136,0.787553,0.788333,0.785316,0.003378


In [5]:
# CA-AstroPh graph evaluation
ap_df = load_experiment_data('../results/UnseenCa-AstroPhLPGreedySearch.csv')
ap_df.sort_values(by="ExperimentMean", ascending=False)

Unnamed: 0,Task,Embeddings,Graph,Labels,NumExperiments,0,1,2,3,4,...,17,18,19,20,21,22,23,24,ExperimentMean,ExperimentStdDev
5,[LINK],emb/CA-AstroPh-1-K2-D32-E250-C6-M1.emb,graph/sampled//CA-AstroPh-2-C.edgelist,labels/CA-AstroPh-2-K2.json,25,0.755054,0.751274,0.752571,0.754309,0.752951,...,0.758613,0.75652,0.757382,0.754392,0.756197,0.751839,0.75858,0.753584,0.754819,0.002198
1,[LINK],emb/CA-AstroPh-1-K2-D128-E250-C6-M2.emb,graph/sampled//CA-AstroPh-2-C.edgelist,labels/CA-AstroPh-2-K2.json,25,0.745924,0.746843,0.748161,0.748917,0.748668,...,0.750056,0.749931,0.749422,0.749248,0.747028,0.750263,0.749397,0.750767,0.74892,0.002016
6,[LINK],emb/CA-AstroPh-1-K2-D32-E250-C6-M2.emb,graph/sampled//CA-AstroPh-2-C.edgelist,labels/CA-AstroPh-2-K2.json,25,0.7446,0.742606,0.741719,0.742111,0.744585,...,0.74315,0.743695,0.743271,0.745494,0.746319,0.738906,0.740682,0.745441,0.74409,0.002292
7,[LINK],emb/CA-AstroPh-1-K2-D32-E250-C8-M2.emb,graph/sampled//CA-AstroPh-2-C.edgelist,labels/CA-AstroPh-2-K2.json,25,0.73575,0.736875,0.739255,0.733964,0.736266,...,0.735961,0.737572,0.73589,0.737844,0.737535,0.734039,0.737762,0.736643,0.73667,0.00162
0,[LINK],emb/CA-AstroPh-1-K1-D32-E250-C6-M2.emb,graph/sampled//CA-AstroPh-2-C.edgelist,labels/CA-AstroPh-2-K1.json,25,0.721427,0.725939,0.72376,0.729314,0.725787,...,0.724212,0.725185,0.725147,0.717726,0.723527,0.724337,0.722939,0.722233,0.724392,0.002356
8,[LINK],emb/CA-AstroPh-1-K2-D32-E50-C6-M2.emb,graph/sampled//CA-AstroPh-2-C.edgelist,labels/CA-AstroPh-2-K2.json,25,0.722021,0.718436,0.71816,0.717496,0.719708,...,0.721263,0.718977,0.71794,0.71698,0.719029,0.718489,0.716913,0.717198,0.719111,0.001635
9,[LINK],emb/CA-AstroPh-1-K2-D32-E500-C6-M2.emb,graph/sampled//CA-AstroPh-2-C.edgelist,labels/CA-AstroPh-2-K2.json,25,0.706462,0.707662,0.707307,0.705289,0.7105,...,0.70553,0.709462,0.708137,0.71173,0.709247,0.705327,0.707076,0.708926,0.707887,0.002117
3,[LINK],emb/CA-AstroPh-1-K2-D32-E250-C2-M2.emb,graph/sampled//CA-AstroPh-2-C.edgelist,labels/CA-AstroPh-2-K2.json,25,0.702325,0.707919,0.707473,0.707204,0.709448,...,0.703627,0.701972,0.707598,0.706923,0.704419,0.70461,0.708611,0.707873,0.706268,0.001695
4,[LINK],emb/CA-AstroPh-1-K2-D32-E250-C4-M2.emb,graph/sampled//CA-AstroPh-2-C.edgelist,labels/CA-AstroPh-2-K2.json,25,0.706273,0.705258,0.704036,0.707852,0.703703,...,0.706339,0.702119,0.70877,0.708441,0.707941,0.701718,0.705255,0.707793,0.706141,0.002298
10,[LINK],emb/CA-AstroPh-1-K2-D64-E250-C6-M2.emb,graph/sampled//CA-AstroPh-2-C.edgelist,labels/CA-AstroPh-2-K2.json,25,0.688095,0.681269,0.684507,0.683155,0.685342,...,0.680666,0.683985,0.682355,0.68252,0.684108,0.685594,0.681767,0.680809,0.683681,0.001764


In [17]:
import json
from collections import Counter

def label_jaccard(l_a, l_b):
    s_a = set(Counter(l_a).most_common())
    s_b = set(Counter(l_b).most_common())
    return (1.0 * len(s_a & s_b)) / len(s_a | s_b)

def pairwise_match(a, b):
    return (1.0 * sum([v == a[k] for k, v in b.items()])) / len(a)

def jaccard_match(a, b):
    return (1.0 * sum([label_jaccard(v, a[k]) for k, v in b.items()])) / len(a)

fb1_labels = json.load(open('../labels/Facebook-1-K2.json'))
fb2_labels = json.load(open('../labels/Facebook-2-K2.json'))
fb_pairwise = pairwise_match(fb1_labels, fb2_labels)
fb_jaccard = jaccard_match(fb1_labels, fb2_labels)
print('Facebook: {} {}'.format(fb_pairwise, fb_jaccard))

ap1_labels = json.load(open('../labels/CA-AstroPh-1-K2.json'))
ap2_labels = json.load(open('../labels/CA-AstroPh-2-K2.json'))
ap_pairwise = pairwise_match(ap1_labels, ap2_labels)
ap_jaccard = jaccard_match(ap1_labels, ap2_labels)
print('AstroPh: {} {}'.format(ap_pairwise, ap_jaccard))

AstroPh: 0.0396888817857 0.20013946621
Facebook: 0.00148551621689 0.160386854529
