In [5]:
import pandas as pd
import numpy as np
import math

In [27]:
def stderr(datapoints):
    """Return the standard error of the mean of ``datapoints``.

    The standard error is ``sqrt(s^2 / n)``, where ``s^2`` is the unbiased
    (``ddof=1``) sample variance and ``n`` is the number of observations.

    Missing values (NaN) are dropped before anything is computed, so the
    variance and ``n`` always refer to the same set of observations.
    Previously ``len()`` counted NaN entries while ``np.var`` on a pandas
    Series skipped them, which made the two disagree.

    Parameters
    ----------
    datapoints : array-like of numbers
        A list, NumPy array or pandas Series (the latter is what
        ``groupby(...).agg`` passes in).

    Returns
    -------
    float
        The standard error, or NaN when fewer than two non-missing values
        are available (the sample variance is undefined in that case).

    Notes
    -----
    The function name is used as the column label when the function is
    passed to ``.agg``; renaming it changes the ``"stderr"`` lookups made
    on the aggregated frame.
    """
    values = np.asarray(datapoints, dtype=float)
    values = values[~np.isnan(values)]
    n = values.size
    if n < 2:
        # Undefined for 0 or 1 observations; return NaN explicitly instead of
        # relying on a divide-by-zero warning to produce it.
        return float("nan")
    # ddof=1 applies Bessel's correction directly (same as n * var / (n - 1)).
    return math.sqrt(np.var(values, ddof=1) / n)

def ci(datapoints):
    """Return the half-width of a confidence interval around the mean.

    The half-width is the standard error of ``datapoints`` scaled by 1.96,
    the two-sided 95% critical value of the standard normal distribution.
    """
    standard_error = stderr(datapoints)
    return 1.96 * standard_error

In [28]:
# Load the raw experiment results from the CSV file in the working directory.
results_pd = pd.read_csv(filepath_or_buffer="link-prediction-result.csv")

In [29]:
# Collapse the raw rows into one row per (Graph, Algorithm) pair. Every
# remaining column gets three statistics: the built-in "mean" plus the custom
# `ci` and `stderr` functions, which appear as columns named after the functions.
# NOTE: this rebinds `results_pd` to the aggregated frame, so re-run the load
# cell before executing this cell a second time.
results_pd = (
    results_pd
    .groupby(["Graph", "Algorithm"])
    .agg(["mean", ci, stderr])
)

In [30]:
graphs = ["Facebook", "Autonomous Systems", "Protein-Protein", "ca-HepTh", "LastFM", "Wikipedia"]
# Table columns, left to right: each baseline is followed by its "Stable" variant.
algorithms = ["LE", "Stable LE", "LINE", "Stable LINE"]

# Print one LaTeX table row per graph, with a "mean$\pm$ci" cell for every
# algorithm. Only the ROC metric is reported; an earlier draft of this cell
# also carried (commented-out) F1 columns.
for graph in graphs:
    cells = []
    for algorithm in algorithms:
        stats = results_pd.loc[graph, algorithm]
        cells.append(
            "{:.3f}$\\pm${:.3f}".format(stats.loc["ROC", "mean"], stats.loc["ROC", "ci"])
        )
    # Cells are separated by " & " and the row is closed with "\\\hline".
    row = "{} & {}\\\\\\hline".format(graph, " & ".join(cells))
    print(row)

Facebook & 0.982$\pm$0.000 & 0.924$\pm$0.047 & 0.971$\pm$0.001 & 0.933$\pm$0.001\\\hline
Autonomous Systems & 0.693$\pm$0.002 & 0.699$\pm$0.020 & 0.693$\pm$0.007 & 0.672$\pm$0.004\\\hline
Protein-Protein & 0.770$\pm$0.016 & 0.761$\pm$0.008 & 0.638$\pm$0.036 & 0.660$\pm$0.022\\\hline
ca-HepTh & 0.811$\pm$0.008 & 0.811$\pm$0.008 & 0.893$\pm$0.003 & 0.890$\pm$0.001\\\hline
LastFM & 0.910$\pm$0.001 & 0.785$\pm$0.001 & 0.914$\pm$0.001 & 0.895$\pm$0.002\\\hline
Wikipedia & 0.614$\pm$0.001 & 0.615$\pm$0.001 & 0.458$\pm$0.008 & 0.499$\pm$0.003\\\hline


In [31]:
# For every graph and base algorithm, compare the baseline ROC mean with the
# "Stable" variant's. The statistic is the difference of the two means divided
# by the combined standard error sqrt(se_base^2 + se_stable^2). Pairs whose
# statistic lies within +/-1.96 are printed, i.e. the ones where the
# difference is NOT significant at that threshold.
for graphName in graphs:
    for alg in ("LE", "LINE"):
        baseline = results_pd.loc[graphName, alg]
        stabilized = results_pd.loc[graphName, "Stable " + alg]

        mean_diff = stabilized.loc["ROC", "mean"] - baseline.loc["ROC", "mean"]
        combined_se = math.sqrt(
            baseline.loc["ROC", "stderr"] ** 2 + stabilized.loc["ROC", "stderr"] ** 2
        )

        statistic = mean_diff / combined_se
        if abs(statistic) <= 1.96:
            print("{} {}".format(graphName, alg))

Autonomous Systems LE
Protein-Protein LE
Protein-Protein LINE
ca-HepTh LE
Wikipedia LE


In [6]:
# Average, over all graphs, of (Stable LE ROC mean - LE ROC mean).
# A negative value means the stable variant scores lower on average.
graphs = ["Facebook", "Autonomous Systems", "Protein-Protein", "ca-HepTh", "LastFM", "Wikipedia"]
total_gap = 0.0
for graph in graphs:
    baseline_row = results_pd.loc[graph, "LE"]
    stable_row = results_pd.loc[graph, "Stable LE"]
    total_gap += stable_row.loc["ROC", "mean"] - baseline_row.loc["ROC", "mean"]
average_gap = total_gap / len(graphs)
print("Average Gap: {:.3f}".format(average_gap))

Average Gap: -0.031


In [7]:
# Average, over all graphs, of (Stable LINE ROC mean - LINE ROC mean).
# A negative value means the stable variant scores lower on average.
graphs = ["Facebook", "Autonomous Systems", "Protein-Protein", "ca-HepTh", "LastFM", "Wikipedia"]
total_gap = 0.0
for graph in graphs:
    baseline_row = results_pd.loc[graph, "LINE"]
    stable_row = results_pd.loc[graph, "Stable LINE"]
    total_gap += stable_row.loc["ROC", "mean"] - baseline_row.loc["ROC", "mean"]
average_gap = total_gap / len(graphs)
print("Average Gap: {:.3f}".format(average_gap))

Average Gap: -0.003
