In [1]:
import sys 
sys.path.append('../')

from dynalign.experiments.paths import LP_EVALUATION_RESULTS

In [2]:
import os
import pandas as pd
from collections import defaultdict
from typing import Dict, Any

import numpy as np
from tqdm.autonotebook import tqdm
from pathlib import Path

  from tqdm.autonotebook import tqdm


In [3]:
path = LP_EVALUATION_RESULTS

In [4]:
def read_results(path: str) -> Dict[Any, Any]:
    output = defaultdict(dict)
    path = Path(path)
    for results_dir in list(path.iterdir()):
        if ".gitignore" in str(results_dir):
            continue
        for ds_file in results_dir.glob('*.pkl'):
            name = ds_file.name.replace(".pkl", "")
            model_name = ds_file.parent.name
            output[name][model_name] = pd.read_pickle(ds_file)
    return output


def get_metric(output: Dict[str, Any], metric_name: str):
    parsed_results = defaultdict(dict)

    for ds, ds_results in output.items():
        for method, method_df in ds_results.items():
            auc_values = []
            auc_values = method_df[method_df.snapshot == "Snapshot merged"][
                metric_name
            ].values

            method_mean = np.mean(auc_values)
            method_std = np.std(auc_values)
            parsed_results[ds][method] = (method_mean.round(4), method_std.round(4))

    return parsed_results

In [5]:
from typing import Union, Tuple, List

In [6]:
def convert_float_to_str(x: float) -> str:
    return f"{x:.2f}"


def percentage_style(x: Union[float, Tuple[float, float]]) -> Union[float, Tuple[float, float]]:
    """Percantage style fn."""
    if isinstance(x, float):
        return round(x * 100, 2)
    elif isinstance(x, tuple):
        return round(x[0] * 100, 2), round(x[1] * 100, 2)
    raise ValueError("X parsing error")


def get_top_score_bold(x: pd.Series) -> List[str]:
    max_id = np.argmax(x.values)

    output = []
    for i in range(len(x)):
        if i == max_id:
            out_str = f'$\\mathbf{{{convert_float_to_str(x[i][0]) + " ± " + convert_float_to_str(x[i][1])}}}'
            out_str += "$"
            output.append(out_str)

        elif -1_000_000 < x[i][0] < 1_000_000:
            out_str = f'${convert_float_to_str(x[i][0]) + " ± " + convert_float_to_str(x[i][1])}'
            out_str += "$"
            output.append(out_str)

        else:
            output.append("$\\times$")
    return output


In [7]:
MODEL_NAMES = {
    "PosthocTB": "Posthoc-TB",
    "PosthocEJ": "Posthoc-EJ",
    "PosthocALL": "Posthoc-PA",
    "Node2VecAligned_L2_ALL": "\makecell[l]{Node2Vec \\\\ (Regularized, All)}",
    "Node2VecAligned_L2_EJ": "\makecell[l]{Node2Vec \\\\ (Regularized, EJ)}",
    "Node2VecAligned_L2_EJ_Weighted": "\makecell[l]{Node2Vec \\\\ (Regularized, \\\\ Weighted, EJ)}",
    "Node2VecAligned_L2_TB": "\makecell[l]{Node2Vec \\\\ (Regularized, TB)}",
    "Node2VecAligned_L2_TB_Weighted": "\makecell[l]{Node2Vec \\\\ (Regularized, \\\\ Weighted, TB)}",
}


order = [
    "Node2Vec",
    "PosthocALL",
    "PosthocEJ",
    "PosthocTB",
    "Node2VecAligned_L2_ALL",
    "Node2VecAligned_L2_EJ",
    "Node2VecAligned_L2_EJ_Weighted",
    "Node2VecAligned_L2_TB",
    "Node2VecAligned_L2_TB_Weighted",
]

In [8]:
results = pd.DataFrame(get_metric(read_results(path), "auc_LR"))
mean_rank = results.applymap(lambda x: x[0]).rank(ascending=False).mean(axis=1).round(2)
results["Mean rank"] = mean_rank
results

Unnamed: 0,fb-forum,fb-messages,ppi,bitcoin-otc,bitcoin-alpha,ogbl-collab,Mean rank
Node2Vec,"(0.8391, 0.0263)","(0.6344, 0.0618)","(0.5917, 0.0068)","(0.6583, 0.0612)","(0.6467, 0.1476)","(0.8155, 0.0077)",7.0
Node2VecAligned_L2_TB_Weighted,"(0.8811, 0.0385)","(0.712, 0.0713)","(0.6003, 0.0062)","(0.6411, 0.0526)","(0.6575, 0.0995)","(0.8236, 0.0044)",4.33
Node2VecAligned_L2_ALL,"(0.8793, 0.0395)","(0.7261, 0.076)","(0.6013, 0.0086)","(0.6329, 0.0449)","(0.6598, 0.1061)","(0.8223, 0.006)",4.83
Node2VecAligned_L2_TB,"(0.8798, 0.0296)","(0.7626, 0.077)","(0.5976, 0.0078)","(0.6315, 0.0401)","(0.612, 0.1379)","(0.8197, 0.005)",6.17
Node2VecAligned_L2_EJ,"(0.9111, 0.0206)","(0.7384, 0.0802)","(0.6006, 0.0064)","(0.6747, 0.0417)","(0.6254, 0.1273)","(0.8227, 0.0052)",4.0
PosthocALL,"(0.9234, 0.0107)","(0.7051, 0.0665)","(0.609, 0.007)","(0.6184, 0.0511)","(0.5835, 0.1516)","(0.8337, 0.0065)",4.83
Node2VecAligned_L2_EJ_Weighted,"(0.8789, 0.0399)","(0.6567, 0.0847)","(0.5989, 0.0059)","(0.6447, 0.0497)","(0.6333, 0.1107)","(0.8178, 0.0063)",6.5
PosthocTB,"(0.9302, 0.0125)","(0.7449, 0.0658)","(0.6061, 0.006)","(0.638, 0.054)","(0.648, 0.168)","(0.8286, 0.0055)",2.83
PosthocEJ,"(0.921, 0.0111)","(0.7105, 0.0706)","(0.6069, 0.0064)","(0.6352, 0.0526)","(0.608, 0.1438)","(0.8333, 0.0057)",4.5


In [9]:
results = pd.DataFrame(get_metric(read_results(path), "auc_LR"))
mean_rank = results.applymap(lambda x: x[0]).rank(ascending=False).mean(axis=1).round(2)
results = results.applymap(percentage_style)
results = results.apply(get_top_score_bold)
results["Mean rank"] = mean_rank
results = results.loc[order].rename(MODEL_NAMES, axis=0)
# results = results.sort_values(by="Mean rank").rename(MODEL_NAMES, axis=0)
results

Unnamed: 0,fb-forum,fb-messages,ppi,bitcoin-otc,bitcoin-alpha,ogbl-collab,Mean rank
Node2Vec,$83.91 ± 2.63$,$63.44 ± 6.18$,$59.17 ± 0.68$,$65.83 ± 6.12$,$64.67 ± 14.76$,$81.55 ± 0.77$,7.0
Posthoc-PA,$92.34 ± 1.07$,$70.51 ± 6.65$,$\mathbf{60.90 ± 0.70}$,$61.84 ± 5.11$,$58.35 ± 15.16$,$\mathbf{83.37 ± 0.65}$,4.83
Posthoc-EJ,$92.10 ± 1.11$,$71.05 ± 7.06$,$60.69 ± 0.64$,$63.52 ± 5.26$,$60.80 ± 14.38$,$83.33 ± 0.57$,4.5
Posthoc-TB,$\mathbf{93.02 ± 1.25}$,$74.49 ± 6.58$,$60.61 ± 0.60$,$63.80 ± 5.40$,$64.80 ± 16.80$,$82.86 ± 0.55$,2.83
"\makecell[l]{Node2Vec \\ (Regularized, All)}",$87.93 ± 3.95$,$72.61 ± 7.60$,$60.13 ± 0.86$,$63.29 ± 4.49$,$\mathbf{65.98 ± 10.61}$,$82.23 ± 0.60$,4.83
"\makecell[l]{Node2Vec \\ (Regularized, EJ)}",$91.11 ± 2.06$,$73.84 ± 8.02$,$60.06 ± 0.64$,$\mathbf{67.47 ± 4.17}$,$62.54 ± 12.73$,$82.27 ± 0.52$,4.0
"\makecell[l]{Node2Vec \\ (Regularized, \\ Weighted, EJ)}",$87.89 ± 3.99$,$65.67 ± 8.47$,$59.89 ± 0.59$,$64.47 ± 4.97$,$63.33 ± 11.07$,$81.78 ± 0.63$,6.5
"\makecell[l]{Node2Vec \\ (Regularized, TB)}",$87.98 ± 2.96$,$\mathbf{76.26 ± 7.70}$,$59.76 ± 0.78$,$63.15 ± 4.01$,$61.20 ± 13.79$,$81.97 ± 0.50$,6.17
"\makecell[l]{Node2Vec \\ (Regularized, \\ Weighted, TB)}",$88.11 ± 3.85$,$71.20 ± 7.13$,$60.03 ± 0.62$,$64.11 ± 5.26$,$65.75 ± 9.95$,$82.36 ± 0.44$,4.33


In [10]:
print((
    results.style.to_latex()
    .replace("{llllll}", "{lrrrrr}\n\\toprule")
    .replace("&  &  &  &  &  \\\\", "")
    .replace("NaN", "---")
    .replace("±", r"\pm")
    .replace(r"\$", "$")
    .replace("textbackslash ", "")
    .replace("\{", "{")
    .replace(r"\}", "}")
    .replace("_", "\_")
    .replace("}<KLEJ>", "\\textcolor{red}{*}}")
    .replace("mathbf", "mathbf")
    .replace("\\end{tabular}", "\\bottomrule \n\\end{tabular}")
))

\begin{tabular}{lllllllr}
 & fb-forum & fb-messages & ppi & bitcoin-otc & bitcoin-alpha & ogbl-collab & Mean rank \\
Node2Vec & $83.91 \pm 2.63$ & $63.44 \pm 6.18$ & $59.17 \pm 0.68$ & $65.83 \pm 6.12$ & $64.67 \pm 14.76$ & $81.55 \pm 0.77$ & 7.000000 \\
Posthoc-PA & $92.34 \pm 1.07$ & $70.51 \pm 6.65$ & $\mathbf{60.90 \pm 0.70}$ & $61.84 \pm 5.11$ & $58.35 \pm 15.16$ & $\mathbf{83.37 \pm 0.65}$ & 4.830000 \\
Posthoc-EJ & $92.10 \pm 1.11$ & $71.05 \pm 7.06$ & $60.69 \pm 0.64$ & $63.52 \pm 5.26$ & $60.80 \pm 14.38$ & $83.33 \pm 0.57$ & 4.500000 \\
Posthoc-TB & $\mathbf{93.02 \pm 1.25}$ & $74.49 \pm 6.58$ & $60.61 \pm 0.60$ & $63.80 \pm 5.40$ & $64.80 \pm 16.80$ & $82.86 \pm 0.55$ & 2.830000 \\
\makecell[l]{Node2Vec \\ (Regularized, All)} & $87.93 \pm 3.95$ & $72.61 \pm 7.60$ & $60.13 \pm 0.86$ & $63.29 \pm 4.49$ & $\mathbf{65.98 \pm 10.61}$ & $82.23 \pm 0.60$ & 4.830000 \\
\makecell[l]{Node2Vec \\ (Regularized, EJ)} & $91.11 \pm 2.06$ & $73.84 \pm 8.02$ & $60.06 \pm 0.64$ & $\mathbf{67.

In [11]:
results

Unnamed: 0,fb-forum,fb-messages,ppi,bitcoin-otc,bitcoin-alpha,ogbl-collab,Mean rank
Node2Vec,$83.91 ± 2.63$,$63.44 ± 6.18$,$59.17 ± 0.68$,$65.83 ± 6.12$,$64.67 ± 14.76$,$81.55 ± 0.77$,7.0
Posthoc-PA,$92.34 ± 1.07$,$70.51 ± 6.65$,$\mathbf{60.90 ± 0.70}$,$61.84 ± 5.11$,$58.35 ± 15.16$,$\mathbf{83.37 ± 0.65}$,4.83
Posthoc-EJ,$92.10 ± 1.11$,$71.05 ± 7.06$,$60.69 ± 0.64$,$63.52 ± 5.26$,$60.80 ± 14.38$,$83.33 ± 0.57$,4.5
Posthoc-TB,$\mathbf{93.02 ± 1.25}$,$74.49 ± 6.58$,$60.61 ± 0.60$,$63.80 ± 5.40$,$64.80 ± 16.80$,$82.86 ± 0.55$,2.83
"\makecell[l]{Node2Vec \\ (Regularized, All)}",$87.93 ± 3.95$,$72.61 ± 7.60$,$60.13 ± 0.86$,$63.29 ± 4.49$,$\mathbf{65.98 ± 10.61}$,$82.23 ± 0.60$,4.83
"\makecell[l]{Node2Vec \\ (Regularized, EJ)}",$91.11 ± 2.06$,$73.84 ± 8.02$,$60.06 ± 0.64$,$\mathbf{67.47 ± 4.17}$,$62.54 ± 12.73$,$82.27 ± 0.52$,4.0
"\makecell[l]{Node2Vec \\ (Regularized, \\ Weighted, EJ)}",$87.89 ± 3.99$,$65.67 ± 8.47$,$59.89 ± 0.59$,$64.47 ± 4.97$,$63.33 ± 11.07$,$81.78 ± 0.63$,6.5
"\makecell[l]{Node2Vec \\ (Regularized, TB)}",$87.98 ± 2.96$,$\mathbf{76.26 ± 7.70}$,$59.76 ± 0.78$,$63.15 ± 4.01$,$61.20 ± 13.79$,$81.97 ± 0.50$,6.17
"\makecell[l]{Node2Vec \\ (Regularized, \\ Weighted, TB)}",$88.11 ± 3.85$,$71.20 ± 7.13$,$60.03 ± 0.62$,$64.11 ± 5.26$,$65.75 ± 9.95$,$82.36 ± 0.44$,4.33


# PREV Alingment

In [15]:
results = pd.DataFrame(get_metric(read_results("../data/prev/evaluation/lp/"), "auc_LR"))
results

Unnamed: 0,fb-forum,fb-messages,ppi,bitcoin-otc,bitcoin-alpha,ogbl-collab
Node2VecAligned_L2_ALL,"(0.8314, 0.0422)","(0.6476, 0.0683)","(0.587, 0.0102)","(0.6061, 0.0623)","(0.5849, 0.1507)","(0.8062, 0.0062)"
Node2VecAligned_L2_EJ,"(0.8289, 0.0444)","(0.6415, 0.0794)","(0.5873, 0.0091)","(0.6203, 0.0529)","(0.5303, 0.1731)","(0.8057, 0.0066)"
PosthocALL,"(0.8443, 0.036)","(0.653, 0.0667)","(0.5921, 0.0078)","(0.6241, 0.0476)","(0.5996, 0.1595)","(0.8178, 0.0061)"
PosthocEJ,"(0.8322, 0.0424)","(0.6468, 0.0696)","(0.5922, 0.0079)","(0.6306, 0.0568)","(0.6288, 0.121)","(0.8188, 0.0051)"


In [None]:
results