In [None]:
import sys

sys.path.append("../")

import numpy as np
import pandas as pd
from dynalign.experiments.paths import LP_EVALUATION_RESULTS, PREV_EXPERIMENTS_PATH
from pathlib import Path
from typing import List, Dict, Any, Union, Tuple
from collections import defaultdict


def get_dirs_from_path(
    path: Path, only_files_with_extension: str = "", glob: str = ""
) -> List[Path]:
    """List entries located directly under ``path``.

    Exactly one selection mode is used, chosen in this order:

    1. ``only_files_with_extension`` is non-empty: entries matching the
       pattern ``*<only_files_with_extension>``.
    2. ``glob`` is non-empty: entries matching that glob pattern.
    3. Otherwise: every entry of the directory whose path string does not
       contain ``.gitignore``.

    Args:
        path: Directory to inspect.
        only_files_with_extension: Suffix (e.g. ``".pkl"``) to match. Takes
            precedence over ``glob``.
        glob: Glob pattern, used only when no extension is given.

    Returns:
        The matching paths, in the order the filesystem yields them.
    """
    if only_files_with_extension:
        pattern = f"*{only_files_with_extension}"
    elif glob:
        pattern = f"{glob}"
    else:
        pattern = None

    if pattern is not None:
        return list(path.glob(pattern))

    # The exclusion looks at the whole path string, not only the entry name.
    return [entry for entry in path.iterdir() if ".gitignore" not in str(entry)]


# Non-metric column names of the evaluation result frames;
# "embeddings_aggregation" is the group-by key used by
# aggregate_aligner_results_last_snapshot.
# NOTE(review): "run" presumably identifies one repetition of an experiment — confirm.
DF_COLUMNS_TO_AGGREGATION = ["run", "embeddings_aggregation"]

def convert_float_to_str(x: float) -> str:
    """Render ``x`` in fixed-point notation with two digits after the decimal point."""
    return format(x, ".2f")


def percentage_style(
    x: Union[float, Tuple[float, float]]
) -> Union[float, Tuple[float, float]]:
    """Convert a fraction, or a pair of fractions, to percentages.

    Each value is multiplied by 100 and rounded to two decimal places.

    Args:
        x: A ``float``, or a ``tuple`` whose first two items are numbers.

    Returns:
        The converted float, or a 2-tuple built from the first two items of
        the input tuple.

    Raises:
        ValueError: If ``x`` is neither a ``float`` nor a ``tuple``.
    """

    def as_percent(fraction: float) -> float:
        return round(fraction * 100, 2)

    if isinstance(x, float):
        return as_percent(x)
    if isinstance(x, tuple):
        return as_percent(x[0]), as_percent(x[1])
    raise ValueError("X parsing error")


def highlight_max(x: pd.Series) -> List[str]:
    """Return per-cell CSS that colours the best-scoring entry of ``x`` red.

    Intended as the function passed to ``Styler.apply``. Each cell is expected
    to be an indexable result (e.g. a ``(mean, std)`` tuple); its first element
    is the score that gets compared. Missing cells (``None``, ``NaN`` or an
    empty container) are treated as a score of 0.

    Args:
        x: One column (or row) of result cells.

    Returns:
        A list with one CSS string per cell: ``"color:red"`` for the first
        cell holding the maximum score and ``""`` for every other cell. An
        empty input yields an empty list.
    """
    if len(x) == 0:
        # np.argmax raises on an empty sequence; there is nothing to style.
        return []

    def _score(cell):
        # NaN is truthy, so it has to be caught before the truthiness test;
        # otherwise ``cell[0]`` raises TypeError on a float.
        if isinstance(cell, float) and np.isnan(cell):
            return 0
        return cell[0] if cell else 0

    scores = [_score(cell) for cell in x.values]
    best = np.argmax(scores)
    return ["color:red" if position == best else "" for position in range(len(x))]

In [None]:
def aggregate_aligner_results_last_snapshot(
    df: pd.DataFrame, metric_name: str, precision: int = 3
) -> Dict[Any, Tuple[float, float]]:
    """Summarise one metric at the last prediction snapshot.

    Keeps only the rows whose ``prediction_snapshot`` equals the maximum value
    of that column, groups them by ``embeddings_aggregation`` and computes the
    mean and standard deviation of ``metric_name`` within each group.

    Args:
        df: Result frame with at least the columns ``prediction_snapshot``,
            ``embeddings_aggregation`` and ``metric_name``.
        metric_name: Name of the metric column to summarise.
        precision: Number of decimal digits kept in the rounded statistics.

    Returns:
        A dict mapping each ``embeddings_aggregation`` value to a
        ``(mean, std)`` tuple rounded to ``precision`` digits. Empty when there
        are no rows to aggregate.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    if df.empty:
        return {}

    last_snapshot = df["prediction_snapshot"].max()
    last_snapshot_df = df[df["prediction_snapshot"] == last_snapshot]
    if last_snapshot_df.empty:
        return {}

    # Aggregate only the metric column, so that other columns (for example
    # non-numeric run identifiers) cannot break the mean/std computation.
    stats = last_snapshot_df.groupby(by="embeddings_aggregation")[metric_name].agg(
        ["mean", "std"]
    )

    # One (mean, std) tuple per group; std is NaN for single-row groups.
    summary = stats.apply(
        lambda row: (round(row["mean"], precision), round(row["std"], precision)),
        axis=1,
    )
    return summary.to_dict()


def aggreagte_all_results_last_snapshot(
    paths: List[Path], metric_name: str, precision: int = 3
) -> Dict[str, Dict[str, Any]]:
    """Aggregate the last-snapshot metric for every method and dataset.

    Each element of ``paths`` is treated as one method's result directory:
    the directory name is the method name and every ``*.pkl`` file inside it
    holds the results for one dataset (file name without ``.pkl``).

    Args:
        paths: Method result directories to scan.
        metric_name: Metric column to aggregate in each pickled frame.
        precision: Number of decimal digits kept in the aggregated values.

    Returns:
        A mapping ``dataset name -> method name -> aggregated results`` as
        produced by ``aggregate_aligner_results_last_snapshot``.
    """
    results = defaultdict(dict)
    for method_results_path in paths:
        method_name = method_results_path.name
        method_ds_results_paths = get_dirs_from_path(
            method_results_path, only_files_with_extension=".pkl"
        )

        for method_ds_results_path in method_ds_results_paths:
            ds_name = method_ds_results_path.name.replace(".pkl", "")
            # NOTE: unpickling can execute arbitrary code; only point this at
            # result files produced by trusted experiment runs.
            results[ds_name][method_name] = aggregate_aligner_results_last_snapshot(
                df=pd.read_pickle(method_ds_results_path),
                metric_name=metric_name,
                # Forward the caller's precision instead of a fixed value.
                precision=precision,
            )

    return results



def get_alpha_results_dataframe_last_snapshot(
    results: Dict[str, Any]
) -> Dict[str, Any]:
    """Regroup per-dataset, per-method results by aggregation method.

    The alpha label of a method is derived from its name: the text after the
    last ``"alpha_"``, then the text after the last ``"0"`` in that, prefixed
    with ``"0."`` (``"..._alpha_01"`` -> ``"0.1"``, ``"..._alpha_025"`` ->
    ``"0.25"``).

    Args:
        results: Mapping ``dataset -> method name -> {aggregation: value}``.

    Returns:
        A mapping ``aggregation -> {(dataset, alpha label): value}``. Despite
        the function name, this is a dict, not a DataFrame.
    """
    regrouped = defaultdict(dict)
    for ds_name, per_method in results.items():
        for method_name, per_aggregation in per_method.items():
            # NOTE(review): suffixes that end in "0" or carry several zeros
            # (e.g. "005" -> "0.5") lose information here — confirm the
            # naming scheme of the method directories.
            alpha_suffix = method_name.split("alpha_")[-1].split("0")[-1]
            alpha = f"0.{alpha_suffix}"
            for aggregation_name, stats in per_aggregation.items():
                regrouped[aggregation_name][(ds_name, alpha)] = stats

    return regrouped


In [None]:
# prev_paths is also assigned by a later cell; defining it here keeps this
# inspection cell runnable under Restart Kernel -> Run All.
prev_paths = get_dirs_from_path(
    PREV_EXPERIMENTS_PATH / "evaluation" / "lp", glob="*alpha*"
)
prev_paths

In [None]:
# Entries of the previous experiments' LP evaluation folder matching "*alpha*".
prev_paths = get_dirs_from_path(
    PREV_EXPERIMENTS_PATH / "evaluation" / "lp", glob="*alpha*"
)
# AUC at the last snapshot for every method / dataset found in those entries.
prev_results = aggreagte_all_results_last_snapshot(
    paths=prev_paths, metric_name="auc", precision=3
)
# Despite the ``df_`` prefix this is a plain mapping, not a DataFrame.
df_last_snapshot = get_alpha_results_dataframe_last_snapshot(results=prev_results)

In [None]:
# Tabular view: one column per aggregation method, one row per (dataset, alpha) key.
pd.DataFrame(data=df_last_snapshot)

In [None]:
# df_last_snapshot is a mapping (it has no ``.index``); build the frame first
# and inspect the row index of that.
pd.DataFrame(df_last_snapshot).index

In [None]:
# Tabular view of the raw aggregation: one column per dataset, one row per method.
pd.DataFrame(data=prev_results)

In [None]:
# Row labels, in display order, selected with ``.loc[order]`` when the
# per-dataset result tables are rendered.
order = [
    "Node2Vec",
    "PosthocALL",
    "PosthocEJ",
    "PosthocTB",
    "Node2VecAligned_L2_ALL",
    "Node2VecAligned_L2_EJ",
    "Node2VecAligned_L2_EJ_Weighted",
    "Node2VecAligned_L2_TB",
    "Node2VecAligned_L2_TB_Weighted",
]
# Same listing without "Node2Vec" and "Node2VecAligned_L2_EJ_Weighted".
# NOTE(review): not referenced by any cell visible in this part of the
# notebook — presumably meant for the previous-experiment results; confirm
# or remove.
order_prev = [
    "PosthocALL",
    "PosthocEJ",
    "PosthocTB",
    "Node2VecAligned_L2_ALL",
    "Node2VecAligned_L2_EJ",
    "Node2VecAligned_L2_TB",
    "Node2VecAligned_L2_TB_Weighted",
]

In [None]:
# NOTE(review): ``agg_results`` is not assigned by any cell visible above
# (the name only exists as a local inside
# get_alpha_results_dataframe_last_snapshot); on a fresh kernel this cell
# raises NameError. Presumably it is a dataset -> method -> results mapping
# created in a removed cell — restore that cell before relying on this output.
for ds in agg_results:
    print(ds)
    # Transposed so each row is one label from ``order``.
    ds_df = pd.DataFrame.from_dict(agg_results[ds]).transpose()
    # Per column, the cell with the highest leading value is coloured red.
    display(ds_df.loc[order].style.apply(highlight_max, axis=0))