In [1]:
from ast import literal_eval
from collections import defaultdict

import pandas as pd

import wandb
from scripts.results_processing.preprocess import preprocess_df
from scripts.results_processing.reproduction.constants import (
    dataset_model_columns,
    optimization_metrics,
    run_columns,
    sweeped_columns,
)

%load_ext autoreload
%autoreload 2


In [2]:

columns_to_normalize = [
    "model",
    "dataset",
    "transforms",
    "optimizer",
    "callbacks",
]
def normalize_columns(df, columns_to_normalize):
    # Gather the new DataFrames to be concatenated
    flattened_dfs = []

    for col in columns_to_normalize:
        df[col] = df[col].apply(lambda x: str(x).replace("nan", "None"))
        df[col] = df[col].apply(literal_eval)

        # Flatten
        flat = pd.json_normalize(df[col])

        # Rename
        flat.columns = [f"{col}.{c}" for c in flat.columns]
        flattened_dfs.append(flat)

    # Drop all nested columns in one shot
    df = df.drop(columns=columns_to_normalize)

    # Concatenate once at the end
    return pd.concat([df] + flattened_dfs, axis=1)


def normalize_df(df, columns_to_normalize):
    # Config columns to normalize
    df = normalize_columns(df, columns_to_normalize)

    return df

def fetch(project):
    user = "telyatnikov_sap"
    api = wandb.Api(overrides={"base_url": "https://api.wandb.ai"}, timeout=40)
    runs = api.runs(f"{user}/{project}")
    summary_list, config_list, name_list = [], [], []
    for run in runs:
        # .summary contains the output keys/values for metrics like accuracy.
        #  We call ._json_dict to omit large files
        summary_list.append(run.summary._json_dict)

        # .config contains the hyperparameters.
        #  We remove special values that start with _.
        config_list.append(
            {
                k: v
                for k, v in run.config.items()
                if not k.startswith("_")
            }
        )

        # .name is the human-readable name of the run.
        name_list.append(run.name)

    runs_df = pd.DataFrame(
        {
            "summary": summary_list,
            "config": config_list,
            "name": name_list,
        }
    )
    # Merge the dicts in a vectorized way:
    merged_dicts = [
        {**s, **c}
        for s, c in zip(runs_df["summary"], runs_df["config"], strict=False)
    ]

    # Now expand them into a DataFrame:
    df_merged = pd.DataFrame.from_records(merged_dicts)
    return df_merged

def main():
    df_list = [ ]
    projects = ["CSL", "CSL_sann", "CSL_sccnn", "Topotune_CSL", "Topotune_cell_CSL"]
    for project in projects:
        tmp_df = fetch(project)
        df_list.append(tmp_df)
    df = pd.concat(df_list, ignore_index=True)
    df = normalize_df(df, columns_to_normalize)
    return df

In [3]:
def gen_scores(df):
    # Get unique datasets
    datasets = list(df["dataset.loader.parameters.data_name"].unique())
    # Get unique models
    models = list(df["model.model_name"].unique())

    collect_subsets = defaultdict(dict)
    # Got over each dataset and model and find the best result
    for dataset in datasets:
        for model in models:
            # Get the subset of the DataFrame for the current dataset and model
            subset = df.loc[
                (df["dataset.loader.parameters.data_name"] == dataset)
            ]

            optim_metric = optimization_metrics[dataset]["optim_metric"]
            eval_metric = optimization_metrics[dataset]["eval_metric"]
            direction = optimization_metrics[dataset]["direction"]

            # Keep metrics that matters for dataset
            performance_columns = optimization_metrics[dataset][
                "performance_columns"
            ]
            subset = subset[
                dataset_model_columns
                + sweeped_columns
                + performance_columns
                + run_columns
            ]
            aggregated = subset.groupby(
                sweeped_columns + ["model.model_name", "model.model_domain"],
                dropna=False,
            ).agg(
                {col: ["mean", "std", "count", "max", "min"] for col in performance_columns},
            )

            # aggregated = subset.groupby(sweeped_columns, dropna=False).count()

            n_count = 5 if "MANTRA" not in dataset else 4
            # Go from MultiIndex to Index
            aggregated = aggregated.reset_index()
            print(f"Dataset: {dataset}, Model: {model}")
            print(aggregated[(eval_metric, "count")].unique())
            # print(aggregated['dataset.split_params.data_seed'].unique())
            print(
                (aggregated[(eval_metric, "count")] >= n_count).sum()
                / len(aggregated)
                * 100
            )
            aggregated = aggregated[aggregated[(eval_metric, "count")] >= n_count]
            # print(len(aggregated[aggregated['seed'] > 4]))
            # aggregated = aggregated.sort_values(
            #     by=(optim_metric, "mean"), ascending=(direction == "min")
            # )

            # Git percent in case of classification
            if "test/accuracy" in performance_columns:
                # Go over all the performance columns and multiply by 100
                for col in performance_columns:
                    aggregated[(col, "mean")] *= 100
                    aggregated[(col, "std")] *= 100
                    aggregated[(col, "max")] *= 100
                    aggregated[(col, "min")] *= 100

                # Round performance columns values up to 2 decimal points
                for col in performance_columns:
                    aggregated[(col, "mean")] = aggregated[
                        (col, "mean")
                    ].round(2)
                    aggregated[(col, "max")] = aggregated[(col, "max")].round(
                        2
                    )
                    aggregated[(col, "min")] = aggregated[(col, "min")].round(
                        2
                    )

            else:
                # Round all values up to 4 decimal points
                # Round performance columns values up to 4 decimal points
                for col in performance_columns:
                    aggregated[(col, "mean")] = aggregated[
                        (col, "mean")
                    ].round(4)
                    aggregated[(col, "std")] = aggregated[(col, "std")].round(
                        4
                    )

            collect_subsets[dataset] = aggregated
    return collect_subsets


In [4]:
df = main()

In [5]:
df = preprocess_df(df, split_mantra=False)

In [6]:
collected_subsets = gen_scores(df)

Dataset: CSL, Model: hopse_g
[ 5  4 88 89 90]
98.94736842105263
Dataset: CSL, Model: HOPSE_MANUAL_PE
[ 5  4 88 89 90]
98.94736842105263
Dataset: CSL, Model: SANN
[ 5  4 88 89 90]
98.94736842105263
Dataset: CSL, Model: sccnn
[ 5  4 88 89 90]
98.94736842105263
Dataset: CSL, Model: topotune
[ 5  4 88 89 90]
98.94736842105263


In [7]:
collected_subsets["CSL"].sort_values(by=("val/auroc", "mean"), ascending=False)[["model.model_name", "val/auroc", "test/auroc", "model.backbone.n_layers", "dataset.dataloader_params.batch_size", "optimizer.parameters.lr", "optimizer.parameters.weight_decay", "transforms.sann_encoding.pretrain_model", "transforms.sann_encoding.neighborhoods"]].head(60)

Unnamed: 0_level_0,model.model_name,val/auroc,val/auroc,val/auroc,val/auroc,val/auroc,test/auroc,test/auroc,test/auroc,test/auroc,test/auroc,model.backbone.n_layers,dataset.dataloader_params.batch_size,optimizer.parameters.lr,optimizer.parameters.weight_decay,transforms.sann_encoding.pretrain_model,transforms.sann_encoding.neighborhoods
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,count,max,min,mean,std,count,max,min,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
290,HOPSE_MANUAL_PE,100.0,0.0,5,100.0,100.0,100.0,0.0,5,100.0,100.0,5.0,256,0.001,0.0001,,"['up_adjacency-0', 'up_adjacency-1', '2-up_adj..."
96,hopse_g,99.6,0.903028,5,100.0,97.98,99.6,0.903028,5,100.0,97.98,5.0,256,0.001,0.0001,ZINC,"['up_adjacency-0', 'up_adjacency-1', '2-up_adj..."
291,HOPSE_MANUAL_PE,93.14,4.121533,5,96.62,86.89,91.94,1.83102,5,94.91,90.63,5.0,256,0.001,0.0001,,"['up_adjacency-0', 'up_adjacency-1', '2-up_adj..."
372,topotune,83.48,17.641148,90,100.0,49.75,79.73,22.060695,90,100.0,39.98,,256,0.001,0.0,,
376,topotune,83.22,18.571034,90,100.0,40.17,79.07,22.018113,90,100.0,35.13,,256,0.001,0.0001,,
364,topotune,82.57,19.714335,88,100.0,39.85,80.34,22.346895,88,100.0,41.1,,256,0.001,0.0,,
368,topotune,82.4,20.472845,90,100.0,41.45,80.31,22.919848,90,100.0,32.36,,256,0.001,0.0001,,
370,topotune,75.56,21.273444,89,100.0,39.18,72.56,22.950385,89,100.0,40.8,,256,0.01,0.0001,,
366,topotune,74.65,21.126191,90,100.0,38.67,71.96,21.845193,90,100.0,38.36,,256,0.01,0.0,,
378,topotune,70.55,22.366105,90,100.0,37.2,68.53,22.400109,90,100.0,44.21,,256,0.01,0.0001,,


In [8]:
models = list(df["model.model_name"].unique())
domains = list(df["model.model_domain"].unique())

df_dict = {
    "model": [],
    "domain": [],
    "mean": [],
    "std": [],
    "max": [],
    "min": []
}

for model in models:
    for domain in domains:
        subset = collected_subsets["CSL"][
            (collected_subsets["CSL"]["model.model_name"] == model)
            & (collected_subsets["CSL"]["model.model_domain"] == domain)
        ]
        if subset.empty:
            continue
        optim_metric = optimization_metrics["CSL"]["optim_metric"]
        eval_metric = optimization_metrics["CSL"]["eval_metric"]
        direction = optimization_metrics["CSL"]["direction"]

        subset = subset.sort_values(by=(optim_metric, "mean"), ascending=(direction == "min"))
        best_result = subset.iloc[0]

        df_dict["model"].append(model)
        df_dict["domain"].append(domain)
        df_dict["mean"].append(best_result[(eval_metric, "mean")])
        df_dict["std"].append(best_result[(eval_metric, "std")])
        df_dict["max"].append(best_result[(eval_metric, "max")])
        df_dict["min"].append(best_result[(eval_metric, "min")])
df_res = pd.DataFrame(df_dict)
         


In [9]:
df_res

Unnamed: 0,model,domain,mean,std,max,min
0,hopse_g,cell,98.0,4.472137,100.0,90.0
1,hopse_g,simplicial,18.67,6.912147,26.67,10.0
2,HOPSE_MANUAL_PE,cell,100.0,0.0,100.0,100.0
3,HOPSE_MANUAL_PE,simplicial,60.0,11.303884,73.33,46.67
4,SANN,simplicial,26.0,6.831301,33.33,20.0
5,sccnn,simplicial,25.33,7.67391,33.33,16.67
6,topotune,cell,52.78,34.286921,100.0,3.33
7,topotune,simplicial,24.16,5.801448,33.33,13.33
