In [1]:
import ast
import glob
import warnings
from collections import defaultdict
from datetime import date

import numpy as np
import pandas as pd
import wandb

today = date.today()
api = wandb.Api()

# # Find all csv files in the current directory
csv_files = glob.glob("*.csv")
# # Collect all the names of the csv files without the extension
csv_names = [csv_file[:-4] for csv_file in csv_files]
project_name = "TopoBench"  #'best_results_edhnn'
user = "telyatnikov_sap"

if project_name not in csv_names:
    runs = api.runs(f"{user}/{project_name}")

    summary_list, config_list, name_list = [], [], []
    for run in runs:
        # .summary contains the output keys/values for metrics like accuracy.
        #  We call ._json_dict to omit large files
        summary_list.append(run.summary._json_dict)

        # .config contains the hyperparameters.
        #  We remove special values that start with _.
        config_list.append(
            {k: v for k, v in run.config.items() if not k.startswith("_")}
        )

        # .name is the human-readable name of the run.
        name_list.append(run.name)

    runs_df = pd.DataFrame(
        {"summary": summary_list, "config": config_list, "name": name_list}
    )

    runs_df.to_csv(f"{project_name}.csv")
else:
    runs_df = pd.read_csv(f"{project_name}.csv", index_col=0)

    for row in runs_df.iloc:
        row["summary"] = ast.literal_eval(row["summary"])
        row["config"] = ast.literal_eval(row["config"])


for row in runs_df.iloc:
    row["summary"].update(row["config"])

lst = [i["summary"] for i in runs_df.iloc]
df = pd.DataFrame.from_dict(lst)

df_init = df.copy()

# Get average epoch run time
df["epoch_run_time"] = df["_runtime"] / df["epoch"]

In [2]:
df_init.columns

Index(['AvgTime/train_batch_mean', 'AvgTime/train_batch_std',
       'AvgTime/train_epoch_mean', 'AvgTime/train_epoch_std',
       'AvgTime/val_batch_mean', 'AvgTime/val_batch_std',
       'AvgTime/val_epoch_mean', 'AvgTime/val_epoch_std', '_runtime', '_step',
       '_timestamp', '_wandb', 'epoch', 'lr-Adam', 'test/accuracy',
       'test/auroc', 'test/loss', 'test/precision', 'test/recall',
       'train/accuracy', 'train/auroc', 'train/loss', 'train/precision',
       'train/recall', 'trainer/global_step', 'val/accuracy', 'val/auroc',
       'val/loss', 'val/precision', 'val/recall', 'loss', 'seed', 'tags',
       'test', 'model', 'paths', 'train', 'extras', 'logger', 'dataset',
       'trainer', 'callbacks', 'ckpt_path', 'evaluator', 'optimizer',
       'task_name', 'transforms', 'model/params/total',
       'model/params/trainable', 'model/params/non_trainable', 'test/mae',
       'test/mse', 'train/mae', 'train/mse', 'val/mae', 'val/mse'],
      dtype='object')

In [3]:
def normalize_column(df, column_to_normalize):
    # Use json_normalize to flatten the nested dictionaries into separate columns
    flattened_df = pd.json_normalize(df[column_to_normalize])
    # Rename columns to include 'nested_column' prefix
    flattened_df.columns = [
        f"{column_to_normalize}.{col}" for col in flattened_df.columns
    ]
    # Concatenate the flattened DataFrame with the original DataFrame
    result_df = pd.concat([df, flattened_df], axis=1)
    # Get new columns names
    new_columns = flattened_df.columns
    # Drop the original nested column if needed
    result_df.drop(column_to_normalize, axis=1, inplace=True)
    return result_df, new_columns


# Config columns to normalize
columns_to_normalize = ["model", "dataset", "callbacks", "paths"]

# Keep track of config columns added
config_columns = []
for column in columns_to_normalize:
    df, columns = normalize_column(df, column)
    config_columns.extend(columns)

In [4]:
# Remove columns that are not needed (we shouldn't vary them or their variation is not interesting)
remove_col = [
    #"dataset.transforms.data_manipulations.selected_fields",
    "callbacks.model_checkpoint.dirpath", 'model.feature_encoder.selected_dimensions','callbacks.model_checkpoint.dirpath',
]
df = df.drop(remove_col, axis=1)

# Ensure that removed columns are not present in config_columns
config_columns = [col for col in config_columns if col != remove_col]

In [5]:
print(
    f"Number of rows with model.backbone._target_ = nan is {sum(df['model.backbone._target_'].isna())}"
)
# Drop na values if there are
df = df.dropna(subset=["model.backbone._target_"])
# Reset index
df = df.reset_index(drop=True)

# Drop rows that 'callbacks.early_stopping.monitor' isna
print(
    f"Number of rows with callbacks.early_stopping.monitor = nan is {sum(df['callbacks.early_stopping.monitor'].isna())}"
)

# print("Because of SCCN and CWN false runs there were 96 such runs on 13/03/24")

df = df.dropna(subset=["callbacks.early_stopping.monitor"])
# Reset index
df = df.reset_index(drop=True)


# Get correct names for the models
df["model.backbone._target_"] = df["model.backbone._target_"].apply(
    lambda x: x.split(".")[-1]
)

Number of rows with model.backbone._target_ = nan is 0
Number of rows with callbacks.early_stopping.monitor = nan is 0


In [6]:
df["model.backbone._target_"].unique()

array(['CCCN', 'CCXN', 'CWN', 'GAT', 'GCN', 'GIN', 'AllSetTransformer',
       'EDGNN', 'UniGCNII', 'SCCN', 'SCCNNCustom', 'SCN2'], dtype=object)

In [17]:
# Identify unique models in DataFrame
unique_models = df["model.backbone._target_"].unique()

# Identify unique datasets in DataFrame
unique_datasets = df["dataset.loader.parameters.data_name"].unique()


collected_results = defaultdict(dict)
collected_results_time = defaultdict(dict)

collected_aggregated_results = defaultdict(dict)
collected_non_aggregated_results = defaultdict(dict)

# Got over each dataset and model and find the best result
for dataset in unique_datasets:
    for model in unique_models:
        # Get the subset of the DataFrame for the current dataset and model
        subset = df[
            (df["dataset.loader.parameters.data_name"] == dataset)
            & (df["model.backbone._target_"] == model)
        ]

        if subset.empty:
            print("---------")
            print(f"No results for {model} on {dataset}")
            print("---------")
            continue
        # Suppress all warnings
        warnings.filterwarnings("ignore")
        subset["Model"] = model
        warnings.filterwarnings("default")

        def get_metric(df):
            metric_ = df["callbacks.early_stopping.monitor"].unique()
            assert len(metric_) == 1, "There should be only one metric to optimize"
            metric = metric_[0]
            return metric.split("/")[-1]

        # Cols to get statistics later
        # TODO: log maximum validation value for optimized metric
        performance_cols = [f"test/{get_metric(subset)}"]

        # Get the unique values for each config column
        unique_colums_values = {}
        for col in config_columns:
            try:
                unique_colums_values[col] = subset[col].unique()
            except:
                print(f"Attention the columns: {col}, has issues with unique values")

        # Keep only those keys that have more than one unique value
        unique_colums_values = {
            k: v for k, v in unique_colums_values.items() if len(v) > 1
        }

        # Print the unique values for each config column

        print(f"Unique values for each config column for {model} on {dataset}:")
        for col, unique in unique_colums_values.items():
            print(f"{col}: {unique}")
            print()
        print("---------")

        # Check if "special colums" are not in unique_colums_values
        # For example dataset.parameters.data_seed should not be in aggregation columns
        # If it is, then we should remove it from the list
        special_columns = ["dataset.parameters.data_seed"]

        for col in special_columns:
            if col in unique_colums_values:
                unique_colums_values.pop(col)

        # Obtain the aggregation columns
        aggregation_columns = ["Model"] + list(unique_colums_values.keys())

        collected_non_aggregated_results[dataset][model] = {
            "df": subset.copy(),
            "aggregation_columns": aggregation_columns,
            "performance_cols": performance_cols,
        }

        # Aggregate the subset by the aggregation columns and get the best result for each group
        aggregated = subset.groupby(aggregation_columns).agg(
            {col: ["mean", "std"] for col in performance_cols}
        )

        # Go from MultiIndex to Index
        aggregated = aggregated.reset_index()

        assert (
            len(subset["callbacks.early_stopping.mode"].unique()) == 1
        ), "There should be only one mode for early stopping"
        # Identify the mode of the early stopping mode

        if subset["callbacks.early_stopping.mode"].unique()[0] == "max":
            ascending = False
            final_best_ = aggregated.sort_values(
                by=(f"test/{get_metric(subset)}", "mean"), ascending=ascending
            ).head(1)
            final_best_ = (final_best_ * 100).round(2)
        else:
            ascending = True
            final_best_ = aggregated.sort_values(
                by=(f"test/{get_metric(subset)}", "mean"), ascending=ascending
            ).head(1)

        collected_results[dataset][model] = {
            "mean": final_best_[(f"test/{get_metric(subset)}", "mean")].values[0],
            "std": final_best_[(f"test/{get_metric(subset)}", "std")].values[0],
        }

        # Get average epoch run time
        collected_results_time[dataset][model] = {
            "mean": subset['AvgTime/train_epoch_mean'].mean(),
            "std": subset["epoch"].std(),
        }


        collected_aggregated_results[dataset][model] = aggregated.sort_values(
            by=(f"test/{get_metric(subset)}", "mean"), ascending=ascending
        )

Attention the columns: model.feature_encoder.in_channels, has issues with unique values
Attention the columns: model.feature_encoder.selected_dimensions, has issues with unique values
Attention the columns: callbacks.model_checkpoint.dirpath, has issues with unique values
Unique values for each config column for CCCN on IMDB-BINARY:
dataset.split_params.data_seed: [0 3 5 7 9]

paths.output_dir: ['/home/levtel/projects/TopoBench/logs/train/multiruns/2025-03-17_03-08-10/0'
 '/home/levtel/projects/TopoBench/logs/train/multiruns/2025-03-17_03-08-10/1'
 '/home/levtel/projects/TopoBench/logs/train/multiruns/2025-03-17_03-08-10/2'
 '/home/levtel/projects/TopoBench/logs/train/multiruns/2025-03-17_03-08-10/3'
 '/home/levtel/projects/TopoBench/logs/train/multiruns/2025-03-17_03-08-10/4']

---------
Attention the columns: model.feature_encoder.in_channels, has issues with unique values
Attention the columns: model.feature_encoder.selected_dimensions, has issues with unique values
Attention the co

In [18]:
collected_non_aggregated_results.keys()

dict_keys(['IMDB-BINARY', 'IMDB-MULTI', 'MUTAG', 'NCI1', 'NCI109', 'PROTEINS', 'US-county-demos', 'amazon_ratings', 'citeseer', 'Cora', 'PubMed', 'minesweeper', 'roman_empire', 'REDDIT-BINARY', 'ZINC'])

In [None]:
#"epoch_run_time" in collected_non_aggregated_results['NCI1']['CCCN']['df'].columns

True

In [20]:
collected_results_time
# Convert nested dictionary to DataFrame
nested_dict = dict(collected_results_time)
result_dict = pd.DataFrame.from_dict(
    {
        (i, j): nested_dict[i][j]
        for i in nested_dict
        for j in nested_dict[i].keys()
    },
    orient="index",
)


result_dict = result_dict.round(2)
result_dict["performance"] = result_dict.apply(
    lambda x: f"{x['mean']} ± {x['std']}", axis=1
)
result_dict = result_dict.drop(["mean", "std"], axis=1)

# Reset multiindex
result_dict = result_dict.reset_index()
# rename columns
result_dict.columns = ["Dataset", "Model", "Average Time per Epoch"]

In [None]:
result_dict.pivot_table(
    index="Model", columns="Dataset", values="Average Time per Epoch", aggfunc="first"
)[['MUTAG', 'NCI1','NCI109','PROTEINS','ZINC','IMDB-BINARY','IMDB-MULTI']]

Dataset,MUTAG,NCI1,NCI109,PROTEINS,ZINC,IMDB-BINARY,IMDB-MULTI
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AllSetTransformer,0.08 ± 35.88,0.55 ± 76.7,0.56 ± 47.64,0.16 ± 17.89,,0.19 ± 15.25,0.18 ± 28.42
CCCN,0.11 ± 19.87,1.33 ± 85.91,1.33 ± 16.81,0.32 ± 23.18,6.14 ± 23.29,0.44 ± 20.43,0.48 ± 42.92
CCXN,0.09 ± 19.81,1.25 ± 37.32,1.39 ± 63.58,0.32 ± 25.15,5.92 ± 53.69,0.36 ± 48.04,0.5 ± 30.12
CWN,0.1 ± 5.7,1.38 ± 47.51,1.37 ± 32.13,0.37 ± 51.77,5.86 ± 15.25,0.4 ± 21.39,0.45 ± 11.18
EDGNN,0.04 ± 29.66,0.49 ± 60.17,0.49 ± 91.15,0.13 ± 24.6,,0.09 ± 45.19,0.13 ± 23.18
GAT,0.04 ± 21.97,0.34 ± 47.36,0.33 ± 38.24,0.07 ± 37.65,1.23 ± 28.58,0.06 ± 33.65,0.09 ± 18.37
GCN,0.03 ± 20.74,0.26 ± 73.19,0.27 ± 29.15,0.05 ± 40.25,,0.05 ± 61.81,0.07 ± 49.12
GIN,0.03 ± 13.96,0.27 ± 81.55,0.27 ± 27.29,0.06 ± 30.94,,0.06 ± 27.48,0.06 ± 56.28
SCCN,0.09 ± 29.5,1.71 ± 31.9,1.7 ± 33.65,0.5 ± 32.13,,5.33 ± 21.68,9.32 ± 16.96
SCCNNCustom,0.09 ± 27.06,1.65 ± 50.5,1.66 ± 61.5,0.43 ± 28.42,,5.92 ± 14.83,9.41 ± 33.47
