In [1]:
import ast
import glob
from collections import defaultdict
from datetime import date

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import wandb

PROJ_PREFIX = "rebuttal_cell"
PROJ_DS = [ "MUTAG", "PROTEINS", "NCI1", "NCI109", "ZINC"]

today = date.today()
api = wandb.Api(overrides={"base_url": "https://api.wandb.ai"}, timeout=40)


In [3]:

# # Find all csv files in the current directory
csv_files = glob.glob("csv_rebutal/*.csv")
# # Collect all the names of the csv files without the extension
csv_names = [csv_file[:-4].split('/')[-1] for csv_file in csv_files]
print(csv_names)
user = "levsap"

for project_dataset in PROJ_DS:
    project_name = f"{PROJ_PREFIX}_{project_dataset}"
    if f"{user}_{project_name}" not in csv_names:
        runs = api.runs(f"{user}/{project_name}")
        print(project_name)

        summary_list, config_list, name_list = [], [], []
        for i, run in enumerate(runs):
            
            # .summary contains the output keys/values for metrics like accuracy.
            #  We call ._json_dict to omit large files
            summary_list.append(run.summary._json_dict)

            # .config contains the hyperparameters.
            #  We remove special values that start with _.
            config_list.append(
                {k: v for k, v in run.config.items() if not k.startswith("_")}
            )

            # .name is the human-readable name of the run.
            name_list.append(run.name)

        runs_df = pd.DataFrame(
            {"summary": summary_list, "config": config_list, "name": name_list}
        )

        runs_df.to_csv(f"csv_rebutal/{user}_{project_name}.csv")
        print('Done')
    else:
        continue
        # runs_df = pd.read_csv(f"csv_rebutal/{user}_{project_name}.csv", index_col=0)

        # for row in runs_df.iloc:
        #     row["summary"] = ast.literal_eval(row["summary"])
        #     row["config"] = ast.literal_eval(row["config"])
    # for row in runs_df.iloc:
    #     row["summary"].update(row["config"])

    # lst = [i["summary"] for i in runs_df.iloc]
    # df = pd.DataFrame.from_dict(lst)

    # df_init = df.copy()

['levsap_rebuttal_cell_MUTAG', 'levsap_rebuttal_cell_PROTEINS', 'levsap_rebuttal_cell_NCI1']
rebuttal_cell_NCI109




CommError: HTTPSConnectionPool(host='api.wandb.ai', port=443): Read timed out. (read timeout=20)

In [3]:
csv_files = glob.glob("csv_rebutal/*.csv")
df_list = []
for csv_file in csv_files:
    df = pd.read_csv(csv_file, index_col=0)
    for row in df.iloc:
        row["summary"] = ast.literal_eval(row["summary"])
        row["config"] = ast.literal_eval(row["config"])
        row["summary"].update(row["config"])

    lst = [i["summary"] for i in df.iloc]
    df = pd.DataFrame.from_dict(lst)

    df_init = df.copy()

    df_list.append(df_init)

df = pd.concat(df_list, ignore_index=True)

In [4]:
def normalize_column(df, column_to_normalize):
    # Use json_normalize to flatten the nested dictionaries into separate columns
    flattened_df = pd.json_normalize(df[column_to_normalize])
    # Rename columns to include 'nested_column' prefix
    flattened_df.columns = [
        f"{column_to_normalize}.{col}" for col in flattened_df.columns
    ]
    # Concatenate the flattened DataFrame with the original DataFrame
    result_df = pd.concat([df, flattened_df], axis=1)
    # Get new columns names
    new_columns = flattened_df.columns
    # Drop the original nested column if needed
    result_df.drop(column_to_normalize, axis=1, inplace=True)
    return result_df, new_columns


# Config columns to normalize
columns_to_normalize = ["model", "dataset", "callbacks", "paths", "transforms", "optimizer"]

# Keep track of config columns added
config_columns = []
for column in columns_to_normalize:
    df, columns = normalize_column(df, column)
    config_columns.extend(columns)
df.to_csv("merged_rebutals/merged_normalized.csv")

In [21]:
df = pd.read_csv("merged_rebutals/merged_normalized.csv")
columns_to_eval = ["transforms.sann_encoding.pe_types"]
for col in columns_to_eval:
    df[col] = df[col].apply(lambda x: str(x).replace("nan", "None"))
    df[col] = df[col].apply(ast.literal_eval)

  df = pd.read_csv("merged_rebutals/merged_normalized.csv")


# Correct model name based on each pre-trained model

In [7]:
def map_name(row):
    if isinstance(row["transforms.sann_encoding.pe_types"], list):
        return "HOPSE_MANUAL_PE"
    elif row["model.model_name"] == "sann":
        if type(
            row["transforms.sann_encoding.pretrain_model"]
        ) == float and pd.isna(row["transforms.sann_encoding.pretrain_model"]):
            return "SANN"
        else:
            return "HOPSE_GPSE"
    else:
        return row["model.model_name"]

In [6]:
df["model.model_name"] = df.apply(map_name, axis=1)
df["transforms.sann_encoding.neighborhoods"] = df["transforms.sann_encoding.neighborhoods"].astype(str)


NameError: name 'map_name' is not defined

In [7]:
df["model.model_name"].unique()

array(['hopse_m', nan], dtype=object)

In [8]:
df["dataset.loader.parameters.data_name"].unique()

array(['MUTAG', 'NCI109', 'PROTEINS', 'ZINC', nan, 'NCI1'], dtype=object)

In [11]:
df[df["dataset.loader.parameters.data_name"] == "MANTRA_betti_numbers"]

Unnamed: 0,AvgTime/train_batch_mean,AvgTime/train_batch_std,AvgTime/train_epoch_mean,AvgTime/train_epoch_std,AvgTime/val_batch_mean,AvgTime/val_batch_std,AvgTime/val_epoch_mean,AvgTime/val_epoch_std,_runtime,_step,...,transforms.one_hot_node_degree_features.transform_name,transforms.one_hot_node_degree_features.transform_type,transforms.one_hot_node_degree_features.keep_degree_field,optimizer._target_,optimizer.optimizer_id,optimizer.scheduler.scheduler_id,optimizer.scheduler.scheduler_params.gamma,optimizer.scheduler.scheduler_params.step_size,optimizer.parameters.lr,optimizer.parameters.weight_decay


In [12]:
[i for i in df.columns]

['AvgTime/train_batch_mean',
 'AvgTime/train_batch_std',
 'AvgTime/train_epoch_mean',
 'AvgTime/train_epoch_std',
 'AvgTime/val_batch_mean',
 'AvgTime/val_batch_std',
 'AvgTime/val_epoch_mean',
 'AvgTime/val_epoch_std',
 '_runtime',
 '_step',
 '_timestamp',
 '_wandb',
 'epoch',
 'lr-Adam',
 'preprocessor_time',
 'test/accuracy',
 'test/auroc',
 'test/loss',
 'test/precision',
 'test/recall',
 'train/accuracy',
 'train/auroc',
 'train/loss',
 'train/precision',
 'train/recall',
 'trainer/global_step',
 'val/accuracy',
 'val/auroc',
 'val/loss',
 'val/precision',
 'val/recall',
 'loss',
 'seed',
 'tags',
 'test',
 'train',
 'extras',
 'logger',
 'trainer',
 'ckpt_path',
 'evaluator',
 'task_name',
 'model/params/total',
 'model/params/trainable',
 'model/params/non_trainable',
 'test/mae',
 'test/mse',
 'train/mae',
 'train/mse',
 'val/mae',
 'val/mse',
 'model.compile',
 'model._target_',
 'model.model_name',
 'model.model_domain',
 'model.readout._target_',
 'model.readout.hidden_dim',

### Get grouped df

In [32]:
df = df[~(df["dataset.split_params.data_seed"].isna())]

In [10]:
df["model.model_domain"].unique()

array(['cell'], dtype=object)

In [11]:
list(df.columns)

['AvgTime/train_batch_mean',
 'AvgTime/train_batch_std',
 'AvgTime/train_epoch_mean',
 'AvgTime/train_epoch_std',
 'AvgTime/val_batch_mean',
 'AvgTime/val_batch_std',
 'AvgTime/val_epoch_mean',
 'AvgTime/val_epoch_std',
 '_runtime',
 '_step',
 '_timestamp',
 '_wandb',
 'epoch',
 'lr-Adam',
 'preprocessor_time',
 'test/accuracy',
 'test/auroc',
 'test/loss',
 'test/precision',
 'test/recall',
 'train/accuracy',
 'train/auroc',
 'train/loss',
 'train/precision',
 'train/recall',
 'trainer/global_step',
 'val/accuracy',
 'val/auroc',
 'val/loss',
 'val/precision',
 'val/recall',
 'loss',
 'seed',
 'tags',
 'test',
 'train',
 'extras',
 'logger',
 'trainer',
 'ckpt_path',
 'evaluator',
 'task_name',
 'model/params/total',
 'model/params/trainable',
 'model/params/non_trainable',
 'test/mae',
 'test/mse',
 'train/mae',
 'train/mse',
 'val/mae',
 'val/mse',
 'model.compile',
 'model._target_',
 'model.model_name',
 'model.model_domain',
 'model.readout._target_',
 'model.readout.hidden_dim',

In [16]:
changed_params = []
for param in list(df.columns):
    if df[param].dtype == pd.CategoricalDtype:
        continue
    if  len(df[param].unique()) > 1:
        changed_params.append(param)

In [17]:
changed_params

['AvgTime/train_batch_mean',
 'AvgTime/train_batch_std',
 'AvgTime/train_epoch_mean',
 'AvgTime/train_epoch_std',
 'AvgTime/val_batch_mean',
 'AvgTime/val_batch_std',
 '_runtime',
 '_step',
 '_timestamp',
 'epoch',
 'lr-Adam',
 'preprocessor_time',
 'test/accuracy',
 'test/auroc',
 'test/precision',
 'test/recall',
 'train/accuracy',
 'train/auroc',
 'train/precision',
 'train/recall',
 'trainer/global_step',
 'val/accuracy',
 'val/auroc',
 'val/precision',
 'val/recall',
 'seed',
 'model/params/total',
 'model/params/trainable',
 'test/mae',
 'test/mse',
 'model.readout.hidden_dim',
 'model.readout.out_channels',
 'model.backbone.num_layers',
 'model.backbone.in_channels',
 'model.backbone.hidden_channels',
 'model.feature_encoder.max_hop',
 'model.feature_encoder.out_channels',
 'model.feature_encoder.proj_dropout',
 'model.backbone_wrapper.out_channels',
 'model.backbone.heads',
 'dataset.parameters.num_classes',
 'dataset.parameters.max_dim_if_lifted',
 'dataset.split_params.k',
 '

In [41]:
df["model.model_name"]= df['model.backbone._target_']

In [29]:
from ast import literal_eval
df["transforms.sann_encoding.pe_types"] = df["transforms.sann_encoding.pe_types"].str.join(',')

In [33]:
df["transforms.sann_encoding.pe_types"]

0        RWSE,ElstaticPE,HKdiagSE,LapPE
1        RWSE,ElstaticPE,HKdiagSE,LapPE
2        RWSE,ElstaticPE,HKdiagSE,LapPE
3        RWSE,ElstaticPE,HKdiagSE,LapPE
4        RWSE,ElstaticPE,HKdiagSE,LapPE
                      ...              
69204                        ElstaticPE
69205                        ElstaticPE
69206                        ElstaticPE
69207                        ElstaticPE
69208                        ElstaticPE
Name: transforms.sann_encoding.pe_types, Length: 69208, dtype: object

In [42]:
# Extract best results for each model and dataset
# 1. Keep the columns that are necessary for the comparison

sweeped_columns = [
    "transforms.sann_encoding.max_hop",
    "transforms.sann_encoding.max_rank",
    "transforms.sann_encoding.neighborhoods",
    "model.feature_encoder.proj_dropout",
    "model.backbone.hidden_channels",
    "model.backbone.num_layers",
    "model.readout.hidden_dim",
    "model.feature_encoder.out_channels",
    # Others
    "optimizer.parameters.weight_decay",
    "optimizer.parameters.lr",
    "dataset.dataloader_params.batch_size",
    # Additional
    "transforms.sann_encoding.copy_initial",
    "transforms.graph2cell_lifting.max_cell_length",
    "transforms.sann_encoding.is_undirected",
    "transforms.sann_encoding.pe_types",
]
run_columns = ["dataset.split_params.data_seed","seed",]

# Dataset and model columns
dataset_model_columns = ["model.model_name", "model.model_domain", "dataset.loader.parameters.data_name"]

# Performance columns
performance_columns = [
    "val/loss", "test/loss",
    "val/mae", "test/mae",
    "val/mse", "test/mse",
    "val/accuracy", "test/accuracy",
    "val/auroc","test/auroc",
    "val/recall", "test/recall",
    "val/precision", "test/precision",
    ]
keep_columns = dataset_model_columns + sweeped_columns + performance_columns + run_columns
df_keep = df[keep_columns]

In [80]:
def map_correct_dataname(row):
    if row["dataset.loader.parameters.data_name"] ==  "MANTRA_betti_numbers":
        rows = []
        for i in range(3):
            row_dict = row.to_dict()
            row_dict["dataset.loader.parameters.data_name"] = row_dict["dataset.loader.parameters.data_name"] + f"_{i}"
            row_dict["val/f1"] = row_dict[f"val/f1_{i}"]
            row_dict["test/f1"] = row_dict[f"test/f1_{i}"]
            rows.append(pd.DataFrame.from_records([row_dict]))
        return pd.concat(rows)

In [177]:

scores_dfs = []
for i, row in df.iterrows():
    if row["dataset.loader.parameters.data_name"] ==  "MANTRA_betti_numbers":
        rows = []
        for i in range(3):
            row_dict = row.to_dict()
            row_dict["dataset.loader.parameters.data_name"] = row_dict["dataset.loader.parameters.data_name"] + f"_{i}"
            row_dict["val/f1"] = row_dict[f"val/f1_{i}"]
            row_dict["test/f1"] = row_dict[f"test/f1_{i}"]
            rows.append(pd.DataFrame.from_records([row_dict]))
        scores_dfs.append(pd.concat(rows))
scores_df = pd.concat(scores_dfs)

In [178]:
scores_df[["model.model_name", "dataset.loader.parameters.data_name", "val/f1"]]
df = df[df["dataset.loader.parameters.data_name"] != "MANTRA_betti_numbers"]
df = pd.concat([df, scores_df])

In [35]:
performance_classification = [
    "val/accuracy", "test/accuracy",
    "val/auroc","test/auroc",
    "val/loss",
    "val/recall", "test/recall",
    "val/precision", "test/precision",
    ]
performance_split = [
    "val/loss",
    "test/loss",
]
performance_regression = [
    "val/mae", "test/mae",
    "val/mse", "test/mse",
    ]
# Define a dict of dicts for each dataset the corresponding optimization metrics
optimization_metrics = {
    "NCI109": {"optim_metric": "val/accuracy", "eval_metric": "test/accuracy", "direction": "max", "performance_columns": performance_classification},
    "NCI1": {"optim_metric": "val/accuracy", "eval_metric": "test/accuracy", "direction": "max", "performance_columns": performance_classification},
    "PROTEINS": {"optim_metric": "val/accuracy", "eval_metric": "test/accuracy", "direction": "max", "performance_columns": performance_classification},
    "MUTAG": {"optim_metric": "val/accuracy", "eval_metric": "test/accuracy", "direction": "max", "performance_columns": performance_classification},
    "ZINC": {"optim_metric": "val/mae", "eval_metric": "test/mae", "direction": "min", "performance_columns": performance_regression},
    "IMDB-BINARY": {"optim_metric": "val/accuracy", "eval_metric": "test/accuracy", "direction": "max", "performance_columns": performance_classification},
    "IMDB-MULTI": {"optim_metric": "val/accuracy", "eval_metric": "test/accuracy", "direction": "max", "performance_columns": performance_classification},
    "Cora": {"direction": "max", "performance_columns": performance_classification},
    "Citeseer": {"direction": "max", "performance_columns": performance_classification},
    "PubMed": {"direction": "max", "performance_columns": performance_classification},
    "MANTRA_name": {"optim_metric": "val/f1", "eval_metric": "test/f1", "direction": "max", "performance_columns": performance_classification},
    "MANTRA_orientation": {"optim_metric": "val/f1", "eval_metric": "test/f1", "direction": "max", "performance_columns": performance_classification},
    "MANTRA_betti_numbers": {"optim_metric": "val/loss", "eval_metric": "val/loss", "direction": "min", "performance_columns": performance_classification},
    "MANTRA_betti_numbers_0": {"optim_metric": "val/loss", "eval_metric": "test/f1", "direction": "min", "performance_columns": performance_classification},
    "MANTRA_betti_numbers_1": {"optim_metric": "val/loss", "eval_metric": "test/f1", "direction": "min", "performance_columns": performance_classification},
    "MANTRA_betti_numbers_2": {"optim_metric": "val/loss", "eval_metric": "test/f1", "direction": "min", "performance_columns": performance_classification},

}

len(optimization_metrics)

16

In [68]:
collect_subsets=None

In [43]:
search_metric_types = ["accuracy",] #'auroc', 'recall', 'precision']


# Get unique datasets
datasets = list(df["dataset.loader.parameters.data_name"].unique())
# Get unique models
models = list(df["model.model_name"].unique())

best_results = defaultdict(dict)
best_results_all_metrics = defaultdict(dict)
best_runs = defaultdict(dict)
collect_subsets = defaultdict(dict)
collect_bast_parameters = defaultdict(dict)
# Got over each dataset and model and find the best result
for dataset in datasets:
    for model in models:
        # Get the subset of the DataFrame for the current dataset and model
        subset = df_keep[
            (df_keep["dataset.loader.parameters.data_name"] == dataset)
        ]

        optim_metric = optimization_metrics[dataset]["optim_metric"]
        eval_metric = optimization_metrics[dataset]["eval_metric"]
        direction = optimization_metrics[dataset]["direction"]
        
        # Keep metrics that matters for dataset
        performance_columns = optimization_metrics[dataset]["performance_columns"]
        subset = subset[dataset_model_columns + sweeped_columns + performance_columns + run_columns]

        for col in performance_columns:
            subset[col] = subset[col].astype(float)
        aggregated = subset.groupby(sweeped_columns +  ["model.model_name", "model.model_domain"], dropna=False).agg(
            {col: ["mean", "std", "count"] for col in performance_columns},
        )

        # aggregated = subset.groupby(sweeped_columns, dropna=False).count()
        n_count = 5 if "MANTRA" not in dataset else 4
        # Go from MultiIndex to Index
        aggregated = aggregated.reset_index()
        print(f"Dataset: {dataset}, Model: {model}")
        print(aggregated[(eval_metric, "count")].unique())
        #print(aggregated['dataset.split_params.data_seed'].unique())
        print((aggregated[(eval_metric, "count")] >= n_count).sum() / len(aggregated) * 100)
        aggregated = aggregated[aggregated[(eval_metric, "count")] >= n_count]
        #print(len(aggregated[aggregated['seed'] > 4]))
        # aggregated = aggregated.sort_values(
        #         by=(eval_metric, "mean"), ascending=(direction == 'min')
        #     )

        
        # Git percent in case of classification
        if "test/accuracy" in performance_columns:
            # Go over all the performance columns and multiply by 100
            for col in performance_columns:
                aggregated[(col, "mean")] *= 100
                aggregated[(col, "std")] *= 100
            
            # Round performance columns values up to 2 decimal points
            for col in performance_columns:
                aggregated[(col, "mean")] = aggregated[(col, "mean")].round(4)
                aggregated[(col, "std")] = aggregated[(col, "std")].round(4)
            
            
        else:
            # Round all values up to 4 decimal points
            # Round performance columns values up to 4 decimal points
            for col in performance_columns:
                aggregated[(col, "mean")] = aggregated[(col, "mean")].round(4)
                aggregated[(col, "std")] = aggregated[(col, "std")].round(4)
        
        
        collect_subsets[dataset] = aggregated


Dataset: MUTAG, Model: torch_geometric.nn.models.GIN
[5 4]
99.16666666666667
Dataset: MUTAG, Model: torch_geometric.nn.models.GCN
[5 4]
99.16666666666667
Dataset: MUTAG, Model: torch_geometric.nn.models.GAT
[5 4]
99.16666666666667
Dataset: NCI109, Model: torch_geometric.nn.models.GIN
[5 4]
99.8015873015873
Dataset: NCI109, Model: torch_geometric.nn.models.GCN
[5 4]
99.8015873015873
Dataset: NCI109, Model: torch_geometric.nn.models.GAT
[5 4]
99.8015873015873
Dataset: PROTEINS, Model: torch_geometric.nn.models.GIN
[5 4]
99.88425925925925
Dataset: PROTEINS, Model: torch_geometric.nn.models.GCN
[5 4]
99.88425925925925
Dataset: PROTEINS, Model: torch_geometric.nn.models.GAT
[5 4]
99.88425925925925
Dataset: ZINC, Model: torch_geometric.nn.models.GIN
[5 3 1 2 0 4]
97.43589743589743
Dataset: ZINC, Model: torch_geometric.nn.models.GCN
[5 3 1 2 0 4]
97.43589743589743
Dataset: ZINC, Model: torch_geometric.nn.models.GAT
[5 3 1 2 0 4]
97.43589743589743
Dataset: NCI1, Model: torch_geometric.nn.model

In [44]:
collect_subsets["ZINC"][[ "test/mae", 'val/mse', "model.model_name", "model.model_domain", "transforms.sann_encoding.neighborhoods", "transforms.sann_encoding.pe_types"]].sort_values(by=("val/mse", "mean"), ascending=True)

Unnamed: 0_level_0,test/mae,test/mae,test/mae,val/mse,val/mse,val/mse,model.model_name,model.model_domain,transforms.sann_encoding.neighborhoods,transforms.sann_encoding.pe_types
Unnamed: 0_level_1,mean,std,count,mean,std,count,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
558,0.4110,0.0197,5,0.8983,0.0697,5,torch_geometric.nn.models.GIN,cell,"['up_adjacency-0', 'down_incidence-1']","RWSE,ElstaticPE,HKdiagSE,LapPE"
534,0.4597,0.0582,5,0.9300,0.1467,5,torch_geometric.nn.models.GIN,cell,"['up_adjacency-0', 'down_incidence-1']","RWSE,ElstaticPE,HKdiagSE,LapPE"
556,0.4480,0.0972,5,0.9338,0.1978,5,torch_geometric.nn.models.GIN,cell,"['up_adjacency-0', 'down_incidence-1']","RWSE,ElstaticPE,HKdiagSE,LapPE"
530,0.4933,0.0076,5,1.0176,0.0506,5,torch_geometric.nn.models.GIN,cell,"['up_adjacency-0', 'down_incidence-1']","RWSE,ElstaticPE,HKdiagSE,LapPE"
536,0.5124,0.0988,5,1.0223,0.2369,5,torch_geometric.nn.models.GIN,cell,"['up_adjacency-0', 'down_incidence-1']","RWSE,ElstaticPE,HKdiagSE,LapPE"
...,...,...,...,...,...,...,...,...,...,...
517,0.5322,0.0094,5,1.7333,0.6131,5,torch_geometric.nn.models.GIN,cell,['up_adjacency-0'],RWSE
611,0.6729,0.0132,5,1.7459,0.2260,5,torch_geometric.nn.models.GIN,cell,['up_adjacency-0'],"RWSE,ElstaticPE,HKdiagSE,LapPE"
596,0.7593,0.4299,5,1.8078,1.3852,5,torch_geometric.nn.models.GIN,cell,['up_adjacency-0'],"RWSE,ElstaticPE,HKdiagSE,LapPE"
573,0.8254,0.4016,5,1.9064,1.1301,5,torch_geometric.nn.models.GIN,cell,"['up_adjacency-0', 'down_incidence-1']","RWSE,ElstaticPE,HKdiagSE,LapPE"


In [45]:
collect_subsets["MUTAG"][[ "val/accuracy", "test/accuracy", "model.model_name", "transforms.sann_encoding.pe_types"]].sort_values(by=("val/accuracy", "mean"), ascending=False)


Unnamed: 0_level_0,val/accuracy,val/accuracy,val/accuracy,test/accuracy,test/accuracy,test/accuracy,model.model_name,transforms.sann_encoding.pe_types
Unnamed: 0_level_1,mean,std,count,mean,std,count,Unnamed: 7_level_1,Unnamed: 8_level_1
3374,84.6809,5.0797,5,82.9787,5.2117,5,torch_geometric.nn.models.GIN,ElstaticPE
2504,84.6809,5.5073,5,80.0000,5.7486,5,torch_geometric.nn.models.GIN,LapPE
3377,84.2553,5.3404,5,81.2766,7.5824,5,torch_geometric.nn.models.GIN,HKdiagSE
836,84.2553,5.9422,5,79.5745,5.3404,5,torch_geometric.nn.models.GIN,LapPE
1685,84.2553,5.9422,5,79.5745,5.3404,5,torch_geometric.nn.models.GIN,HKdiagSE
...,...,...,...,...,...,...,...,...
4064,48.5106,18.8751,5,65.1064,7.4620,5,torch_geometric.nn.models.GIN,"RWSE,ElstaticPE,HKdiagSE,LapPE"
4280,48.5106,18.8751,5,65.1064,7.4620,5,torch_geometric.nn.models.GIN,"RWSE,ElstaticPE,HKdiagSE,LapPE"
4283,48.5106,18.8751,5,65.1064,7.4620,5,torch_geometric.nn.models.GIN,"RWSE,ElstaticPE,HKdiagSE,LapPE"
1709,47.2340,18.1413,5,66.3830,8.2951,5,torch_geometric.nn.models.GIN,HKdiagSE


# Plot best models

In [13]:
def inc_list_to_name(inc_list):
    inc_list = eval(inc_list)
    inc_name = ""
    for inc in inc_list:
        inc_num = inc.split("-")
        inc_val = int(inc_num[0]) if len(inc_num) == 3 else 1
        dim = int(inc_num[-1])

        key = ""
        if "incidence" in inc:
            if "up" in inc:
                key = f"U_{dim}_{dim+inc_val}"
            elif "down" in inc:
                key = f"L_{dim-inc_val}_{dim}"
            else:
                raise Exception("Unknown NHBD")
        elif "adjacency" in inc:
            key = "A_"
            if "up" in inc or "down" in inc:
                key = f"A_{dim}"
            else:
                raise Exception("Unknown NHBD")
        inc_name += key + ","
    return inc_name

In [74]:
for dataset in datasets:
    agg_sub = collect_subsets[dataset].copy()
    eval_metric = optimization_metrics[dataset]["eval_metric"]
    optim_dir = optimization_metrics[dataset]["direction"]
    print(dataset, optim_dir)
    agg_sub.sort_values(by=(eval_metric,"mean"), ascending=(optim_dir == "min"), inplace=True)
    agg_subset = agg_sub[:10].copy()

    cols = [
        "transforms.sann_encoding.neighborhoods",
        "transforms.sann_encoding.pretrain_model",
        "model.backbone.n_layers",
        "model.model_domain",
        "model.feature_encoder.proj_dropout",
        "model.feature_encoder.out_channels",
        "optimizer.parameters.weight_decay",
        "optimizer.parameters.lr",
        "dataset.dataloader_params.batch_size"]

    # iterate over rows
    model_names = []
    for index, row in agg_subset.iterrows():
        m_name = row["model.model_name"].item()
        # Get values of the row
        values = [row[col].item() for col in cols]
        is_sann = "SANN" in m_name

        model_name=f"{m_name}|"
        for col, value in zip(cols, values, strict=False):
            if col == "transforms.sann_encoding.max_hop" and is_sann:
                model_name += f"Hop={value}|"
            elif col == "model.backbone.n_layers":
                model_name += f"L={value}|"
            elif col == "transforms.sann_encoding.neighborhoods" and not is_sann:
                model_name += f"NH={inc_list_to_name(value)}|"
            elif col == "model.feature_encoder.out_channels":
                model_name += f"OCs={value}|"
            elif col == "optimizer.parameters.lr":
                model_name += f"LR={value}|"
            elif col == "optimizer.parameters.weight_decay":
                model_name += f"WD={value}|"
            elif col == "dataset.dataloader_params.batch_size":
                model_name += f"BS={value}|"
        model_names.append(model_name)
    agg_subset["Model_name"] = model_names
    
    # Plotting
    plt.figure(figsize=(12, 6))
    model_names = agg_subset["Model_name"]
    
    accuracy_means = np.array(agg_subset[(eval_metric,"mean")])
    accuracy_stds = np.array(agg_subset[(eval_metric,"std")])
    bars = plt.bar(model_names, accuracy_means, yerr=accuracy_stds, capsize=5, color="skyblue")

    accuracy_means = np.array(agg_subset[(eval_metric,"mean")])
    # Adding data labels on top of bars
    for bar, mean in zip(bars, accuracy_means, strict=False):
        plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.2, f"{mean:.1f}", ha="center", va="bottom")
    #plt.xticks(rotation=45)
    plt.title(f"Model performance for {dataset} dataset")
    plt.ylabel(f"Test {eval_metric}")
    plt.tight_layout()
    delta = agg_subset[(eval_metric,"std")].max() + 1
    plt.ylim((agg_subset[(eval_metric,"mean")].min()-delta).round(), (agg_subset[(eval_metric,"mean")].max()+delta).round())
    plt.grid(axis="y", linestyle="--", alpha=0.7)
    # Rotate x-axis labels
    plt.xticks(rotation=45, ha="right", fontsize=8)

    plt.savefig(f"figures/{dataset}_performance.png",dpi=300, bbox_inches = "tight")
    plt.close()

PROTEINS max
NCI109 max
NCI1 max
IMDB-BINARY max
ZINC min
IMDB-MULTI max


In [75]:
for dataset in datasets:
    aggregated = collect_subsets[dataset]
    for m_name in aggregated["model.model_name"].unique():
        agg_sub = aggregated[aggregated["model.model_name"]==m_name].copy()
        optim_metric = optimization_metrics[dataset]["eval_metric"]
        optim_dir = optimization_metrics[dataset]["direction"]
        agg_sub.sort_values(by=(optim_metric,"mean"), ascending=(optim_dir == "min"), inplace=True)
        agg_subset = agg_sub[:10].copy()

        cols = [
            "transforms.sann_encoding.neighborhoods",
            "transforms.sann_encoding.pretrain_model",
            "model.backbone.n_layers",
            "model.model_domain",
            "model.feature_encoder.out_channels",
            "optimizer.parameters.weight_decay",
            "optimizer.parameters.lr",
            "model.feature_encoder.proj_dropout",
            "dataset.dataloader_params.batch_size"]

        # iterate over rows
        model_names = []
        for index, row in agg_subset.iterrows():
            # Get values of the row
            values = [row[col].item() for col in cols]
            is_sann = "SANN" in m_name

            model_name=f"{m_name}|"
            for col, value in zip(cols, values, strict=False):
                if col == "transforms.sann_encoding.max_hop" and is_sann:
                    model_name += f"Hop={value}|"
                elif col == "model.backbone.n_layers":
                    model_name += f"L={value}|"
                elif col == "transforms.sann_encoding.neighborhoods" and not is_sann:
                    model_name += f"NH={inc_list_to_name(value)}|"
                elif col == "model.feature_encoder.out_channels":
                    model_name += f"OCs={value}|"
                elif col == "optimizer.parameters.lr":
                    model_name += f"LR={value}|"
                elif col == "optimizer.parameters.weight_decay":
                    model_name += f"WD={value}|"
                elif col == "dataset.dataloader_params.batch_size":
                    model_name += f"BS={value}|"
            model_names.append(model_name)
        agg_subset["Model_name"] = model_names
        
        # Plotting
        plt.figure(figsize=(12, 6))
        model_names = agg_subset["Model_name"]
        
        accuracy_means = np.array(agg_subset[(optim_metric,"mean")])
        accuracy_stds = np.array(agg_subset[(optim_metric,"std")])
        bars = plt.bar(model_names, accuracy_means, yerr=accuracy_stds, capsize=5, color="skyblue")

        accuracy_means = np.array(agg_subset[(optim_metric,"mean")])
        # Adding data labels on top of bars
        for bar, mean in zip(bars, accuracy_means, strict=False):
            plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.2, f"{mean:.1f}", ha="center", va="bottom")
        #plt.xticks(rotation=45)
        plt.title(f"Model performance for {dataset} dataset")
        plt.ylabel("Test accuracy")
        plt.tight_layout()
        delta = agg_subset[(optim_metric,"std")].max() + 1
        plt.ylim((agg_subset[(optim_metric,"mean")].min()-delta).round(), (agg_subset[(optim_metric,"mean")].max()+delta).round())
        plt.grid(axis="y", linestyle="--", alpha=0.7)
        # Rotate x-axis labels
        plt.xticks(rotation=45, ha="right", fontsize=8)

        plt.savefig(f"figures/{m_name}_performance_{dataset}.png",dpi=300, bbox_inches = "tight")
        plt.close()

In [14]:
df_dict = {
    "model": [],
    "dataset": [],
    "mean": [],
    "std": [],
    "domain": []
}

for dataset in datasets:
    aggregated = collect_subsets[dataset]
    for m_name in aggregated["model.model_name"].unique():
        for domain in aggregated["model.model_domain"].unique():
            agg_sub = aggregated[(aggregated["model.model_name"]==m_name) & (aggregated["model.model_domain"] == domain)].copy()
            optim_metric = optimization_metrics[dataset]["optim_metric"]
            eval_metric = optimization_metrics[dataset]["eval_metric"]
            optim_dir = optimization_metrics[dataset]["direction"]
            agg_sub.sort_values(by=(optim_metric,"mean"), ascending=(optim_dir == "min"), inplace=True)

            df_dict["domain"].append(agg_sub.iloc[0]["model.model_domain"].item())
            df_dict["model"].append(m_name)
            df_dict["dataset"].append(dataset)
            df_dict["mean"].append(agg_sub.iloc[0][(eval_metric, "mean")])
            df_dict["std"].append(agg_sub.iloc[0][(eval_metric, "std")])
df_res = pd.DataFrame(df_dict)

In [67]:
data = []
data.extend([
    {"model": "GCCN with GAT", "domain": "cell", "dataset": "MUTAG", "mean": 83.40, "std": 4.85},
    {"model": "GCCN with GAT", "domain": "cell", "dataset": "PROTEINS", "mean": 74.05, "std": 2.16},
    {"model": "GCCN with GAT", "domain": "cell", "dataset": "NCI1", "mean": 76.11, "std": 1.69},
    {"model": "GCCN with GAT", "domain": "cell", "dataset": "NCI109", "mean": 75.62, "std": 0.76},
    {"model": "GCCN with GAT", "domain": "cell", "dataset": "ZINC", "mean": 0.38, "std": 0.03},
    # {'model': 'Cell with GAT', 'domain': 'cell', 'dataset': 'Cora', 'mean': 88.39, 'std': 0.65},
    # {'model': 'Cell with GAT', 'domain': 'cell', 'dataset': 'Citeseer', 'mean': 74.62, 'std': 1.95},
    # {'model': 'Cell with GAT', 'domain': 'cell', 'dataset': 'PubMed', 'mean': 87.68, 'std': 0.33},
    
    {"model": "GCCN with GCN", "domain": "cell", "dataset": "MUTAG", "mean": 85.11, "std": 6.73},
    {"model": "GCCN with GCN", "domain": "cell", "dataset": "PROTEINS", "mean": 74.41, "std": 1.77},
    {"model": "GCCN with GCN", "domain": "cell", "dataset": "NCI1", "mean": 76.42, "std": 1.67},
    {"model": "GCCN with GCN", "domain": "cell", "dataset": "NCI109", "mean": 75.62, "std": 0.94},
    {"model": "GCCN with GCN", "domain": "cell", "dataset": "ZINC", "mean": 0.36, "std": 0.01},
    # {'model': 'Cell with GCN', 'domain': 'cell', 'dataset': 'Cora', 'mean': 88.51, 'std': 0.70},
    # {'model': 'Cell with GCN', 'domain': 'cell', 'dataset': 'Citeseer', 'mean': 75.41, 'std': 2.00},
    # {'model': 'Cell with GCN', 'domain': 'cell', 'dataset': 'PubMed', 'mean': 88.18, 'std': 0.26},
    
    {"model": "GCCN with GIN", "domain": "cell", "dataset": "MUTAG", "mean": 86.38, "std": 6.49},
    {"model": "GCCN with GIN", "domain": "cell", "dataset": "PROTEINS", "mean": 72.54, "std": 3.07},
    {"model": "GCCN with GIN", "domain": "cell", "dataset": "NCI1", "mean": 77.65, "std": 1.11},
    {"model": "GCCN with GIN", "domain": "cell", "dataset": "NCI109", "mean": 77.19, "std": 0.21},
    {"model": "GCCN with GIN", "domain": "cell", "dataset": "ZINC", "mean": 0.19, "std": 0.00},
    # {'model': 'Cell with GIN', 'domain': 'cell', 'dataset': 'Cora', 'mean': 87.42, 'std': 1.85},
    # {'model': 'Cell with GIN', 'domain': 'cell', 'dataset': 'Citeseer', 'mean': 75.13, 'std': 1.17},
    # {'model': 'Cell with GIN', 'domain': 'cell', 'dataset': 'PubMed', 'mean': 88.47, 'std': 0.27},
    
    {"model": "GCCN with GraphSAGE", "domain": "cell", "dataset": "MUTAG", "mean": 85.53, "std": 6.80},
    {"model": "GCCN with GraphSAGE", "domain": "cell", "dataset": "PROTEINS", "mean": 73.62, "std": 2.72},
    {"model": "GCCN with GraphSAGE", "domain": "cell", "dataset": "NCI1", "mean": 78.23, "std": 1.47},
    {"model": "GCCN with GraphSAGE", "domain": "cell", "dataset": "NCI109", "mean": 77.10, "std": 0.83},
    {"model": "GCCN with GraphSAGE", "domain": "cell", "dataset": "ZINC", "mean": 0.24, "std": 0.00},
    # {'model': 'Cell with GraphSAGE', 'domain': 'cell', 'dataset': 'Cora', 'mean': 88.57, 'std': 0.58},
    # {'model': 'Cell with GraphSAGE', 'domain': 'cell', 'dataset': 'Citeseer', 'mean': 75.89, 'std': 1.84},
    # {'model': 'Cell with GraphSAGE', 'domain': 'cell', 'dataset': 'PubMed', 'mean': 89.40, 'std': 0.57},
    
    {"model": "GCCN with Transformer", "domain": "cell", "dataset": "MUTAG", "mean": 83.83, "std": 6.49},
    {"model": "GCCN with Transformer", "domain": "cell", "dataset": "PROTEINS", "mean": 70.97, "std": 4.06},
    {"model": "GCCN with Transformer", "domain": "cell", "dataset": "NCI1", "mean": 73.00, "std": 1.37},
    {"model": "GCCN with Transformer", "domain": "cell", "dataset": "NCI109", "mean": 73.20, "std": 1.05},
    {"model": "GCCN with Transformer", "domain": "cell", "dataset": "ZINC", "mean": 0.45, "std": 0.02},
    # {'model': 'Cell with Transformer', 'domain': 'cell', 'dataset': 'Cora', 'mean': 84.61, 'std': 1.32},
    # {'model': 'Cell with Transformer', 'domain': 'cell', 'dataset': 'Citeseer', 'mean': 75.05, 'std': 1.67},
    # {'model': 'Cell with Transformer', 'domain': 'cell', 'dataset': 'PubMed', 'mean': 88.37, 'std': 0.22},
    
    {"model": "GCCN with Hasse", "domain": "cell", "dataset": "MUTAG", "mean": 85.96, "std": 7.15},
    {"model": "GCCN with Hasse", "domain": "cell", "dataset": "PROTEINS", "mean": 73.73, "std": 2.95},
    {"model": "GCCN with Hasse", "domain": "cell", "dataset": "NCI1", "mean": 76.75, "std": 1.63},
    {"model": "GCCN with Hasse", "domain": "cell", "dataset": "NCI109", "mean": 76.94, "std": 0.82},
    {"model": "GCCN with Hasse", "domain": "cell", "dataset": "ZINC", "mean": 0.31, "std": 0.01},
    # {'model': 'Cell with Hasse', 'domain': 'cell', 'dataset': 'Cora', 'mean': 87.24, 'std': 0.58},
    # {'model': 'Cell with Hasse', 'domain': 'cell', 'dataset': 'Citeseer', 'mean': 74.26, 'std': 1.47},
    # {'model': 'Cell with Hasse', 'domain': 'cell', 'dataset': 'PubMed', 'mean': 88.65, 'std': 0.55},
])

# Simplicial models
data.extend([
    {"model": "GCCN with GAT", "domain": "simplicial", "dataset": "MUTAG", "mean": 79.15, "std": 4.09},
    {"model": "GCCN with GAT", "domain": "simplicial", "dataset": "PROTEINS", "mean": 74.62, "std": 1.95},
    {"model": "GCCN with GAT", "domain": "simplicial", "dataset": "NCI1", "mean": 74.86, "std": 1.42},
    {"model": "GCCN with GAT", "domain": "simplicial", "dataset": "NCI109", "mean": 74.81, "std": 1.14},
    {"model": "GCCN with GAT", "domain": "simplicial", "dataset": "ZINC", "mean": 0.57, "std": 0.03},
    # {'model': 'GCCN with GAT', 'domain': 'simplicial', 'dataset': 'Cora', 'mean': 88.33, 'std': 0.67},
    # {'model': 'GCCN with GAT', 'domain': 'simplicial', 'dataset': 'Citeseer', 'mean': 74.65, 'std': 1.93},
    # {'model': 'GCCN with GAT', 'domain': 'simplicial', 'dataset': 'PubMed', 'mean': 87.72, 'std': 0.36},
    
    {"model": "GCCN with GCN", "domain": "simplicial", "dataset": "MUTAG", "mean": 74.04, "std": 8.30},
    {"model": "GCCN with GCN", "domain": "simplicial", "dataset": "PROTEINS", "mean": 74.91, "std": 2.51},
    {"model": "GCCN with GCN", "domain": "simplicial", "dataset": "NCI1", "mean": 74.20, "std": 2.17},
    {"model": "GCCN with GCN", "domain": "simplicial", "dataset": "NCI109", "mean": 74.13, "std": 0.53},
    {"model": "GCCN with GCN", "domain": "simplicial", "dataset": "ZINC", "mean": 0.53, "std": 0.05},
    # {'model': 'GCN', 'domain': 'simplicial', 'dataset': 'Cora', 'mean': 88.51, 'std': 0.70},
    # {'model': 'GCN', 'domain': 'simplicial', 'dataset': 'Citeseer', 'mean': 75.41, 'std': 2.00},
    # {'model': 'GCN', 'domain': 'simplicial', 'dataset': 'PubMed', 'mean': 88.19, 'std': 0.24},
    
    {"model": "GCCN with GIN", "domain": "simplicial", "dataset": "MUTAG", "mean": 85.96, "std": 4.66},
    {"model": "GCCN with GIN", "domain": "simplicial", "dataset": "PROTEINS", "mean": 72.83, "std": 2.72},
    {"model": "GCCN with GIN", "domain": "simplicial", "dataset": "NCI1", "mean": 76.67, "std": 1.62},
    {"model": "GCCN with GIN", "domain": "simplicial", "dataset": "NCI109", "mean": 75.76, "std": 1.28},
    {"model": "GCCN with GIN", "domain": "simplicial", "dataset": "ZINC", "mean": 0.35, "std": 0.01},
    # {'model': 'GIN', 'domain': 'simplicial', 'dataset': 'Cora', 'mean': 87.27, 'std': 1.63},
    # {'model': 'GIN', 'domain': 'simplicial', 'dataset': 'Citeseer', 'mean': 75.05, 'std': 1.27},
    # {'model': 'GIN', 'domain': 'simplicial', 'dataset': 'PubMed', 'mean': 88.54, 'std': 0.21},
    
    {"model": "GCCN with GraphSAGE", "domain": "simplicial", "dataset": "MUTAG", "mean": 75.74, "std": 2.43},
    {"model": "GCCN with GraphSAGE", "domain": "simplicial", "dataset": "PROTEINS", "mean": 74.70, "std": 3.10},
    {"model": "GCCN with GraphSAGE", "domain": "simplicial", "dataset": "NCI1", "mean": 76.85, "std": 1.50},
    {"model": "GCCN with GraphSAGE", "domain": "simplicial", "dataset": "NCI109", "mean": 75.64, "std": 1.94},
    {"model": "GCCN with GraphSAGE", "domain": "simplicial", "dataset": "ZINC", "mean": 0.50, "std": 0.02},
    # {'model': 'GraphSAGE', 'domain': 'simplicial', 'dataset': 'Cora', 'mean': 88.57, 'std': 0.59},
    # {'model': 'GraphSAGE', 'domain': 'simplicial', 'dataset': 'Citeseer', 'mean': 75.92, 'std': 1.85},
    # {'model': 'GraphSAGE', 'domain': 'simplicial', 'dataset': 'PubMed', 'mean': 89.34, 'std': 0.39},
    
    {"model": "GCCN with Transformer", "domain": "simplicial", "dataset": "MUTAG", "mean": 74.04, "std": 4.09},
    {"model": "GCCN with Transformer", "domain": "simplicial", "dataset": "PROTEINS", "mean": 70.97, "std": 4.06},
    {"model": "GCCN with Transformer", "domain": "simplicial", "dataset": "NCI1", "mean": 70.39, "std": 0.96},
    {"model": "GCCN with Transformer", "domain": "simplicial", "dataset": "NCI109", "mean": 69.99, "std": 1.13},
    {"model": "GCCN with Transformer", "domain": "simplicial", "dataset": "ZINC", "mean": 0.64, "std": 0.01},
    # {'model': 'Transformer', 'domain': 'simplicial', 'dataset': 'Cora', 'mean': 84.40, 'std': 1.16},
    # {'model': 'Transformer', 'domain': 'simplicial', 'dataset': 'Citeseer', 'mean': 74.60, 'std': 1.88},
    # {'model': 'Transformer', 'domain': 'simplicial', 'dataset': 'PubMed', 'mean': 88.55, 'std': 0.39},
    
    {"model": "GCCN with Hasse", "domain": "simplicial", "dataset": "MUTAG", "mean": 74.04, "std": 5.51},
    {"model": "GCCN with Hasse", "domain": "simplicial", "dataset": "PROTEINS", "mean": 74.48, "std": 1.89},
    {"model": "GCCN with Hasse", "domain": "simplicial", "dataset": "NCI1", "mean": 75.02, "std": 2.24},
    {"model": "GCCN with Hasse", "domain": "simplicial", "dataset": "NCI109", "mean": 73.91, "std": 3.90},
    {"model": "GCCN with Hasse", "domain": "simplicial", "dataset": "ZINC", "mean": 0.56, "std": 0.02},
    # {'model': 'Simplicial with Hasse', 'domain': 'simplicial', 'dataset': 'Cora', 'mean': 87.56, 'std': 0.66},
    # {'model': 'Simplicial with Hasse', 'domain': 'simplicial', 'dataset': 'Citeseer', 'mean': 74.50, 'std': 1.61},
    # {'model': 'Simplicial with Hasse', 'domain': 'simplicial', 'dataset': 'PubMed', 'mean': 88.61, 'std': 0.27},
])
topotune_df = pd.DataFrame(data)

In [68]:
# Define the raw table data
raw_table_data = {
    # Format: {dataset: [(method, mean, std), ...]}
    # 'Cora': [
    #     ('CWN', 74.95, 0.98),
    #     ('CCCN', 86.32, 1.38),
    #     ('SCCNN', 87.44, 1.17),
    #     ('SCN', 87.68, 1.17),
    #     ('DR_cell', 82.19, 1.07),
    #     ('SDP_cell', 80.65, 2.39),
    #     ('DR_simplicial', 82.27, 1.34),
    #     ('SDP_simplicial', 79.91, 1.18)
    # ],
    # 'Citeseer': [
    #     ('CWN', 70.49, 2.85),
    #     ('CCCN', 75.20, 1.82),
    #     ('SCCNN', 75.63, 1.58),
    #     ('SCN', 74.91, 1.25),
    #     ('DR_cell', 70.23, 2.69),
    #     ('SDP_cell', 69.03, 2.01),
    #     ('DR_simplicial', 71.24, 1.68),
    #     ('SDP_simplicial', 70.40, 1.53)
    # ],
    # 'PubMed': [
    #     ('CWN', 86.94, 0.68),
    #     ('CCCN', 88.64, 0.36),
    #     ('SCCNN', 88.52, 0.44),
    #     ('SCN', 88.67, 0.39),
    #     ('DR_cell', 88.18, 0.32),
    #     ('SDP_cell', 87.78, 0.58),
    #     ('DR_simplicial', 88.72, 0.50),
    #     ('SDP_simplicial', 88.62, 0.44)
    # ],
    "MUTAG": [
        ("CWN", 69.68, 8.58),
        ("CCCN", 80.43, 1.78),
        ("SCCNN", 80.85, 5.42),
        ("SCN", 77.02, 9.32),
        ("DR_cell", 76.17, 6.63),
        ("SDP_cell", 70.64, 3.16),
        ("DR_simplicial", 71.49, 2.43),
        ("SDP_simplicial", 73.62, 6.13)
    ],
    "PROTEINS": [
        ("CWN", 76.13, 1.80),
        ("CCCN", 76.13, 2.70),
        ("SCCNN", 73.55, 3.43),
        ("SCN", 73.33, 2.30),
        ("DR_cell", 74.19, 2.86),
        ("SDP_cell", 74.98, 1.92),
        ("DR_simplicial", 75.27, 2.14),
        ("SDP_simplicial", 74.77, 1.69)
    ],
    "NCI1": [
        ("CWN", 68.52, 0.51),
        ("CCCN", 73.93, 1.87),
        ("SCCNN", 76.67, 1.48),
        ("SCN", 77.65, 1.28),
        ("DR_cell", 76.60, 1.75),
        ("SDP_cell", 75.60, 2.45),
        ("DR_simplicial", 75.27, 1.57),
        ("SDP_simplicial", 74.49, 1.03)
    ],
    "NCI109": [
        ("CWN", 68.19, 0.65),
        ("CCCN", 73.80, 2.06),
        ("SCCNN", 75.35, 1.50),
        ("SCN", 74.83, 1.18),
        ("DR_cell", 77.12, 1.07),
        ("SDP_cell", 75.43, 1.94),
        ("DR_simplicial", 74.58, 1.29),
        ("SDP_simplicial", 75.70, 1.04)
    ],
    "ZINC": [
        ("CWN", 0.70, 0.00),
        ("CCCN", 0.34, 0.01),
        ("SCCNN", 0.35, 0.02),
        ("SCN", 0.34, 0.02),
        ("DR_cell", 0.36, 0.01),
        ("SDP_cell", 0.36, 0.02),
        ("DR_simplicial", 0.59, 0.01),
        ("SDP_simplicial", 0.53, 0.04)
    ]
}


# Process the data to select the best of DR and SDP for each method
additional_data = []

for dataset, entries in raw_table_data.items():
    optim_dir = optimization_metrics[dataset]["direction"]
    
    # Group data by method prefix (before the underscore)
    method_results = {}
    standard_methods = []
    dr_sdp_methods = {"cell": {}, "simplicial": {}}
    
    for method, mean, std in entries:
        if method in ["CWN", "CCCN", "SCCNN", "SCN"]:
            # These are the standard methods - just add them directly
            standard_methods.append({"method": method, "dataset": dataset, "mean": mean, "std": std})
        elif method.startswith("DR_") or method.startswith("SDP_"):
            # These are DR or SDP methods - group by domain
            method_type, domain = method.split("_")
            dr_sdp_methods[domain][method_type] = {"mean": mean, "std": std}
    
    # Add all standard methods
    additional_data.extend(standard_methods)
    
    # Select the best of DR/SDP for each domain
    for domain in ["cell", "simplicial"]:
        if "DR" in dr_sdp_methods[domain] and "SDP" in dr_sdp_methods[domain]:
            dr_result = dr_sdp_methods[domain]["DR"]
            sdp_result = dr_sdp_methods[domain]["SDP"]
            
            # Determine which is better based on optimization direction
            if optim_dir == "max":
                if dr_result["mean"] >= sdp_result["mean"]:
                    best_method = "DR"
                    best_result = dr_result
                else:
                    best_method = "SDP"
                    best_result = sdp_result
            else:  # min direction
                if dr_result["mean"] <= sdp_result["mean"]:
                    best_method = "DR"
                    best_result = dr_result
                else:
                    best_method = "SDP"
                    best_result = sdp_result
            
            # Add the best result
            method_name = f"{best_method}_{domain}"
            additional_data.append({
                "method": method_name,
                "dataset": dataset,
                "mean": best_result["mean"],
                "std": best_result["std"]
            })


# Map method names to their proper format in the data
method_mappings = {
    "CWN": "Cell CWN",
    "CCCN": "CCNN",  # Cell CCNN
    "SCCNN": "Simplicial CCNN",
    "SCN": "Simplicial SCN",
    "DR_cell": "Cell DR",
    "SDP_cell": "Cell SDP",
    "DR_simplicial": "DR",
    "SDP_simplicial": "SDP"
}

# Domain mappings
domain_mappings = {
    "CWN": "cell",
    "CCCN": "cell",
    "SCCNN": "simplicial",
    "SCN": "simplicial",
    "DR_cell": "cell",
    "SDP_cell": "cell",
    "DR_simplicial": "simplicial",
    "SDP_simplicial": "simplicial"
}

# For display purposes, we'll use simplified method names
display_mappings = {
    "DR_cell": "DR",
    "SDP_cell": "SDP",
    "DR_simplicial": "DR",
    "SDP_simplicial": "SDP"
}

# Convert the additional data to the proper format for the dataframe
formatted_data = []
for item in additional_data:
    formatted_data.append({
        "model": method_mappings[item["method"]],
        "domain": domain_mappings[item["method"]],
        "dataset": item["dataset"],
        "mean": item["mean"],
        "std": item["std"]
    })

# This data can now be added to your existing dataframe or used to create a new one
tbx_df = pd.DataFrame(formatted_data)

In [69]:
final_add_df = pd.concat([df_res, topotune_df, tbx_df], ignore_index=True)

In [70]:
final_add_df[final_add_df.duplicated(subset=["model", "dataset", "domain"], keep=False)]

Unnamed: 0,model,dataset,mean,std,domain


In [63]:
def generate_latex_table(df):
    """
    Generates a LaTeX table with methods as rows and datasets as columns.
    For models containing "with", splits into first word (model) and last word (variant).
    
    Parameters:
    df (pandas.DataFrame): DataFrame with columns 'model', 'domain', 'dataset', 'mean', and 'std'
    
    Returns:
    tuple: (LaTeX preamble, LaTeX table code)
    """
    # Ensure df has required columns
    required_columns = ["model", "domain", "dataset", "mean", "std"]
    for col in required_columns:
        if col not in df.columns:
            raise ValueError(f"DataFrame must contain column: {col}")
    
    # Remove duplicates and show warning if any are found
    duplicates = df.duplicated(subset=["model", "domain", "dataset"], keep=False)
    if duplicates.any():
        print(f"Warning: Found {duplicates.sum()} duplicate entries. Using the first occurrence.")
        df = df.drop_duplicates(subset=["model", "domain", "dataset"], keep="first")
    
    
    # Instead of pivot, create a dictionary structure directly
    data_dict = {}
    
    # Group by domain, then model, then dataset
    for _, row in df.iterrows():
        domain = row["domain"]
        model = row["model"]
        dataset = row["dataset"]
        mean = row["mean"]
        std = row["std"]
        
        # Initialize nested dictionaries if they don't exist
        if domain not in data_dict:
            data_dict[domain] = {}
        if model not in data_dict[domain]:
            data_dict[domain][model] = {}
        
        # Store the mean and std values
        data_dict[domain][model][dataset] = {"mean": mean, "std": std}
    
    # Get sorted lists of unique values
    domains = sorted(data_dict.keys())
    datasets = sorted(df["dataset"].unique())
    
    # Find best results for each dataset
    best_results = {}
    
    for dataset in datasets:
        all_means = []
        for domain in domains:
            for model in data_dict[domain]:
                if dataset in data_dict[domain][model]:
                    all_means.append(data_dict[domain][model][dataset]["mean"])
        
        if all_means:
            optim_dir = optimization_metrics.get(dataset, {"direction": "max"})["direction"]
            if optim_dir == "max":
                best_results[dataset] = max(all_means)
            else:
                best_results[dataset] = min(all_means)
    
    # Helper function to process model names for display
    def process_model_name(model_name, domain):
        # Remove domain prefix if present
        domain_prefix = domain.capitalize() + " "
        if model_name.startswith(domain_prefix):
            model_name = model_name[len(domain_prefix):]
        
        # For models with "with", split into main model and variant
        if " with " in model_name:
            parts = model_name.split()
            main_model = parts[0]  # First word (usually GCCN)
            variant = parts[-1]    # Last word after "with"
            return main_model, variant
        elif "GPSE" in model_name:
            # Special handling for GPSE
            return "GPSE", model_name.replace("GPSE", "").strip()
        else:
            # No splitting needed
            return model_name, ""
    
    # Function to generate a LaTeX row for a model
    def generate_model_row(model, domain, is_first_in_group=False, group_name=""):
        # Process model name
        main_model, variant = process_model_name(model, domain)
        
        # Start row
        if is_first_in_group:
            row = f"\\multirow{{1}}{{*}}{{{group_name}}} & "
        else:
            row = " & "
        
        # Add variant name
        row += f"\\emph{{{variant if variant else main_model}}}"
        
        # Add data for each dataset
        for dataset in datasets:
            if dataset in data_dict[domain][model]:
                mean = data_dict[domain][model][dataset]["mean"]
                std = data_dict[domain][model][dataset]["std"]
                
                # Format based on whether it's the best result
                if dataset in best_results and np.isclose(mean, best_results[dataset]):
                    row += f" & \\cellcolor{{gray!30}}\\textbf{{{mean:.3f}}} \\scriptsize{{±{std:.3f}}}"
                else:
                    # Check if within one standard deviation of best
                    optim_dir = optimization_metrics.get(dataset, {"direction": "max"})["direction"]
                    threshold = best_results[dataset] - std if dataset in best_results else None
                    
                    if threshold is not None:
                        if (optim_dir == "max" and mean >= threshold) or (optim_dir == "min" and mean <= threshold):
                            row += f" & \\cellcolor{{blue!20}}{mean:.3f} \\scriptsize{{±{std:.3f}}}"
                        else:
                            row += f" & {mean:.3f} \\scriptsize{{±{std:.3f}}}"
                    else:
                        row += f" & {mean:.3f} \\scriptsize{{±{std:.3f}}}"
            else:
                row += " & -"
        
        return row + " \\\\"
    
    # Generate LaTeX table
    latex_table = []
    
    # Process each domain
    for domain in domains:
        # Organize models by main type
        domain_models = {}
        
        # Group models based on their main type (after processing)
        for model in data_dict[domain]:
            main_type, _ = process_model_name(model, domain)
            
            if main_type not in domain_models:
                domain_models[main_type] = []
            
            domain_models[main_type].append(model)
        
        # Sort model groups and models within groups
        sorted_types = sorted(domain_models.keys())
        for model_type in sorted_types:
            domain_models[model_type] = sorted(domain_models[model_type])
        
        # Add domain header
        domain_display = domain.capitalize()
        latex_table.append(f"\\multicolumn{{{len(datasets) + 2}}}{{l}}{{\\textbf{{{domain_display} Methods}}}} \\\\")
        latex_table.append("\\midrule")
        
        # Process each model type group
        for i, model_type in enumerate(sorted_types):
            models = domain_models[model_type]
            
            # Add group header with multirow
            latex_table.append(f"\\multirow{{{len(models)}}}{{*}}{{{model_type}}} & ")
            
            # Process each model in the group
            for j, model in enumerate(models):
                if j == 0:
                    # First model in group, header already added
                    row = generate_model_row(model, domain, is_first_in_group=True, group_name=model_type)
                else:
                    # Subsequent models in group
                    row = " & " + generate_model_row(model, domain).split(" & ", 1)[1]
                
                latex_table.append(row)
                
                # Add cmidrule between models but not after the last one
                if j < len(models) - 1:
                    latex_table.append(f"\\cmidrule{{2-{len(datasets) + 2}}}")
            
            # Add midrule between different model types but not after the last one
            if i < len(sorted_types) - 1:
                latex_table.append("\\midrule")
        
        # Add midrule between domains
        if domain != domains[-1]:
            latex_table.append("\\midrule")
    
    # Construct the LaTeX table header
    dataset_labels = " & & " + " & ".join([f"\\emph{{{dataset}}}" for dataset in datasets])
    
    # Additional packages needed
    preamble = "\\usepackage{multirow}\n\\usepackage{booktabs}\n\\usepackage{colortbl}\n\\usepackage{array}"
    
    # Create column specifications
    col_spec = ">{{\\raggedright\\arraybackslash}}p{{2.5cm}}>{{\\raggedright\\arraybackslash}}p{{2.5cm}}" + "c" * len(datasets)
    
    latex_code = (
        "\\begin{table}[h]\n"
        "\\centering\n"
        "\\scriptsize\n"
        "\\begin{tabular}{" + col_spec + "}\n"
        "\\toprule\n"
        "\\textbf{Method} & \\textbf{Variant} " + dataset_labels + " \\\\\n"
        + "\n".join(latex_table)
        + "\n\\bottomrule\n"
        "\\end{tabular}\n"
        "\\caption{Cross-domain comparison grouped by domain type (Simplicial/Cellular): results are shown as mean and standard deviation. "
        "The best result is bold and shaded in grey, while those within one standard deviation are in blue-shaded boxes.}\n"
        "\\end{table}"
    )

    return preamble, latex_code

In [64]:
print(generate_latex_table(final_add_df)[1])

\begin{table}[h]
\centering
\scriptsize
\begin{tabular}{>{{\raggedright\arraybackslash}}p{{2.5cm}}>{{\raggedright\arraybackslash}}p{{2.5cm}}ccccccc}
\toprule
\textbf{Method} & \textbf{Variant}  & & \emph{IMDB-BINARY} & \emph{IMDB-MULTI} & \emph{MUTAG} & \emph{NCI1} & \emph{NCI109} & \emph{PROTEINS} & \emph{ZINC} \\
\multicolumn{9}{l}{\textbf{Cell Methods}} \\
\midrule
\multirow{4}{*}{GPSE} & 
\emph{_GEOM} & \cellcolor{gray!30}\textbf{73.200} \scriptsize{±1.876} & \cellcolor{blue!20}49.013 \scriptsize{±3.783} & - & 78.016 \scriptsize{±1.175} & \cellcolor{blue!20}77.909 \scriptsize{±1.246} & \cellcolor{blue!20}74.767 \scriptsize{±3.695} & 0.210 \scriptsize{±0.002} \\
\cmidrule{2-9}
 & \emph{_MOLPCBA} & \cellcolor{blue!20}70.960 \scriptsize{±5.104} & \cellcolor{gray!30}\textbf{50.027} \scriptsize{±3.561} & - & \cellcolor{blue!20}78.521 \scriptsize{±0.947} & 77.483 \scriptsize{±0.641} & \cellcolor{blue!20}74.839 \scriptsize{±2.590} & 0.213 \scriptsize{±0.005} \\
\cmidrule{2-9}
 & \emph{_PC