In [24]:
import ast
import glob
import warnings
from collections import defaultdict
from datetime import date

import numpy as np
import pandas as pd
import wandb

today = date.today()
api = wandb.Api()

# # Find all csv files in the current directory
csv_files = glob.glob("/home/lev/projects/TopoBenchmarkX/big_csv/*.csv")
# # Collect all the names of the csv files without the extension
csv_names = [csv_file[:-4] for csv_file in csv_files]
project_name = "TopoBenchmarkX_Simplicial"  
user = "telyatnikov_sap"

if project_name not in csv_names:
    runs = api.runs(f"{user}/{project_name}")

    summary_list, config_list, name_list = [], [], []
    for run in runs:
        # .summary contains the output keys/values for metrics like accuracy.
        #  We call ._json_dict to omit large files
        summary_list.append(run.summary._json_dict)

        # .config contains the hyperparameters.
        #  We remove special values that start with _.
        config_list.append(
            {k: v for k, v in run.config.items() if not k.startswith("_")}
        )

        # .name is the human-readable name of the run.
        name_list.append(run.name)

    runs_df = pd.DataFrame(
        {"summary": summary_list, "config": config_list, "name": name_list}
    )

    runs_df.to_csv(f"{user}_{project_name}.csv")
else:
    runs_df = pd.read_csv(f"{user}_{project_name}.csv", index_col=0)

    for row in runs_df.iloc:
        row["summary"] = ast.literal_eval(row["summary"])
        row["config"] = ast.literal_eval(row["config"])


for row in runs_df.iloc:
    row["summary"].update(row["config"])

lst = [i["summary"] for i in runs_df.iloc]
df = pd.DataFrame.from_dict(lst)

df_init = df.copy()

# Get average epoch run time
df["epoch_run_time"] = df["_runtime"] / df["epoch"]

In [26]:
df = df_init.copy() 

In [27]:
def normalize_column(df, column_to_normalize):
    # Use json_normalize to flatten the nested dictionaries into separate columns
    flattened_df = pd.json_normalize(df[column_to_normalize])
    # Rename columns to include 'nested_column' prefix
    flattened_df.columns = [
        f"{column_to_normalize}.{col}" for col in flattened_df.columns
    ]
    # Concatenate the flattened DataFrame with the original DataFrame
    result_df = pd.concat([df, flattened_df], axis=1)
    # Get new columns names
    new_columns = flattened_df.columns
    # Drop the original nested column if needed
    result_df.drop(column_to_normalize, axis=1, inplace=True)
    return result_df, new_columns


# Config columns to normalize
columns_to_normalize = ["model", "dataset", "callbacks", "paths"]

# Keep track of config columns added
config_columns = []
for column in columns_to_normalize:
    df, columns = normalize_column(df, column)
    config_columns.extend(columns)

### Concatenate tables to obtain full hp space

In [28]:
additiona_runs = pd.read_csv(f'gbg141_{project_name}.csv', index_col=0)
df = pd.concat([df, additiona_runs], axis=0)
df.reset_index(drop=True, inplace=True)

In [29]:
df.reset_index(drop=True, inplace=True)

## Select models that have finished the runs

## Workout us_demographic 

In [30]:
# For every rows where df['dataset.parameters.data_name'] == 'US-county-demos' extend the 'dataset.parameters.data_name' with dataset.parameters.task_variable 
# and set it to 'US-county-demos' + '-' + dataset.parameters.task_variable
df.loc[df['dataset.parameters.data_name'] == 'US-county-demos', 'dataset.parameters.data_name'] = df.loc[df['dataset.parameters.data_name'] == 'US-county-demos', 'dataset.parameters.data_name'] + '-' + df.loc[df['dataset.parameters.data_name'] == 'US-county-demos', 'dataset.parameters.task_variable']

In [31]:
# Print all columns 10 per line
for i in range(0, len(df.columns), 5):
    print(list(df.columns[i:i + 5]))


['train/loss', 'val/precision', 'trainer/global_step', '_step', '_runtime']
['val/auroc', 'train/accuracy', 'train/precision', 'epoch', 'val/recall']
['train/auroc', 'train/recall', 'lr-Adam', '_timestamp', 'val/accuracy']
['val/loss', 'seed', 'tags', 'extras', 'trainer']
['ckpt_path', 'task_name', 'model/params/total', 'model/params/trainable', 'model/params/non_trainable']
['test/auroc', 'test/accuracy', '_wandb', 'test/recall', 'test/precision']
['test/loss', 'train/mse', 'val/mae', 'val/mse', 'test/mae']
['test/mse', 'train/mae', 'model.compile', 'model._target_', 'model.model_name']
['model.model_domain', 'model.loss.task', 'model.loss._target_', 'model.loss.loss_type', 'model.readout._target_']
['model.readout.hidden_dim', 'model.readout.readout_name', 'model.readout.num_cell_dimensions', 'model.backbone._target_', 'model.backbone.n_layers']
['model.backbone.in_channels_0', 'model.backbone.in_channels_1', 'model.backbone.in_channels_2', 'model.optimizer.lr', 'model.optimizer._tar

### See unique datasets

In [32]:
print(df['dataset.parameters.data_name'].unique())
print("Num unique datasets:", len(df['dataset.parameters.data_name'].unique()))

['NCI1' 'NCI109' 'minesweeper' 'roman_empire' 'ZINC' 'PROTEINS' 'PubMed'
 'citeseer' 'Cora' 'US-county-demos-UnemploymentRate'
 'US-county-demos-BachelorRate' 'MUTAG' 'US-county-demos-DeathRate'
 'US-county-demos-BirthRate' 'US-county-demos-MigraRate'
 'US-county-demos-MedianIncome' 'US-county-demos-Election']
Num unique datasets: 17


## See unique models

In [33]:
print(df['model.model_name'].unique())

['scn' 'sccn' 'sccnn_custom']


## Solve batch problems

In [34]:
datasets = ['minesweeper', 'roman_empire']
models = ['sccnn_custom', 'scn', 'sccn']
# For the following models and datasets I mistook the batch size, it should be 1, instead of 256 or 128
# Keep the run where batch size is 128 and then change the batch size to 1
for model in models:
    print("MODEL:", model)
    for dataset in datasets:

        # Change the batch size to 1 when it is 128
        
        print(df.loc[(df['model.model_name'] == model) & (df['dataset.parameters.data_name'] == dataset), 'dataset.parameters.batch_size'].unique())
        

MODEL: sccnn_custom
[1]
[1]
MODEL: scn
[1]
[1]
MODEL: sccn
[1]
[1]


## Solve issue with projection dropout

In [35]:
print(df['model.feature_encoder.proj_dropout'].unique())

[0.5  0.25]


In [36]:
# Keep rows where model.feature_encoder.proj_dropout is [0.5  0.25]
df = df[df['model.feature_encoder.proj_dropout'].isin([0.5, 0.25])]


In [37]:
df.reset_index(drop=True, inplace=True)

In [38]:
# Sweeped parameters: 
sweeped_columns = [
    'model.optimizer.lr', 
    'model.feature_encoder.out_channels',
    'model.backbone.n_layers',
    'model.readout.readout_name',
    'dataset.transforms.graph2simplicial_lifting.signed',
    'model.feature_encoder.proj_dropout',
    'dataset.parameters.batch_size',
    'dataset.parameters.data_seed',
    'seed',
]



# For each model and dataset go over all the sweeped parameters and print the unique values
for model in df['model.model_name'].unique():
    print(f"Model: {model}")
    for dataset in df['dataset.parameters.data_name'].unique():
        print(f"Dataset: {dataset}")
        for column in sweeped_columns:
            print(f"Column: {column}")
            print(df.loc[(df['model.model_name'] == model) & (df['dataset.parameters.data_name'] == dataset), column].unique())
        
        print('---------------NEW DATASET------------------')
    print('---------------NEW MODEL------------------')


Model: scn
Dataset: NCI1
Column: model.optimizer.lr
[0.001 0.01 ]
Column: model.feature_encoder.out_channels
[64 32]
Column: model.backbone.n_layers
[2 3 4 1]
Column: model.readout.readout_name
['NoReadOut' 'PropagateSignalDown']
Column: dataset.transforms.graph2simplicial_lifting.signed
[ True]
Column: model.feature_encoder.proj_dropout
[0.5  0.25]
Column: dataset.parameters.batch_size
[128 256]
Column: dataset.parameters.data_seed
[0 9 5 3 7]
Column: seed
[42]
---------------NEW DATASET------------------
Dataset: NCI109
Column: model.optimizer.lr
[0.001 0.01 ]
Column: model.feature_encoder.out_channels
[64 32]
Column: model.backbone.n_layers
[2 3 4 1]
Column: model.readout.readout_name
['PropagateSignalDown' 'NoReadOut']
Column: dataset.transforms.graph2simplicial_lifting.signed
[ True]
Column: model.feature_encoder.proj_dropout
[0.5  0.25]
Column: dataset.parameters.batch_size
[256 128]
Column: dataset.parameters.data_seed
[7 9 5 0 3]
Column: seed
[42]
---------------NEW DATASET----

### Get the best results

In [39]:
# Extract best results for each model and dataset
# 1. Keep the columns that are necessary for the comparison
sweeped_columns = [
    'model.optimizer.lr', 
    'model.feature_encoder.out_channels',
    'model.backbone.n_layers',
    'model.readout.readout_name',
    'dataset.transforms.graph2simplicial_lifting.signed',
    'model.feature_encoder.proj_dropout',
    'dataset.parameters.batch_size',
]
run_columns = ['dataset.parameters.data_seed','seed']

# Dataset and model columns
dataset_model_columns = ['model.model_name', 'dataset.parameters.data_name']

# Performance columns
performance_columns = [
    'val/loss', 'test/loss',
    'val/mae', 'test/mae',
    'val/mse', 'test/mse',
    'val/accuracy', 'test/accuracy',
    'val/auroc','test/auroc',
    'val/recall', 'test/recall',
    'val/precision', 'test/precision',
    ]
keep_columns = dataset_model_columns + sweeped_columns + performance_columns + run_columns
df = df[keep_columns]

In [40]:
performance_classification = [
    'val/accuracy', 'test/accuracy',
    'val/auroc','test/auroc',
    'val/recall', 'test/recall',
    'val/precision', 'test/precision',
    ]
performance_regression = [
    'val/mae', 'test/mae',
    'val/mse', 'test/mse',
    ]
# Define a dict of dicts for each dataset the corresponding optimization metrics
optimization_metrics = {
    'IMDB-MULTI': {'optim_metric': 'val/accuracy', 'eval_metric': 'test/accuracy', 'direction': 'max', 'performance_columns': performance_classification},
    'IMDB-BINARY': {'optim_metric': 'val/accuracy', 'eval_metric': 'test/accuracy', 'direction': 'max', 'performance_columns': performance_classification},
    'REDDIT-BINARY': {'optim_metric': 'val/accuracy', 'eval_metric': 'test/accuracy', 'direction': 'max', 'performance_columns': performance_classification},
    'NCI109': {'optim_metric': 'val/accuracy', 'eval_metric': 'test/accuracy', 'direction': 'max', 'performance_columns': performance_classification},
    'NCI1': {'optim_metric': 'val/accuracy', 'eval_metric': 'test/accuracy', 'direction': 'max', 'performance_columns': performance_classification},
    'PROTEINS': {'optim_metric': 'val/accuracy', 'eval_metric': 'test/accuracy', 'direction': 'max', 'performance_columns': performance_classification},
    'MUTAG': {'optim_metric': 'val/accuracy', 'eval_metric': 'test/accuracy', 'direction': 'max', 'performance_columns': performance_classification},
    'Cora': {'optim_metric': 'val/accuracy', 'eval_metric': 'test/accuracy', 'direction': 'max', 'performance_columns': performance_classification},
    'citeseer': {'optim_metric': 'val/accuracy', 'eval_metric': 'test/accuracy', 'direction': 'max', 'performance_columns': performance_classification},
    'PubMed': {'optim_metric': 'val/accuracy', 'eval_metric': 'test/accuracy', 'direction': 'max', 'performance_columns': performance_classification},

    'roman_empire': {'optim_metric': 'val/accuracy', 'eval_metric': 'test/accuracy', 'direction': 'max', 'performance_columns': performance_classification},
    'amazon_ratings': {'optim_metric': 'val/accuracy', 'eval_metric': 'test/accuracy', 'direction': 'max', 'performance_columns': performance_classification},
    
    'tolokers': {'optim_metric': 'val/auroc', 'eval_metric': 'test/auroc', 'direction': 'max', 'performance_columns': performance_classification},
    'questions': {'optim_metric': 'val/auroc', 'eval_metric': 'test/auroc', 'direction': 'max', 'performance_columns': performance_classification},
    'minesweeper': {'optim_metric': 'val/auroc', 'eval_metric': 'test/auroc', 'direction': 'max', 'performance_columns': performance_classification},

    'ZINC': {'optim_metric': 'val/mae', 'eval_metric': 'test/mae', 'direction': 'min', 'performance_columns': performance_regression},
    
    'US-county-demos-UnemploymentRate': {'optim_metric': 'val/mse', 'eval_metric': 'test/mse', 'direction': 'min', 'performance_columns': performance_regression},
    'US-county-demos-BachelorRate': {'optim_metric': 'val/mse', 'eval_metric': 'test/mse', 'direction': 'min', 'performance_columns': performance_regression},
    'US-county-demos-DeathRate': {'optim_metric': 'val/mse', 'eval_metric': 'test/mse', 'direction': 'min', 'performance_columns': performance_regression},
    'US-county-demos-BirthRate': {'optim_metric': 'val/mse', 'eval_metric': 'test/mse', 'direction': 'min', 'performance_columns': performance_regression},
    'US-county-demos-MigraRate': {'optim_metric': 'val/mse', 'eval_metric': 'test/mse', 'direction': 'min', 'performance_columns': performance_regression},
    'US-county-demos-MedianIncome': {'optim_metric': 'val/mse', 'eval_metric': 'test/mse', 'direction': 'min', 'performance_columns': performance_regression},
    'US-county-demos-Election': {'optim_metric': 'val/mse', 'eval_metric': 'test/mse', 'direction': 'min', 'performance_columns': performance_regression},

} 

len(optimization_metrics)

23

### Generate the best results

In [41]:
# Get unique datasets
datasets = list(df['dataset.parameters.data_name'].unique())
# Get unique models
models = list(df['model.model_name'].unique())

best_results = defaultdict(dict)
hp_runs = defaultdict(dict)
best_runs = defaultdict(dict)
# Got over each dataset and model and find the best result
for dataset in datasets:
    for model in models:
        # Get the subset of the DataFrame for the current dataset and model
        subset = df[
            (df['dataset.parameters.data_name'] == dataset)
            & (df['model.model_name'] == model)
        ]

        optim_metric = optimization_metrics[dataset]['optim_metric']
        eval_metric = optimization_metrics[dataset]['eval_metric']
        direction = optimization_metrics[dataset]['direction']
        
        # Keep metrics that matters for dataset
        performance_columns = optimization_metrics[dataset]['performance_columns']
        subset = subset[dataset_model_columns + sweeped_columns + performance_columns + run_columns]

        # --------WORKOUT NAN--------

        print(f"Dataset: {dataset}, Model: {model}")
        init_num_of_rows = subset.shape[0]
        print(f'Initial number of rows, {init_num_of_rows}')

        # Find 'NaN' in performance columns
        # for each column find number of rows with 'NaN' string
        total_performance_nan = 0
        for column in performance_columns:
            # Find the number of NaN string in the column
            num_nan = subset[column].apply(lambda x: x == 'NaN')
            num_nan = num_nan.sum()
            total_performance_nan += num_nan        
    
        if total_performance_nan > 0:
          
            nan_rows = subset[performance_columns].eq('NaN')
            nan_rows = nan_rows.sum(axis=1)
            # Drop every rows where 'NaN' string is present
            subset = subset[~nan_rows.gt(0)]
           
        # Ensure that the performance columns are of type float
        subset[performance_columns] = subset[performance_columns].astype(float)
    
        for column in performance_columns:
            if subset[column].isna().sum() > 0:  
                subset = subset[~subset[column].isna()]

        final_num_of_rows = subset.shape[0]  
        print(f'Final number of rows , {final_num_of_rows}')
        print(f'Rows that had NANs number of rows , {final_num_of_rows - init_num_of_rows}')

        # --------WORKOUT NAN--------

        aggregated = subset.groupby(sweeped_columns, dropna=False).agg(
            {col: ["mean", "std"] for col in performance_columns}
        )

         # Go from MultiIndex to Index
        aggregated = aggregated.reset_index()
        aggregated = aggregated.sort_values(
                by=(optim_metric, "mean"), ascending=(direction == 'min')
            )
        
        # Git percent in case of classification
        if 'test/accuracy' in performance_columns:
            # Go over all the performance columns and multiply by 100
            for col in performance_columns:
                aggregated[(col, "mean")] *= 100
                aggregated[(col, "std")] *= 100
            
            # Round performance columns values up to 2 decimal points
            for col in performance_columns:
                aggregated[(col, "mean")] = aggregated[(col, "mean")].round(2)
                aggregated[(col, "std")] = aggregated[(col, "std")].round(2)
            
            
        else:
            # Round all values up to 4 decimal points
            # Round performance columns values up to 4 decimal points
            for col in performance_columns:
                aggregated[(col, "mean")] = aggregated[(col, "mean")].round(4)
                aggregated[(col, "std")] = aggregated[(col, "std")].round(4)
        
            
        
        # Get the best result
        final_best = aggregated.head(1)
        if final_best[(eval_metric, "mean")].any(): 
            best_results[dataset][model] = {
                "mean": final_best[(eval_metric, "mean")].values[0],
                "std": final_best[(eval_metric, "std")].values[0],
            }

            # Extract best runs: 
            best_params = {}
            for col in sweeped_columns:
                best_params[col] = final_best[(col, '')].item()
            
            hp_runs[dataset][model] = subset.copy()
            
            # Start with the entire DataFrame
            filtered_subset = subset.copy()

            # Iterate over each key-value pair in the best parameters dictionary and filter the DataFrame
            for param, value in best_params.items():
                filtered_subset = filtered_subset[filtered_subset[param] == value]
            best_runs[dataset][model] = filtered_subset
        
        else: 
            best_results[dataset][model] = {
                "mean": np.nan,
                "std": np.nan,
            }

        

        
            
        


Dataset: NCI1, Model: scn
Initial number of rows, 473
Final number of rows , 466
Rows that had NANs number of rows , -7
Dataset: NCI1, Model: sccn
Initial number of rows, 689
Final number of rows , 679
Rows that had NANs number of rows , -10
Dataset: NCI1, Model: sccnn_custom
Initial number of rows, 647
Final number of rows , 646
Rows that had NANs number of rows , -1
Dataset: NCI109, Model: scn
Initial number of rows, 490
Final number of rows , 485
Rows that had NANs number of rows , -5
Dataset: NCI109, Model: sccn
Initial number of rows, 679
Final number of rows , 669
Rows that had NANs number of rows , -10
Dataset: NCI109, Model: sccnn_custom
Initial number of rows, 639
Final number of rows , 638
Rows that had NANs number of rows , -1
Dataset: minesweeper, Model: scn
Initial number of rows, 636
Final number of rows , 635
Rows that had NANs number of rows , -1
Dataset: minesweeper, Model: sccn
Initial number of rows, 479
Final number of rows , 478
Rows that had NANs number of rows , 

In [43]:
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_columns", 1000)
pd.set_option("display.width", 1000)

COLS = [
    'dataset.parameters.data_seed',
    'dataset.parameters.batch_size',
    'model.backbone.n_layers',
    'model.feature_encoder.out_channels',
    'model.readout.readout_name',
    'model.feature_encoder.proj_dropout',
    'model.optimizer.lr', 
    
]

a = hp_runs['US-county-demos-BirthRate']['scn'].sort_values(by=COLS, ascending=False)[COLS]


In [44]:
# for col in COLS:
#     print(a[col].value_counts())
    

## Save obtained best results and best runs

In [45]:
# Convert nested dictionary to DataFrame
nested_dict = dict(best_results)
result_dict = pd.DataFrame.from_dict(
    {
        (i, j): nested_dict[i][j]
        for i in nested_dict
        for j in nested_dict[i].keys()
    },
    orient="index",
)

result_dict["performance"] = result_dict.apply(
    lambda x: f"{x['mean']} ± {x['std']}", axis=1
)
result_dict = result_dict.drop(["mean", "std"], axis=1)

# Reset multiindex
result_dict = result_dict.reset_index()
# rename columns
result_dict.columns = ["Dataset", "Model", "Performance"]

result_dict = result_dict.pivot_table(
    index="Model", columns="Dataset", values="Performance", aggfunc="first"
)

In [46]:
result_dict

Dataset,Cora,MUTAG,NCI1,NCI109,PROTEINS,PubMed,US-county-demos-BachelorRate,US-county-demos-BirthRate,US-county-demos-DeathRate,US-county-demos-Election,US-county-demos-MedianIncome,US-county-demos-MigraRate,US-county-demos-UnemploymentRate,ZINC,citeseer,minesweeper,roman_empire
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
sccn,80.86 ± 2.16,70.64 ± 5.9,76.17 ± 1.39,75.49 ± 1.39,75.05 ± 2.76,88.37 ± 0.48,0.3588 ± 0.0246,0.8242 ± 0.0942,0.5751 ± 0.0553,0.5344 ± 0.0323,0.2908 ± 0.032,0.9146 ± 0.1822,0.4328 ± 0.044,0.4858 ± 0.0584,69.6 ± 1.83,89.07 ± 0.25,88.27 ± 0.14
sccnn_custom,82.19 ± 1.07,76.17 ± 6.63,76.6 ± 1.75,77.12 ± 1.07,74.19 ± 2.86,88.18 ± 0.32,0.3394 ± 0.028,0.7937 ± 0.1162,0.5527 ± 0.0474,0.5112 ± 0.0316,0.2825 ± 0.0279,0.8976 ± 0.1431,0.4278 ± 0.0394,0.4088 ± 0.0047,70.23 ± 2.69,89.0 ± 0.0,89.15 ± 0.32
scn,82.27 ± 1.34,73.62 ± 6.13,74.46 ± 1.11,75.7 ± 1.04,75.27 ± 2.14,88.72 ± 0.5,0.3186 ± 0.0241,0.7122 ± 0.0836,0.5208 ± 0.0525,0.4648 ± 0.043,0.2526 ± 0.0247,0.9209 ± 0.1993,0.3753 ± 0.0432,nan ± nan,71.24 ± 1.68,90.32 ± 0.11,88.79 ± 0.46


In [47]:
# Increase the number of allowed rows to display
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_columns", 1000)
pd.set_option("display.width", 1000)
result_dict.to_csv(f"best_results_simplicial.csv")

### Propagate signal down comparison

In [48]:
# Get unique datasets
datasets = list(df['dataset.parameters.data_name'].unique())
# Get unique models
models = list(df['model.model_name'].unique())

best_results = defaultdict(dict)
hp_runs = defaultdict(dict)
best_runs = defaultdict(dict)
# Got over each dataset and model and find the best result
for dataset in datasets:
    for model in models:
        # Get the subset of the DataFrame for the current dataset and model
        subset = df[
            (df['dataset.parameters.data_name'] == dataset)
            & (df['model.model_name'] == model)
        ]

        optim_metric = optimization_metrics[dataset]['optim_metric']
        eval_metric = optimization_metrics[dataset]['eval_metric']
        direction = optimization_metrics[dataset]['direction']
        
        # Keep metrics that matters for dataset
        performance_columns = optimization_metrics[dataset]['performance_columns']
        subset = subset[dataset_model_columns + sweeped_columns + performance_columns + run_columns]

        # --------WORKOUT NAN--------

        print(f"Dataset: {dataset}, Model: {model}")
        init_num_of_rows = subset.shape[0]
        print(f'Initial number of rows, {init_num_of_rows}')

        # Find 'NaN' in performance columns
        # for each column find number of rows with 'NaN' string
        total_performance_nan = 0
        for column in performance_columns:
            # Find the number of NaN string in the column
            num_nan = subset[column].apply(lambda x: x == 'NaN')
            num_nan = num_nan.sum()
            total_performance_nan += num_nan        
    
        if total_performance_nan > 0:
          
            nan_rows = subset[performance_columns].eq('NaN')
            nan_rows = nan_rows.sum(axis=1)
            # Drop every rows where 'NaN' string is present
            subset = subset[~nan_rows.gt(0)]
           
        # Ensure that the performance columns are of type float
        subset[performance_columns] = subset[performance_columns].astype(float)
    
        for column in performance_columns:
            if subset[column].isna().sum() > 0:  
                subset = subset[~subset[column].isna()]

        final_num_of_rows = subset.shape[0]  
        print(f'Final number of rows , {final_num_of_rows}')
        print(f'Rows that had NANs number of rows , {final_num_of_rows - init_num_of_rows}')

        # --------WORKOUT NAN--------


        aggregated = subset.groupby(sweeped_columns, dropna=False).agg(
            {col: ["mean", "std"] for col in performance_columns}
        )

         # Go from MultiIndex to Index
        aggregated = aggregated.reset_index()
        aggregated = aggregated.sort_values(
                by=(optim_metric, "mean"), ascending=(direction == 'min')
            )
        
        # Git percent in case of classification
        if 'test/accuracy' in performance_columns:
            # Go over all the performance columns and multiply by 100
            for col in performance_columns:
                aggregated[(col, "mean")] *= 100
                aggregated[(col, "std")] *= 100
            
            # Round performance columns values up to 2 decimal points
            for col in performance_columns:
                aggregated[(col, "mean")] = aggregated[(col, "mean")].round(2)
                aggregated[(col, "std")] = aggregated[(col, "std")].round(2)
            
            
        else:
            # Round all values up to 4 decimal points
            # Round performance columns values up to 4 decimal points
            for col in performance_columns:
                aggregated[(col, "mean")] = aggregated[(col, "mean")].round(4)
                aggregated[(col, "std")] = aggregated[(col, "std")].round(4)
        
        if sorted(list(aggregated['model.readout.readout_name'].unique())) == sorted(['NoReadOut', 'PropagateSignalDown']):
            prop_types = ['NoReadOut', 'PropagateSignalDown']
            for prop_type in prop_types:
                agg_sub = aggregated[aggregated['model.readout.readout_name'] == prop_type]
                agg_sub = agg_sub.sort_values(
                    by=(optim_metric, "mean"), ascending=(direction == 'min')
                )
                
                final_best = agg_sub.head(1)
                if final_best[(eval_metric, "mean")].any(): 
                    best_results[dataset][f"{model} ({prop_type})"] = {
                        "mean": final_best[(eval_metric, "mean")].values[0],
                        "std": final_best[(eval_metric, "std")].values[0],
                    }

                    # Extract best runs: 
                    best_params = {}
                    for col in sweeped_columns:
                        best_params[col] = final_best[(col, '')].item()
                    
                    hp_runs[dataset][model] = subset.copy()
                    
                    # Start with the entire DataFrame
                    filtered_subset = subset.copy()

                    # Iterate over each key-value pair in the best parameters dictionary and filter the DataFrame
                    for param, value in best_params.items():
                        filtered_subset = filtered_subset[filtered_subset[param] == value]
                    best_runs[dataset][model] = filtered_subset
                
                else: 
                    best_results[dataset][model] = {
                        "mean": np.nan,
                        "std": np.nan,
                        "prop_type": prop_type
                    }
        else:
            prop_types = ['NoReadOut', 'PropagateSignalDown']
            for prop_type in prop_types:
                best_results[dataset][f"{model} ({prop_type})"] = {
                            "mean": np.nan,
                            "std": np.nan,
                        }

       

# Convert nested dictionary to DataFrame
nested_dict = dict(best_results)
result_dict = pd.DataFrame.from_dict(
    {
        (i, j): nested_dict[i][j]
        for i in nested_dict
        for j in nested_dict[i].keys()
    },
    orient="index",
)

result_dict["performance"] = result_dict.apply(
    lambda x: f"{x['mean']} ± {x['std']}", axis=1
)
result_dict = result_dict.drop(["mean", "std"], axis=1)

# Reset multiindex
result_dict = result_dict.reset_index()
# rename columns
result_dict.columns = ["Dataset", "Model", "Performance"]

result_dict = result_dict.pivot_table(
    index="Model", columns="Dataset", values="Performance", aggfunc="first"
)
result_dict.reset_index(inplace=True)

result_dict['ReadOut'] = result_dict['Model'].apply(lambda x: x.split('(')[1].replace(')', ''))
result_dict['Model'] = result_dict['Model'].apply(lambda x: x.split('(')[0])

result_dict.sort_values(by=['Model','ReadOut'], inplace=True)

columns = ['Model',
'ReadOut',
 'Cora',
 'MUTAG',
 'NCI1',
 'NCI109',
 'PROTEINS',
 'PubMed',
 'US-county-demos-BachelorRate',
 'US-county-demos-BirthRate',
 'US-county-demos-DeathRate',
 'US-county-demos-Election',
 'US-county-demos-MedianIncome',
 'US-county-demos-MigraRate',
 'US-county-demos-UnemploymentRate',
 'ZINC',
 'citeseer',
 'minesweeper',
 'roman_empire',]
result_dict = result_dict[columns]
result_dict.to_csv(f"ablation_simplicial.csv")

Dataset: NCI1, Model: scn
Initial number of rows, 473
Final number of rows , 466
Rows that had NANs number of rows , -7
Dataset: NCI1, Model: sccn
Initial number of rows, 689
Final number of rows , 679
Rows that had NANs number of rows , -10
Dataset: NCI1, Model: sccnn_custom
Initial number of rows, 647
Final number of rows , 646
Rows that had NANs number of rows , -1
Dataset: NCI109, Model: scn
Initial number of rows, 490
Final number of rows , 485
Rows that had NANs number of rows , -5
Dataset: NCI109, Model: sccn
Initial number of rows, 679
Final number of rows , 669
Rows that had NANs number of rows , -10
Dataset: NCI109, Model: sccnn_custom
Initial number of rows, 639
Final number of rows , 638
Rows that had NANs number of rows , -1
Dataset: minesweeper, Model: scn
Initial number of rows, 636
Final number of rows , 635
Rows that had NANs number of rows , -1
Dataset: minesweeper, Model: sccn
Initial number of rows, 479
Final number of rows , 478
Rows that had NANs number of rows , 

In [50]:
result_dict

Dataset,Model,ReadOut,Cora,MUTAG,NCI1,NCI109,PROTEINS,PubMed,US-county-demos-BachelorRate,US-county-demos-BirthRate,US-county-demos-DeathRate,US-county-demos-Election,US-county-demos-MedianIncome,US-county-demos-MigraRate,US-county-demos-UnemploymentRate,ZINC,citeseer,minesweeper,roman_empire
0,sccn,NoReadOut,80.86 ± 2.16,70.64 ± 5.9,76.42 ± 0.88,75.49 ± 1.39,75.05 ± 2.76,88.04 ± 0.51,0.3588 ± 0.0246,0.8242 ± 0.0942,0.5751 ± 0.0553,0.5344 ± 0.0323,0.2868 ± 0.0276,0.9146 ± 0.1822,0.4328 ± 0.044,0.5441 ± 0.0081,69.6 ± 1.83,88.85 ± 0.0,88.2 ± 0.22
1,sccn,PropagateSignalDown,80.06 ± 1.66,73.62 ± 4.41,76.17 ± 1.39,75.31 ± 1.36,74.34 ± 3.17,88.37 ± 0.48,0.341 ± 0.0249,0.8264 ± 0.1018,0.5629 ± 0.0444,0.5686 ± 0.0247,0.2908 ± 0.032,0.9303 ± 0.172,0.4734 ± 0.0377,0.4858 ± 0.0584,68.86 ± 2.4,89.07 ± 0.25,88.27 ± 0.14
2,sccnn_custom,NoReadOut,82.19 ± 1.07,76.17 ± 6.63,76.6 ± 1.75,77.12 ± 1.07,74.19 ± 2.86,88.18 ± 0.32,0.3394 ± 0.028,0.7937 ± 0.1162,0.5527 ± 0.0474,0.5112 ± 0.0316,0.2825 ± 0.0279,0.8976 ± 0.1431,0.4278 ± 0.0394,0.3562 ± 0.013,70.23 ± 2.69,87.4 ± 0.0,89.15 ± 0.32
3,sccnn_custom,PropagateSignalDown,80.65 ± 2.39,70.64 ± 3.16,75.6 ± 2.45,75.43 ± 1.94,74.98 ± 1.92,87.78 ± 0.58,0.3449 ± 0.0314,0.8251 ± 0.1184,0.579 ± 0.0541,0.557 ± 0.041,0.3073 ± 0.0316,0.9274 ± 0.1711,0.4538 ± 0.0428,0.4088 ± 0.0047,69.03 ± 2.01,89.0 ± 0.0,88.73 ± 0.12
4,scn,NoReadOut,82.27 ± 1.34,71.49 ± 2.43,75.27 ± 1.57,75.7 ± nan,75.27 ± 2.14,88.72 ± 0.5,0.3186 ± 0.0241,0.7122 ± 0.0836,0.5208 ± 0.0525,0.4648 ± 0.043,0.2526 ± 0.0247,0.9209 ± 0.1993,0.3753 ± 0.0432,nan ± nan,71.24 ± 1.68,90.32 ± 0.11,85.89 ± 0.34
5,scn,PropagateSignalDown,79.91 ± 1.18,73.62 ± 6.13,74.46 ± 1.11,75.7 ± 1.04,74.77 ± 1.69,88.62 ± 0.44,0.3205 ± 0.0271,0.7985 ± 0.1062,0.5635 ± 0.0457,0.5091 ± 0.0345,0.2723 ± 0.0174,0.9619 ± 0.2329,0.4131 ± 0.0297,nan ± nan,70.4 ± 1.53,90.27 ± 0.36,88.79 ± 0.46
