In [1]:
import pandas as pd
import wandb
import pandas as pd
import ast
import glob
import numpy as np
import warnings
from datetime import date
from collections import defaultdict

today = date.today()
api = wandb.Api()

# # Find all csv files in the current directory
csv_files = glob.glob("*.csv")
# # Collect all the names of the csv files without the extension
csv_names = [csv_file[:-4] for csv_file in csv_files]
project_name = "topobenchmark_22Apr2024"  #'best_results_edhnn'
user = "telyatnikov_sap"

if project_name not in csv_names:
    runs = api.runs(f"{user}/{project_name}")

    summary_list, config_list, name_list = [], [], []
    for run in runs:
        # .summary contains the output keys/values for metrics like accuracy.
        #  We call ._json_dict to omit large files
        summary_list.append(run.summary._json_dict)

        # .config contains the hyperparameters.
        #  We remove special values that start with _.
        config_list.append(
            {k: v for k, v in run.config.items() if not k.startswith("_")}
        )

        # .name is the human-readable name of the run.
        name_list.append(run.name)

    runs_df = pd.DataFrame(
        {"summary": summary_list, "config": config_list, "name": name_list}
    )

    runs_df.to_csv(f"{project_name}.csv")
else:
    runs_df = pd.read_csv(f"{project_name}.csv", index_col=0)

    for row in runs_df.iloc:
        row["summary"] = ast.literal_eval(row["summary"])
        row["config"] = ast.literal_eval(row["config"])


for row in runs_df.iloc:
    row["summary"].update(row["config"])

lst = [i["summary"] for i in runs_df.iloc]
df = pd.DataFrame.from_dict(lst)

df_init = df.copy()

# Get average epoch run time
df["epoch_run_time"] = df["_runtime"] / df["epoch"]

In [2]:
def normalize_column(df, column_to_normalize):
    # Use json_normalize to flatten the nested dictionaries into separate columns
    flattened_df = pd.json_normalize(df[column_to_normalize])
    # Rename columns to include 'nested_column' prefix
    flattened_df.columns = [
        f"{column_to_normalize}.{col}" for col in flattened_df.columns
    ]
    # Concatenate the flattened DataFrame with the original DataFrame
    result_df = pd.concat([df, flattened_df], axis=1)
    # Get new columns names
    new_columns = flattened_df.columns
    # Drop the original nested column if needed
    result_df.drop(column_to_normalize, axis=1, inplace=True)
    return result_df, new_columns


# Config columns to normalize
columns_to_normalize = ["model", "dataset", "callbacks"]

# Keep track of config columns added
config_columns = []
for column in columns_to_normalize:
    df, columns = normalize_column(df, column)
    config_columns.extend(columns)

In [3]:
# Remove columns that are not needed (we shouldn't vary them or their variation is not interesting)
remove_col = [
    "dataset.transforms.data_manipulations.selected_fields",
    "callbacks.model_checkpoint.dirpath",
    "dataset.transforms.infere_knn_connectivity.args.k",
    "dataset.transforms.infere_knn_connectivity.args.loop",
    "dataset.transforms.infere_knn_connectivity.args.cosine",
    "dataset.transforms.infere_knn_connectivity._target_",
    "dataset.transforms.infere_knn_connectivity.transform_type",
]
for col in remove_col:
    if col in df.columns:
        df = df.drop(col, axis=1)


# Ensure that removed columns are not present in config_columns
config_columns = [col for col in config_columns if col != remove_col]

In [4]:
df["dataset.transforms.infere_knn_connectivity.transform_name"][
    df["dataset.transforms.infere_knn_connectivity.transform_name"]
    != "InfereKNNConnectivity"
] = False
df["dataset.transforms.infere_knn_connectivity.transform_name"][
    df["dataset.transforms.infere_knn_connectivity.transform_name"]
    == "InfereKNNConnectivity"
] = True

df["Infere Connectivity"] = df[
    "dataset.transforms.infere_knn_connectivity.transform_name"
]
df = df.drop("dataset.transforms.infere_knn_connectivity.transform_name", axis=1)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['dataset.transforms.infere_knn_connectivity.transform_name'][df['dataset.transforms.infere_knn_connectivity.transform_name']!='InfereKNNConnectivity'] = False
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http

In [5]:
print(
    f"Number of rows with model.backbone._target_ = nan is {sum(df['model.backbone._target_'].isna())}"
)
# Drop na values if there are
df = df.dropna(subset=["model.backbone._target_"])
# Reset index
df = df.reset_index(drop=True)

# Drop rows that 'callbacks.early_stopping.monitor' isna
print(
    f"Number of rows with callbacks.early_stopping.monitor = nan is {sum(df['callbacks.early_stopping.monitor'].isna())}"
)
print("Because of SCCN and CWN false runs there were 96 such runs on 13/03/24")
df = df.dropna(subset=["callbacks.early_stopping.monitor"])
# Reset index
df = df.reset_index(drop=True)


# Get correct names for the models
df["model.backbone._target_"] = df["model.backbone._target_"].apply(
    lambda x: x.split(".")[-1]
)

Number of rows with model.backbone._target_ = nan is 7
Number of rows with callbacks.early_stopping.monitor = nan is 0
Because of SCCN and CWN false runs there were 96 such runs on 13/03/24


In [6]:
df["Infere Connectivity"]

0       False
1       False
2       False
3       False
4       False
        ...  
4827    False
4828    False
4829    False
4830    False
4831    False
Name: Infere Connectivity, Length: 4832, dtype: object

In [7]:
# Identify unique models in DataFrame
unique_models = df["model.backbone._target_"].unique()

# Identify unique datasets in DataFrame
unique_datasets = df["dataset.parameters.data_name"].unique()


collected_results = defaultdict(dict)
collected_aggregated_results = defaultdict(dict)
collected_non_aggregated_results = defaultdict(dict)
# Got over each dataset and model and find the best result
for dataset in unique_datasets:
    for model in unique_models:
        # Get the subset of the DataFrame for the current dataset and model
        subset = df[
            (df["dataset.parameters.data_name"] == dataset)
            & (df["model.backbone._target_"] == model)
        ]

        if subset.empty:
            print(f"---------")
            print(f"No results for {model} on {dataset}")
            print(f"---------")
            continue
        # Suppress all warnings
        warnings.filterwarnings("ignore")
        subset["Model"] = model
        warnings.filterwarnings("default")

        def get_metric(df):
            metric_ = df["callbacks.early_stopping.monitor"].unique()
            assert len(metric_) == 1, "There should be only one metric to optimize"
            metric = metric_[0]
            return metric.split("/")[-1]

        # Cols to get statistics later
        performance_cols = [f"val/{get_metric(subset)}", f"test/{get_metric(subset)}"]

        # Get the unique values for each config column
        unique_colums_values = {}
        for col in config_columns + ["Infere Connectivity"]:
            try:
                unique_colums_values[col] = subset[col].unique()
            except:
                print(f"Attention the columns: {col}, has issues with unique values")

        # Keep only those keys that have more than one unique value
        unique_colums_values = {
            k: v for k, v in unique_colums_values.items() if len(v) > 1
        }

        # Print the unique values for each config column

        print(f"Unique values for each config column for {model} on {dataset}:")
        for col, unique in unique_colums_values.items():
            print(f"{col}: {unique}")
            print()
        print(f"---------")

        # Check if "special colums" are not in unique_colums_values
        # For example dataset.parameters.data_seed should not be in aggregation columns
        # If it is, then we should remove it from the list
        special_columns = ["dataset.parameters.data_seed"]

        for col in special_columns:
            if col in unique_colums_values:
                unique_colums_values.pop(col)

        # Obtain the aggregation columns
        aggregation_columns = ["Model"] + list(unique_colums_values.keys())

        # Check if there if "Infere Connectivity" if not add it to aggregation columns
        if "Infere Connectivity" not in aggregation_columns:
            aggregation_columns.append("Infere Connectivity")

        collected_non_aggregated_results[dataset][model] = {
            "df": subset.copy(),
            "aggregation_columns": aggregation_columns,
            "performance_cols": performance_cols,
        }

        # Aggregate the subset by the aggregation columns and get the best result for each group
        aggregated = subset.groupby(aggregation_columns).agg(
            {col: ["mean", "std"] for col in performance_cols}
        )

        # Go from MultiIndex to Index
        aggregated = aggregated.reset_index()

        assert (
            len(subset["callbacks.early_stopping.mode"].unique()) == 1
        ), "There should be only one mode for early stopping"
        # Identify the mode of the early stopping mode

        # Find best results with respect to the validation set
        if subset["callbacks.early_stopping.mode"].unique()[0] == "max":
            ascending = False
            final_best_ = aggregated.sort_values(
                by=(f"val/{get_metric(subset)}", "mean"), ascending=ascending
            ).head(1)
            final_best_ = (final_best_ * 100).round(2)
        else:
            ascending = True
            final_best_ = aggregated.sort_values(
                by=(f"val/{get_metric(subset)}", "mean"), ascending=ascending
            ).head(1)

        # Get the best with respect to test set
        collected_results[dataset][model] = {
            "mean": final_best_[(f"test/{get_metric(subset)}", "mean")].values[0],
            "std": final_best_[(f"test/{get_metric(subset)}", "std")].values[0],
        }
        collected_aggregated_results[dataset][model] = aggregated.sort_values(
            by=(f"val/{get_metric(subset)}", "mean"), ascending=ascending
        )

    #  break

    # break

Attention the columns: model.feature_encoder.in_channels, has issues with unique values
Attention the columns: model.feature_encoder.selected_dimensions, has issues with unique values
Attention the columns: dataset.parameters.num_features, has issues with unique values
Attention the columns: dataset.transforms.infere_knn_connectivity.args.k, has issues with unique values
Attention the columns: dataset.transforms.infere_knn_connectivity.args.loop, has issues with unique values
Attention the columns: dataset.transforms.infere_knn_connectivity.args.cosine, has issues with unique values
Attention the columns: dataset.transforms.infere_knn_connectivity._target_, has issues with unique values
Attention the columns: dataset.transforms.infere_knn_connectivity.transform_name, has issues with unique values
Attention the columns: dataset.transforms.infere_knn_connectivity.transform_type, has issues with unique values
Attention the columns: callbacks.model_checkpoint.dirpath, has issues with uniqu

In [8]:
df[
    (df["dataset.parameters.data_name"] == "MUTAG")
    & (df["model.backbone._target_"] == "SCN2")
]

Unnamed: 0,epoch,val/loss,_timestamp,train/loss,val/accuracy,trainer/global_step,_step,_wandb,lr-Adam,_runtime,...,callbacks.model_checkpoint.every_n_epochs,callbacks.model_checkpoint.save_weights_only,callbacks.model_checkpoint.every_n_train_steps,callbacks.model_checkpoint.train_time_interval,callbacks.model_checkpoint.auto_insert_metric_name,callbacks.model_checkpoint.save_on_train_epoch_end,callbacks.rich_progress_bar._target_,callbacks.learning_rate_monitor._target_,callbacks.learning_rate_monitor.logging_interval,Infere Connectivity
0,95.0,0.440755,1.714498e+09,0.344573,0.829787,570.0,209.0,{'runtime': 9},0.000731,11.146549,...,,False,,,False,,lightning.pytorch.callbacks.RichProgressBar,lightning.pytorch.callbacks.LearningRateMonitor,epoch,False
1,130.0,0.698719,1.714498e+09,0.250919,0.765957,780.0,286.0,{'runtime': 11},0.000631,13.230936,...,,False,,,False,,lightning.pytorch.callbacks.RichProgressBar,lightning.pytorch.callbacks.LearningRateMonitor,epoch,False
2,100.0,0.571365,1.714498e+09,0.332483,0.659574,600.0,220.0,{'runtime': 9},0.000717,11.107776,...,,False,,,False,,lightning.pytorch.callbacks.RichProgressBar,lightning.pytorch.callbacks.LearningRateMonitor,epoch,False
3,130.0,0.424564,1.714498e+09,0.298073,0.744681,780.0,286.0,{'runtime': 11},0.000631,13.186315,...,,False,,,False,,lightning.pytorch.callbacks.RichProgressBar,lightning.pytorch.callbacks.LearningRateMonitor,epoch,False
4,70.0,0.591233,1.714498e+09,0.316741,0.723404,420.0,154.0,{'runtime': 6},0.000803,8.108738,...,,False,,,False,,lightning.pytorch.callbacks.RichProgressBar,lightning.pytorch.callbacks.LearningRateMonitor,epoch,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1426,60.0,1.160258,1.714195e+09,0.227150,0.808511,180.0,132.0,{'runtime': 13},0.008314,15.043748,...,,False,,,False,,lightning.pytorch.callbacks.RichProgressBar,lightning.pytorch.callbacks.LearningRateMonitor,epoch,True
1427,80.0,0.701639,1.714195e+09,0.314709,0.702128,240.0,176.0,{'runtime': 17},0.007743,18.072189,...,,False,,,False,,lightning.pytorch.callbacks.RichProgressBar,lightning.pytorch.callbacks.LearningRateMonitor,epoch,True
1429,105.0,0.421848,1.714195e+09,0.286058,0.787234,315.0,231.0,{'runtime': 21},0.007029,22.765034,...,,False,,,False,,lightning.pytorch.callbacks.RichProgressBar,lightning.pytorch.callbacks.LearningRateMonitor,epoch,True
1430,75.0,0.818408,1.714195e+09,0.283608,0.744681,225.0,165.0,{'runtime': 15},0.007886,16.458794,...,,False,,,False,,lightning.pytorch.callbacks.RichProgressBar,lightning.pytorch.callbacks.LearningRateMonitor,epoch,True


In [9]:
collected_results_inferred_connectivity = defaultdict(dict)

# Get the best result for each dataset and model for InfereKNNConnectivity case
for dataset in [
    "MUTAG",
    "REDDIT-BINARY",
    "NCI1",
    "PROTEINS",
    "PubMed",
    "Cora",
    "citeseer",
]:
    for model in collected_aggregated_results[dataset].keys():
        # try:

        col_name = "Infere Connectivity"
        a = collected_aggregated_results[dataset][model]

        a = a[a[col_name] == True]

        # Check if there are any results
        if a.empty:
            continue

        if subset["callbacks.early_stopping.mode"].unique()[0] == "max":
            ascending = False
            final_best_ = a.sort_values(
                by=(f"val/{get_metric(subset)}", "mean"), ascending=ascending
            ).head(1)
            final_best_ = (final_best_ * 100).round(2)
        else:
            ascending = True
            final_best_ = a.sort_values(
                by=(f"val/{get_metric(subset)}", "mean"), ascending=ascending
            ).head(1)

        collected_results_inferred_connectivity[dataset][model] = {
            "mean": final_best_[(f"test/{get_metric(subset)}", "mean")].values[0],
            "std": final_best_[(f"test/{get_metric(subset)}", "std")].values[0],
        }
        # except:
        #     pass

collected_results_original_connectivity = defaultdict(dict)
# Get the best result for each dataset and model for classical case
for dataset in [
    "REDDIT-BINARY",
    "MUTAG",
    "NCI1",
    "PROTEINS",
    "PubMed",
    "Cora",
    "citeseer",
]:
    for model in collected_aggregated_results[dataset].keys():
        # try:

        col_name = "Infere Connectivity"
        a = collected_aggregated_results[dataset][model]

        a = a[a[col_name] == False]

        # Check if there are any results
        if a.empty:
            continue

        if subset["callbacks.early_stopping.mode"].unique()[0] == "max":
            ascending = False
            final_best_ = a.sort_values(
                by=(f"val/{get_metric(subset)}", "mean"), ascending=ascending
            ).head(1)
            final_best_ = (final_best_ * 100).round(2)
        else:
            ascending = True
            final_best_ = a.sort_values(
                by=(f"val/{get_metric(subset)}", "mean"), ascending=ascending
            ).head(1)

        collected_results_original_connectivity[dataset][model] = {
            "mean": final_best_[(f"test/{get_metric(subset)}", "mean")].values[0],
            "std": final_best_[(f"test/{get_metric(subset)}", "std")].values[0],
        }
        # except:
        #     pass

In [10]:
# Convert nested dictionary to DataFrame
nested_dict = dict(collected_results_original_connectivity)
result_dict = pd.DataFrame.from_dict(
    {
        (i, j): nested_dict[i][j]
        for i in nested_dict.keys()
        for j in nested_dict[i].keys()
    },
    orient="index",
)


result_dict = result_dict.round(2)
result_dict["performance"] = result_dict.apply(
    lambda x: f"{x['mean']} ± {x['std']}", axis=1
)
result_dict = result_dict.drop(["mean", "std"], axis=1)

# Reset multiindex
result_dict = result_dict.reset_index()
# rename columns
result_dict.columns = ["Dataset", "Model", "Performance"]

result_dict.pivot_table(
    index="Model", columns="Dataset", values="Performance", aggfunc="first"
)

Dataset,Cora,MUTAG,NCI1,PROTEINS,REDDIT-BINARY,citeseer
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GCN,87.2 ± 1.2,77.3 ± 2.46,74.19 ± 1.22,75.27 ± 2.8,79.53 ± 1.4,72.11 ± 1.75
SCN2,84.79 ± 1.15,78.72 ± 2.13,74.29 ± 1.44,76.46 ± 1.77,,73.31 ± 2.55


In [11]:
# Convert nested dictionary to DataFrame
nested_dict = dict(collected_results_inferred_connectivity)
result_dict = pd.DataFrame.from_dict(
    {
        (i, j): nested_dict[i][j]
        for i in nested_dict.keys()
        for j in nested_dict[i].keys()
    },
    orient="index",
)


result_dict = result_dict.round(2)
result_dict["performance"] = result_dict.apply(
    lambda x: f"{x['mean']} ± {x['std']}", axis=1
)
result_dict = result_dict.drop(["mean", "std"], axis=1)

# Reset multiindex
result_dict = result_dict.reset_index()
# rename columns
result_dict.columns = ["Dataset", "Model", "Performance"]
result_dict.pivot_table(
    index="Model", columns="Dataset", values="Performance", aggfunc="first"
)

Dataset,Cora,MUTAG,NCI1,PROTEINS,PubMed,REDDIT-BINARY,citeseer
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
GCN,67.65 ± 0.82,80.14 ± 4.43,68.84 ± 1.38,74.79 ± 1.36,81.59 ± 0.28,76.13 ± 0.81,56.46 ± 1.53
SCN2,64.3 ± 3.55,80.14 ± 3.25,66.93 ± nan,75.63 ± 1.86,nan ± nan,,


In [None]:
"""
Cora
GCN 87.2 ± 1.2
SCN 84.79 ± 1.15

Cora (KNN)
GCN 67.65 ± 0.82
SCN 64.3 ± 3.55

PROTEINS
GCN 75.27 ± 2.8
SCN 76.46 ± 1.77

PROTEINS (KNN)
GCN 74.79 ± 1.36
SCN 75.63 ± 1.86

NCI1
74.19 ± 1.22
74.29 ± 1.44

NCI1 (KNN)
GCN 68.84 ± 1.38
SCN -- ± --


Mutag
GCN 77.3 ± 2.46
SCN 78.72 ± 2.13

Mutag (KNN)
GCN 80.14 ± 4.43
SCN 80.14 ± 3.25
"""

In [None]:
collected_results_inferred_connectivity = defaultdict(dict)

# Get the best result for each dataset and model for InfereKNNConnectivity case
for dataset in [
    "MUTAG",
    "REDDIT-BINARY",
    "NCI1",
    "PROTEINS",
    "PubMed",
    "Cora",
    "citeseer",
]:
    for model in collected_aggregated_results[dataset].keys():
        # try:

        col_name = "Infere Connectivity"
        a = collected_aggregated_results[dataset][model]

        a = a[a[col_name] == True]

        # Check if there are any results
        if a.empty:
            continue

        if subset["callbacks.early_stopping.mode"].unique()[0] == "max":
            ascending = False
            final_best_ = a.sort_values(
                by=(f"val/{get_metric(subset)}", "mean"), ascending=ascending
            ).head(1)
            final_best_ = (final_best_ * 100).round(2)
        else:
            ascending = True
            final_best_ = a.sort_values(
                by=(f"val/{get_metric(subset)}", "mean"), ascending=ascending
            ).head(1)

        collected_results_inferred_connectivity[dataset][model] = {
            "mean": final_best_[(f"test/{get_metric(subset)}", "mean")].values[0],
            "std": final_best_[(f"test/{get_metric(subset)}", "std")].values[0],
        }
        # except:
        #     pass

collected_results_original_connectivity = defaultdict(dict)
# Get the best result for each dataset and model for classical case
for dataset in [
    "REDDIT-BINARY",
    "MUTAG",
    "NCI1",
    "PROTEINS",
    "PubMed",
    "Cora",
    "citeseer",
]:
    for model in collected_aggregated_results[dataset].keys():
        # try:

        col_name = "Infere Connectivity"
        a = collected_aggregated_results[dataset][model]

        a = a[a[col_name] == False]

        # Check if there are any results
        if a.empty:
            continue

        if subset["callbacks.early_stopping.mode"].unique()[0] == "max":
            ascending = False
            final_best_ = a.sort_values(
                by=(f"val/{get_metric(subset)}", "mean"), ascending=ascending
            ).head(1)
            final_best_ = (final_best_ * 100).round(2)
        else:
            ascending = True
            final_best_ = a.sort_values(
                by=(f"val/{get_metric(subset)}", "mean"), ascending=ascending
            ).head(1)

        collected_results_original_connectivity[dataset][model] = {
            "mean": final_best_[(f"test/{get_metric(subset)}", "mean")].values[0],
            "std": final_best_[(f"test/{get_metric(subset)}", "std")].values[0],
        }
        # except:
        #     pass