In [2]:
from pathlib import Path
import pandas as pd

In [3]:
def grouped_results(merged_df, agg, target_column, join_symbol="±"):
    """Summarise ``target_column`` per group as formatted mean/std strings.

    Parameters
    ----------
    merged_df : pandas.DataFrame
        Frame holding the grouping columns and ``target_column``.
    agg : str or list of str
        Column name(s) handed to ``DataFrame.groupby``.
    target_column : str
        Column whose per-group mean and standard deviation are reported.
    join_symbol : str, default "±"
        Text placed between the mean and the std in the combined column,
        e.g. a LaTeX plus-minus command when the table is exported.

    Returns
    -------
    pandas.DataFrame
        Indexed by the group keys, with three string columns: ``"Mean"`` and
        ``"Std"`` (each formatted to two decimals) and ``"Mean ± Std"`` (the
        two joined by ``join_symbol`` and wrapped in ``$...$``).

    Notes
    -----
    NaN statistics are rendered as the text ``"nan"``; with pandas' default
    sample standard deviation this happens for every group that contains a
    single row.
    """
    two_decimals = "{:.2f}".format
    separator = f"{join_symbol}"

    # Build the groupby once and reuse it for both statistics instead of
    # regrouping the frame for each of them.
    grouped = merged_df.groupby(agg)[target_column]
    result_df = pd.concat(
        [grouped.mean().map(two_decimals), grouped.std().map(two_decimals)],
        axis=1,
    )
    result_df.columns = ["Mean", "Std"]

    # Combined "mean <join_symbol> std" cell, wrapped in inline-math dollars.
    result_df["Mean ± Std"] = (
        "$" + result_df["Mean"] + separator + result_df["Std"] + "$"
    )
    return result_df


def compute_diffs(merged_df, merge_on=None, metric="avg_acc_tag"):
    """Pair rows with and without ``reg_layers`` and subtract their ``metric``.

    Parameters
    ----------
    merged_df : pandas.DataFrame
        Frame containing a ``reg_layers`` column, the ``merge_on`` key
        columns and the ``metric`` column.
    merge_on : list of str, optional
        Columns identifying matching runs. When omitted (or empty) the keys
        are ``["num_tasks", "approach", "dataset", "seed"]``.
    metric : str, default "avg_acc_tag"
        Column to compare between the two halves.

    Returns
    -------
    pandas.DataFrame
        Inner join of the ``reg_layers == False`` rows with the
        ``reg_layers == True`` rows. Non-key columns carry the suffixes
        ``_false`` / ``_true``, and ``f"{metric}_diff"`` holds
        ``metric_true - metric_false``.
    """
    join_keys = merge_on or ["num_tasks", "approach", "dataset", "seed"]

    reg_flag = merged_df["reg_layers"]
    without_reg = merged_df[reg_flag.eq(False)]
    with_reg = merged_df[reg_flag.eq(True)]

    # Inner join on the key columns: only runs present in both halves survive.
    paired = without_reg.merge(
        with_reg,
        how="inner",
        on=join_keys,
        suffixes=("_false", "_true"),
    )

    diff_column = f"{metric}_diff"
    paired[diff_column] = paired[f"{metric}_true"] - paired[f"{metric}_false"]
    return paired


def get_merged_df(paths):
    """Read every CSV in ``paths`` and stack the rows into one DataFrame.

    Parameters
    ----------
    paths : iterable of path-like
        CSV files to load with ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of the loaded frames. Each file keeps its own
        row index, so index labels repeat across files.

    Raises
    ------
    ValueError
        If ``paths`` is empty, e.g. because a glob pattern matched nothing.
    """
    # Materialise first so that generators can be checked for emptiness.
    csv_paths = list(paths)
    if not csv_paths:
        raise ValueError(
            "No CSV paths were given to get_merged_df; "
            "check the directory and glob pattern."
        )

    return pd.concat([pd.read_csv(csv_path) for csv_path in csv_paths], axis=0)

# Big tables

In [4]:
# Collect the ImageNet result CSVs stored under csvs/convnext_tiny.
root = Path("csvs")
convnext = root / "convnext_tiny"
# Path.glob yields entries in filesystem order; sort so the concatenated
# row order is the same on every machine and every run.
paths = sorted(convnext.glob("imagenet*.csv"))


merged_df = get_merged_df(paths)

In [8]:
# Mean/std of the accuracy per (num_tasks, dataset, approach, reg_layers) group.
agg = ["num_tasks", "dataset", "approach", "reg_layers"]
# Raw string: "\p" is not a valid escape sequence in a regular string literal.
grouped_results(merged_df, agg, "avg_acc_tag", join_symbol=r"\pm")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Mean,Std,Mean ± Std
num_tasks,dataset,approach,reg_layers,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
10,imagenet_subset_kaggle,ewc,False,21.2,,$21.20\pmnan$
10,imagenet_subset_kaggle,ewc,True,33.88,,$33.88\pmnan$
10,imagenet_subset_kaggle,finetuning,False,17.38,,$17.38\pmnan$
10,imagenet_subset_kaggle,finetuning,True,23.64,,$23.64\pmnan$
10,imagenet_subset_kaggle,lwf,False,,,$nan\pmnan$
10,imagenet_subset_kaggle,lwf,True,43.68,,$43.68\pmnan$
10,imagenet_subset_kaggle,replay,False,34.98,,$34.98\pmnan$
10,imagenet_subset_kaggle,replay,True,34.62,,$34.62\pmnan$
20,imagenet_subset_kaggle,ewc,False,10.26,,$10.26\pmnan$
20,imagenet_subset_kaggle,ewc,True,20.78,,$20.78\pmnan$


In [10]:
# Per-group summary of the accuracy difference (reg_layers True minus False).
agg = ["num_tasks", "dataset", "approach"]
merged_reg_diff = compute_diffs(merged_df)
# Raw string: "\p" is not a valid escape sequence in a regular string literal.
grouped_results(merged_reg_diff, agg, "avg_acc_tag_diff", join_symbol=r"\pm")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Mean,Std,Mean ± Std
num_tasks,dataset,approach,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10,imagenet_subset_kaggle,ewc,12.68,,$12.68\pmnan$
10,imagenet_subset_kaggle,finetuning,6.26,,$6.26\pmnan$
10,imagenet_subset_kaggle,lwf,,,$nan\pmnan$
10,imagenet_subset_kaggle,replay,-0.36,,$-0.36\pmnan$
20,imagenet_subset_kaggle,ewc,10.52,,$10.52\pmnan$
20,imagenet_subset_kaggle,finetuning,4.64,,$4.64\pmnan$
20,imagenet_subset_kaggle,lwf,-1.62,,$-1.62\pmnan$
20,imagenet_subset_kaggle,replay,1.76,,$1.76\pmnan$


# Big task

In [214]:
# Collect every result CSV stored under csvs/big_task.
root = Path("csvs")
# NOTE: the name `convnext` is reused here but now points at the big_task directory.
convnext = root / "big_task"
# Path.glob yields entries in filesystem order; sort so the concatenated
# row order is the same on every machine and every run.
paths = sorted(convnext.glob("*.csv"))


merged_df = get_merged_df(paths)

In [215]:
# Display the frame loaded from csvs/big_task to inspect its columns and rows.
merged_df

Unnamed: 0,reg_layers,real_name,wavg_acc_tag,misc.seed,data.datasets
0,True,replay,35.87,0,c
1,True,lwf,54.09,0,c
2,True,finetuning,29.37,0,c
3,True,ewc,39.28,0,c
4,False,lwf,44.83,0,c
5,False,ewc,30.41,0,c
6,False,replay,29.15,0,c
7,False,finetuning,23.22,0,c


In [219]:
# Mean/std of the weighted accuracy per (real_name, reg_layers) group.
agg = ["real_name", "reg_layers"]
# Raw string: "\p" is not a valid escape sequence in a regular string literal.
grouped_results(merged_df, agg, "wavg_acc_tag", join_symbol=r"\pm ")

Unnamed: 0_level_0,Unnamed: 1_level_0,Mean,Std,Mean ± Std
real_name,reg_layers,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ewc,False,30.41,,$30.41\pm nan$
ewc,True,39.28,,$39.28\pm nan$
finetuning,False,23.22,,$23.22\pm nan$
finetuning,True,29.37,,$29.37\pm nan$
lwf,False,44.83,,$44.83\pm nan$
lwf,True,54.09,,$54.09\pm nan$
replay,False,29.15,,$29.15\pm nan$
replay,True,35.87,,$35.87\pm nan$


In [221]:
agg = ["real_name"]
merged_reg_diff = compute_diffs(
    merged_df,
    merge_on=["real_name", "misc.seed", "data.datasets"],
    metric="wavg_acc_tag",
)
grouped_results(merged_reg_diff, agg, "wavg_acc_tag_diff", join_symbol="\pm ")

Unnamed: 0_level_0,Mean,Std,Mean ± Std
real_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ewc,8.87,,$8.87\pm nan$
finetuning,6.15,,$6.15\pm nan$
lwf,9.26,,$9.26\pm nan$
replay,6.72,,$6.72\pm nan$
