In [1]:
import pandas as pd
import wandb

api = wandb.Api()

# Project is specified by <entity/project-name>
runs = api.runs("tunnels-ssl/05.15")

summary_list, config_list, name_list = [], [], []
for run in runs:
    # .summary contains the output keys/values for metrics like accuracy.
    #  We call ._json_dict to omit large files
    summary_list.append(run.summary._json_dict["test/avg_acc_tag"])

    # .config contains the hyperparameters.
    #  We remove special values that start with _.
    config_list.append({k: v for k, v in run.config.items() if not k.startswith("_")})

    # .name is the human-readable name of the run.
    name_list.append(run.name)

runs_df = pd.DataFrame(
    {"avg_acc_tag": summary_list, "config": config_list, "name": name_list}
)

In [2]:
config_df = pd.json_normalize(runs_df["config"])
df = pd.concat([runs_df.drop(columns=["config"]), config_df], axis=1)

In [3]:
# import numpy as np


# df.loc[
#     (df["training.approach.name"] == "lwf")
#     & (df["data.exemplars.num_exemplars"] == 0)
#     & (df["data.num_tasks"] == 20)
#     & (df["model.network"] == "resnet34_skips")
#     & (df["misc.seed"] == 1)
#     & (df["training.vcreg.reg_layers"].isna()),
#     "avg_acc_tag",
# ] = np.nan

In [4]:
[column for column in df.columns if "dataset" in column]

['data.datasets', 'data.max_classes_per_dataset']

In [5]:
df["data.datasets"] = df["data.datasets"].apply(lambda x: x[0])

In [6]:
df["training.vcreg.reg_layers"].unique()

array([nan, '.*after_skipping', 'classifier$', 'fc$', '.*after_relu'],
      dtype=object)

In [19]:
from pathlib import Path
import pandas as pd

# Assuming df is your DataFrame
approaches = {
    "finetuning": ("finetuning", 0),
    "replay": ("finetuning", 2000),
    "ewc": ("ewc", 0),
    "lwf": ("lwf", 0),
}


# Filter the DataFrame for the common conditions
def get_table(network, seed, dataset):
    # fmt: off
    filtered_df = df[
        (df["model.network"] == network) 
        & (df["misc.seed"] == seed)
        & (df["data.datasets"] == dataset)
        # & (df["data.seed"] == seed)
        ]
    # fmt: on

    # Create a function to extract the relevant data
    def extract_data(approach_name, num_exemplars, reg_layers, num_tasks):
        condition = (
            (filtered_df["training.approach.name"] == approach_name)
            & (filtered_df["data.exemplars.num_exemplars"] == num_exemplars)
            & (filtered_df["data.num_tasks"] == num_tasks)
        )

        if reg_layers == "nan":
            condition &= filtered_df["training.vcreg.reg_layers"].isna()
        else:
            condition &= filtered_df["training.vcreg.reg_layers"].notna()
        assert filtered_df[condition].shape[0] <= 2
        return filtered_df[condition]["avg_acc_tag"].max()  # FIXME

    # Initialize the results list
    results = []

    # Define the parameters

    reg_layers_options = {"nan": False, "reg": True}
    num_tasks_options = [10, 20]  # FIXME

    # Extract the data
    for num_tasks in num_tasks_options:
        for approach_name, (approach, num_exemplars) in approaches.items():
            for reg_layers, is_reg_applied in reg_layers_options.items():
                avg_acc_tag = extract_data(
                    approach, num_exemplars, reg_layers, num_tasks
                )

                results.append(
                    (
                        num_tasks,
                        approach_name,
                        is_reg_applied,
                        avg_acc_tag,
                        dataset,
                        seed,
                    )
                )

    # Create a DataFrame from the results
    result_df = pd.DataFrame(
        results,
        columns=[
            "num_tasks",
            "approach",
            "reg_layers",
            "avg_acc_tag",
            "dataset",
            "seed",
        ],
    )

    save_path = Path(f"csvs/{network}/{dataset}_s{seed}.csv")
    save_path.parent.mkdir(exist_ok=True)

    result_df.to_csv(save_path, index=False)

    # Pivot the DataFrame to get the desired format
    # result_df = result_df.pivot_table(
    #     index=["num_tasks", "approach"],
    #     columns=["reg_layers"],
    #     values="avg_acc_tag",
    # )
    return result_df

In [22]:
import itertools


networks = ["convnext_tiny", "resnet34_skips"]
# seeds = [1, 2]
seeds = [0]
# datasets = ["cifar100_fixed", "imagenet_subset_kaggle"]

# Generate the Cartesian product
combinations = itertools.product(networks, seeds)

for network, seed in combinations:
    res = get_table(network, seed, "imagenet_subset_kaggle")
    break

In [23]:
res

Unnamed: 0,num_tasks,approach,reg_layers,avg_acc_tag,dataset,seed
0,10,finetuning,False,,imagenet_subset_kaggle,0
1,10,finetuning,True,23.64,imagenet_subset_kaggle,0
2,10,replay,False,,imagenet_subset_kaggle,0
3,10,replay,True,34.62,imagenet_subset_kaggle,0
4,10,ewc,False,,imagenet_subset_kaggle,0
5,10,ewc,True,33.88,imagenet_subset_kaggle,0
6,10,lwf,False,,imagenet_subset_kaggle,0
7,10,lwf,True,43.68,imagenet_subset_kaggle,0
8,20,finetuning,False,,imagenet_subset_kaggle,0
9,20,finetuning,True,12.0,imagenet_subset_kaggle,0


In [21]:
res

Unnamed: 0,num_tasks,approach,reg_layers,avg_acc_tag,dataset,seed
0,10,finetuning,False,,imagenet_subset_kaggle,0
1,10,finetuning,True,12.8,imagenet_subset_kaggle,0
2,10,replay,False,,imagenet_subset_kaggle,0
3,10,replay,True,41.26,imagenet_subset_kaggle,0
4,10,ewc,False,,imagenet_subset_kaggle,0
5,10,ewc,True,17.24,imagenet_subset_kaggle,0
6,10,lwf,False,,imagenet_subset_kaggle,0
7,10,lwf,True,38.16,imagenet_subset_kaggle,0
8,20,finetuning,False,,imagenet_subset_kaggle,0
9,20,finetuning,True,6.32,imagenet_subset_kaggle,0


In [135]:
import pandas as pd
import wandb

api = wandb.Api()

# Project is specified by <entity/project-name>
runs = api.runs("tunnels-ssl/05.14")

summary_list, config_list, name_list = [], [], []
for run in runs:
    # .summary contains the output keys/values for metrics like accuracy.
    #  We call ._json_dict to omit large files
    summary_list.append(run.summary._json_dict["test/wavg_acc_tag"])

    # .config contains the hyperparameters.
    #  We remove special values that start with _.
    config_list.append({k: v for k, v in run.config.items() if not k.startswith("_")})

    # .name is the human-readable name of the run.
    name_list.append(run.name)

runs_df = pd.DataFrame(
    {"wavg_acc_tag": summary_list, "config": config_list, "name": name_list}
)

In [136]:
config_df = pd.json_normalize(runs_df["config"])
df = pd.concat([runs_df.drop(columns=["config"]), config_df], axis=1)

In [137]:
df

Unnamed: 0,wavg_acc_tag,name,data.datasets,data.exemplars.num_exemplars,data.exemplars.exemplar_selection,data.exemplars.num_exemplars_per_class,data.extra_aug,data.num_tasks,data.batch_size,data.nc_per_task,...,training.approach.kwargs.alpha,training.approach.kwargs.fi_num_samples,training.approach.kwargs.fi_sampling_type,training.approach.kwargs.all_outputs,training.vcreg.scale,training.vcreg.cov_weight,training.vcreg.reg_layers,training.vcreg.smooth_cov,training.vcreg.var_weight,training.vcreg.n_first_task
0,29.540000,still-sunset-171,[cifar100_fixed],0,random,0,,10,128,,...,,,,,,,,,,
1,23.630000,hearty-plant-170,[cifar100_fixed],0,random,0,,20,128,,...,,,,,,,,,,
2,29.980000,eager-plant-169,[cifar100_fixed],0,random,0,,10,128,,...,,,,,,,,,,
3,40.830000,firm-firefly-168,[cifar100_fixed],0,random,0,,5,128,,...,,,,,,,,,,
4,21.780000,soft-violet-167,[cifar100_fixed],0,random,0,,20,128,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165,45.533333,polished-lion-6,[imagenet_subset_kaggle],0,random,0,,10,128,,...,,,,False,False,4.72,classifier$,1.0,0.64,-1.0
166,45.200000,earthy-glitter-5,[imagenet_subset_kaggle],0,random,0,,10,128,,...,,,,False,False,1.74,classifier$,1.0,0.64,-1.0
167,41.333333,stilted-resonance-4,[imagenet_subset_kaggle],0,random,0,,10,128,,...,,,,False,False,0.64,classifier$,1.0,0.64,-1.0
168,37.933333,blooming-microwave-3,[imagenet_subset_kaggle],0,random,0,,10,128,,...,,,,False,,,,,,


In [138]:
[column for column in df.columns if "num" in column]

['data.exemplars.num_exemplars',
 'data.exemplars.num_exemplars_per_class',
 'data.num_tasks',
 'data.num_workers',
 'training.approach.kwargs.fi_num_samples']

In [139]:
# fmt: off
filtered_df = df[
    (df["data.nc_first_task"] == 50) 
    ]
# fmt: on

In [140]:
filtered_df["reg_layers"] = filtered_df["training.vcreg.reg_layers"].notna()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["reg_layers"] = filtered_df["training.vcreg.reg_layers"].notna()


In [141]:
filtered_df[["training.approach.name", "data.exemplars.num_exemplars"]]

Unnamed: 0,training.approach.name,data.exemplars.num_exemplars
95,finetuning,2000
97,lwf,0
98,finetuning,0
99,ewc,0
100,lwf,0
101,ewc,0
102,finetuning,2000
103,finetuning,0


In [142]:
def get_real_name(row):
    if row["data.exemplars.num_exemplars"] > 0:
        return "replay"
    return row["training.approach.name"]


# Apply the function to create the real_name column
filtered_df["real_name"] = filtered_df.apply(get_real_name, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["real_name"] = filtered_df.apply(get_real_name, axis=1)


In [144]:
filtered_df["data.datasets"] = filtered_df["data.datasets"].apply(lambda x: x[0])
filtered_df[
    ["reg_layers", "real_name", "wavg_acc_tag", "misc.seed", "data.datasets"]
].to_csv("big_task.csv")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["data.datasets"] = filtered_df["data.datasets"].apply(lambda x: x[0])


In [146]:
to_dump = filtered_df[
    ["reg_layers", "real_name", "wavg_acc_tag", "misc.seed", "data.datasets"]
]
to_dump

Unnamed: 0,reg_layers,real_name,wavg_acc_tag,misc.seed,data.datasets
95,True,replay,35.87,0,c
97,True,lwf,54.09,0,c
98,True,finetuning,29.37,0,c
99,True,ewc,39.28,0,c
100,False,lwf,44.83,0,c
101,False,ewc,30.41,0,c
102,False,replay,29.15,0,c
103,False,finetuning,23.22,0,c


In [147]:
to_dump.to_csv("big_task.csv", index=False)