In [16]:
import pandas as pd
import wandb

api = wandb.Api()

# Project is specified by <entity/project-name>
runs = api.runs("tunnels-ssl/05.14")

# Collect one entry per run: the final test accuracy, the hyperparameters
# and the human-readable run name.
summary_list, config_list, name_list = [], [], []
for run in runs:
    # .summary contains the output keys/values for metrics like accuracy.
    # We go through ._json_dict to omit large files.
    # .get() is used instead of [] so that a run which never logged
    # "test/avg_acc_tag" contributes None (NaN in the DataFrame) instead of
    # raising KeyError and aborting the whole export.
    summary_list.append(run.summary._json_dict.get("test/avg_acc_tag"))

    # .config contains the hyperparameters.
    # Special values whose key starts with "_" are removed.
    config_list.append({k: v for k, v in run.config.items() if not k.startswith("_")})

    # .name is the human-readable name of the run.
    name_list.append(run.name)

runs_df = pd.DataFrame(
    {"avg_acc_tag": summary_list, "config": config_list, "name": name_list}
)

In [17]:
# Expand every run's nested config dict into its own set of flat columns.
config_df = pd.json_normalize(runs_df["config"])

# Swap the raw "config" column for the flattened columns, side by side.
df = pd.concat(
    objs=[runs_df.drop("config", axis=1), config_df],
    axis="columns",
)

In [18]:
import numpy as np


# Select the single configuration whose recorded accuracy should be discarded:
# LwF, no exemplars, 20 tasks, resnet34_skips, seed 1, no regularised layers.
is_discarded_run = (
    df["training.approach.name"].eq("lwf")
    & df["data.exemplars.num_exemplars"].eq(0)
    & df["data.num_tasks"].eq(20)
    & df["model.network"].eq("resnet34_skips")
    & df["misc.seed"].eq(1)
    & df["training.vcreg.reg_layers"].isna()
)

# Overwrite the accuracy of the matching rows with NaN.
df.loc[is_discarded_run, "avg_acc_tag"] = np.nan

In [19]:
# Show every column whose name mentions "dataset".
list(filter(lambda name: "dataset" in name, df.columns))

['data.datasets', 'data.max_classes_per_dataset']

In [20]:
# Collapse the "data.datasets" entry of each run to its first element so it
# can be compared against a plain dataset name later on.
# The isinstance guard makes this cell safe to re-run: a value that is already
# unwrapped (or missing) is left untouched instead of being indexed again,
# which would otherwise truncate a string such as "cifar100_fixed" to "c".
df["data.datasets"] = df["data.datasets"].apply(
    lambda x: x[0] if isinstance(x, (list, tuple)) else x
)

In [21]:
# Inspect which distinct regularised-layer settings occur across the runs.
pd.unique(df["training.vcreg.reg_layers"])

array([nan, 'fc$', 'classifier$', '.*after_relu', '.*after_skipping'],
      dtype=object)

In [32]:
from pathlib import Path
import pandas as pd

# `df` is the flattened runs DataFrame built in the cells above.


# Build and save the accuracy table for one network / seed / dataset combination
def get_table(network, seed, dataset, runs=None, out_dir="csvs"):
    """Build, save and return the accuracy table for one network/seed/dataset.

    For every combination of number of tasks (5, 10, 20), approach
    (finetuning, replay, ewc, lwf) and regularisation on/off, the single
    matching run is looked up and its ``avg_acc_tag`` value is collected.
    "replay" is finetuning with 2000 exemplars; all other approaches use 0
    exemplars. A run counts as regularised when its
    ``training.vcreg.reg_layers`` value is not NaN.

    The table is written to ``<out_dir>/<network>/<dataset>_s<seed>.csv``
    (without the index) and is also returned.

    Args:
        network: Value to match in the ``model.network`` column.
        seed: Value to match in the ``misc.seed`` column.
        dataset: Value to match in the ``data.datasets`` column.
        runs: DataFrame of runs to search. Defaults to the notebook-level
            ``df`` when omitted, which is the original behaviour.
        out_dir: Root directory for the CSV files. Defaults to ``"csvs"``.

    Returns:
        A long-format DataFrame with the columns ``num_tasks``, ``approach``,
        ``reg_layers`` (bool), ``avg_acc_tag``, ``dataset`` and ``seed``.
        It can be pivoted on ``reg_layers`` if a wide layout is needed.

    Raises:
        ValueError: If a configuration is matched by zero or by more than one
            run.
    """
    source = df if runs is None else runs

    # Restrict the search to the runs of the requested network/seed/dataset.
    filtered_df = source[
        (source["model.network"] == network)
        & (source["misc.seed"] == seed)
        & (source["data.datasets"] == dataset)
    ]

    def extract_data(approach_name, num_exemplars, is_reg_applied, num_tasks):
        """Return the accuracy of the one run matching the configuration."""
        reg_is_missing = filtered_df["training.vcreg.reg_layers"].isna()
        condition = (
            (filtered_df["training.approach.name"] == approach_name)
            & (filtered_df["data.exemplars.num_exemplars"] == num_exemplars)
            & (filtered_df["data.num_tasks"] == num_tasks)
            & (~reg_is_missing if is_reg_applied else reg_is_missing)
        )
        matches = filtered_df.loc[condition, "avg_acc_tag"]

        # Exactly one run is expected; say which configuration is wrong
        # instead of relying on the generic error raised by .item().
        if len(matches) != 1:
            raise ValueError(
                f"expected exactly one run for network={network!r}, "
                f"seed={seed!r}, dataset={dataset!r}, "
                f"approach={approach_name!r}, num_exemplars={num_exemplars!r}, "
                f"num_tasks={num_tasks!r}, regularised={is_reg_applied!r}; "
                f"found {len(matches)}"
            )
        return matches.item()

    # Display name -> (approach name in the run config, number of exemplars).
    approaches = {
        "finetuning": ("finetuning", 0),
        "replay": ("finetuning", 2000),
        "ewc": ("ewc", 0),
        "lwf": ("lwf", 0),
    }
    reg_applied_options = [False, True]
    num_tasks_options = [5, 10, 20]

    # One row per (num_tasks, approach, regularisation) combination.
    results = []
    for num_tasks in num_tasks_options:
        for approach_name, (approach, num_exemplars) in approaches.items():
            for is_reg_applied in reg_applied_options:
                avg_acc_tag = extract_data(
                    approach, num_exemplars, is_reg_applied, num_tasks
                )
                results.append(
                    (
                        num_tasks,
                        approach_name,
                        is_reg_applied,
                        avg_acc_tag,
                        dataset,
                        seed,
                    )
                )

    result_df = pd.DataFrame(
        results,
        columns=[
            "num_tasks",
            "approach",
            "reg_layers",
            "avg_acc_tag",
            "dataset",
            "seed",
        ],
    )

    save_path = Path(out_dir) / str(network) / f"{dataset}_s{seed}.csv"
    # parents=True: the root output directory may not exist yet either.
    save_path.parent.mkdir(parents=True, exist_ok=True)

    result_df.to_csv(save_path, index=False)

    return result_df

In [33]:
import itertools


# Architectures and seeds to export; every network is paired with every seed.
networks = ["convnext_tiny", "resnet34_skips"]
seeds = [1, 2]
# datasets = ["cifar100_fixed", "imagenet_subset_kaggle"]

# Lazily enumerate all (network, seed) pairs.
combinations = itertools.product(networks, seeds)

# Write one CSV per pair; `res` ends up holding the table of the last pair.
for pair in combinations:
    network, seed = pair
    res = get_table(network, seed, "cifar100_fixed")

Unnamed: 0,num_tasks,approach,reg_layers,avg_acc_tag,dataset
0,5,finetuning,False,20.07,cifar100_fixed
1,5,finetuning,True,27.67,cifar100_fixed
2,5,replay,False,39.37,cifar100_fixed
3,5,replay,True,43.68,cifar100_fixed
4,5,ewc,False,23.16,cifar100_fixed
5,5,ewc,True,33.66,cifar100_fixed
6,5,lwf,False,38.92,cifar100_fixed
7,5,lwf,True,49.51,cifar100_fixed
8,10,finetuning,False,11.49,cifar100_fixed
9,10,finetuning,True,13.69,cifar100_fixed
