In [19]:
len(broken_params)

11

In [83]:
import pandas as pd
import wandb

# Client used below to list the runs of one W&B project.
api = wandb.Api()

# Project is specified by <entity/project-name>
runs = api.runs("tunnels-ssl/05.20")

# Three parallel lists with one entry per run; they become the columns of
# `runs_df` below.
summary_list, config_list, name_list = [], [], []
for run in runs:
    # .summary contains the output keys/values for metrics like accuracy.
    #  We call ._json_dict to omit large files
    # NOTE(review): plain indexing - presumably raises KeyError for a run that
    # never logged "test/avg_acc_tag"; confirm every run in the project has it.
    summary_list.append(run.summary._json_dict["test/avg_acc_tag"])

    # .config contains the hyperparameters.
    #  We remove special values that start with _.
    config_list.append({k: v for k, v in run.config.items() if not k.startswith("_")})

    # .name is the human-readable name of the run.
    name_list.append(run.name)

runs_df = pd.DataFrame(
    {"avg_acc_tag": summary_list, "config": config_list, "name": name_list}
)
# Expand each nested config dict into flat, dot-separated columns
# (e.g. "model.network") and put them next to the metric and run name.
config_df = pd.json_normalize(runs_df["config"])
df = pd.concat([runs_df.drop(columns=["config"]), config_df], axis=1)
# Keep only the first element of every "data.datasets" entry
# (assumes each entry is a non-empty sequence - TODO confirm).
df["data.datasets"] = df["data.datasets"].apply(lambda x: x[0])

In [85]:
# Select the runs that satisfy ALL of the conditions below (combined with &):
# network, variance weight, dataset name containing "imagenet", approach,
# stop_at_task, number of epochs and n_first_task.
filtered_df = df[
    (df["model.network"] == "resnet34_skips")
    & (df["training.vcreg.var_weight"] == 0.64)
    & (df["data.datasets"].str.contains("imagenet"))
    & (df["training.approach.name"] == "ewc")
    & (df["data.stop_at_task"] == 0)
    & (df["training.nepochs"] == 100)
    & (df["training.vcreg.n_first_task"] == -1)
]

# Narrow the selection to the columns needed to compare seeds / settings.
filtered = filtered_df[
    [
        "training.approach.name",
        "training.vcreg.reg_layers",
        "training.vcreg.var_weight",
        "training.vcreg.cov_weight",
        "data.num_tasks",
        "model.network",
        "misc.seed",
        "avg_acc_tag",
    ]
]

# Export is disabled; mode="a" would append to an existing file.
# filtered.to_csv("ewc_imagent_seeds.csv", mode="a", index=False)

filtered

Unnamed: 0,training.approach.name,training.vcreg.reg_layers,training.vcreg.var_weight,training.vcreg.cov_weight,data.num_tasks,model.network,misc.seed,avg_acc_tag
4,ewc,.*after_relu,0.64,12.8,20,resnet34_skips,2,3.9
5,ewc,.*after_relu,0.64,12.8,20,resnet34_skips,1,5.4
6,ewc,.*after_relu,0.64,12.8,10,resnet34_skips,2,15.64
7,ewc,.*after_relu,0.64,12.8,10,resnet34_skips,1,14.74


In [68]:
import wandb
import pandas as pd
import matplotlib.pyplot as plt

# Initialize the API
api = wandb.Api()

# List the runs of the project (<entity/project-name>) and take the first one
# returned. Which run that is depends on the order the API returns them in.
runs = api.runs("tunnels-ssl/05.14")
run = runs[0]
# Alternative: address one run directly by its full path.
# run = api.run("username/project_name/run_id")

# Retrieve the history for the specific metric
history = run.history(keys=["test_acc_taw/t_0"], x_axis="_step")

# Plotting is currently disabled; `history` is only inspected in a later cell.
# Convert to DataFrame for easier handling
# df = pd.DataFrame(history)

# # Plot the metric
# plt.figure(figsize=(10, 5))
# plt.plot(df["_step"], df["test_acc_taw/t_0"], label="test_acc_taw/t_0")
# plt.xlabel("Step")
# plt.ylabel("Test Accuracy")
# plt.title("Test Accuracy over Steps")
# plt.legend()
# plt.show()

In [25]:
history

Unnamed: 0,_step,test_acc_taw/t_0
0,1003,70.7
1,2277,70.9
2,3551,71.0
3,4825,71.6
4,6099,70.8
5,7373,71.3
6,8647,70.4
7,9921,70.4
8,11195,69.8
9,12469,68.9


In [11]:
hist = run.history()

In [18]:
hist[hist["test_acc_taw/t_0"].notna()]["test_acc_taw/t_0"]

148    73.8
489    35.0
Name: test_acc_taw/t_0, dtype: float64

In [26]:
# Select finetuning runs without exemplars on a 5-task split, where the
# network name contains "resnet" and the dataset name contains "cifar".
filtered_df = df[
    (df["model.network"].str.contains("resnet"))
    & (df["data.datasets"].str.contains("cifar"))
    & (df["training.approach.name"] == "finetuning")
    & (df["data.exemplars.num_exemplars"] == 0)
    & (df["data.num_tasks"] == 5)
    # & (df["training.vcreg.n_first_task"] == 1)
]
# Plain 2-D object array; column order (seed, reg_layers, results_path,
# n_first_task) is what the cell building `results` indexes into positionally.
vals = filtered_df[
    [
        "misc.seed",
        "training.vcreg.reg_layers",
        "misc.results_path",
        "training.vcreg.n_first_task",
    ]
].values

In [27]:
vals

array([], shape=(0, 4), dtype=object)

In [None]:
{
    "finetuning_first_task_reg": "results/2024/04.24/13-35-10/1",
    "resnet_finetuning_s:0_reg:True": "results/2024/04.24/13-35-10/0",
    "resnet_finetuning_s:0_reg:False": "results/2024/04.17/18-13-25/0",
    "resnet_finetuning_s:2_reg:False": "results/2024/05.14/13-28-06/1",
    "resnet_finetuning_s:1_reg:False": "results/2024/05.14/13-28-06/0",
    "resnet_finetuning_s:2_reg:True": "results/2024/05.14/13-27-53/19",
    "resnet_finetuning_s:1_reg:True": "results/2024/05.14/13-27-53/18",
}

In [18]:
import numpy as np

# Map a readable key "resnet_finetuning_s:<seed>_reg:<bool>" to the third
# entry of each row of `vals`.
results = {}
for val in vals:
    # val[0] is used as the seed and val[2] as the stored value; the reg flag
    # is True unless str(val[1]) is exactly "nan". Rows that produce the same
    # key overwrite each other.
    results[f"resnet_finetuning_s:{val[0]}_reg:{not 'nan' == str(val[1])}"] = val[2]

In [19]:
results
{
    "resnet_finetuning_s:2_reg:False": "results/2024/05.14/13-28-06/1",
    "resnet_finetuning_s:1_reg:False": "results/2024/05.14/13-28-06/0",
    "resnet_finetuning_s:2_reg:True": "results/2024/05.14/13-27-53/19",
    "resnet_finetuning_s:1_reg:True": "results/2024/05.14/13-27-53/18",
}

{'resnet_finetuning_s:2_reg:False': 'results/2024/05.14/13-28-06/1',
 'resnet_finetuning_s:1_reg:False': 'results/2024/05.14/13-28-06/0',
 'resnet_finetuning_s:2_reg:True': 'results/2024/05.14/13-27-53/19',
 'resnet_finetuning_s:1_reg:True': 'results/2024/05.14/13-27-53/18'}

In [43]:
vals

array([[2, nan, 'results/2024/05.14/13-28-06/7'],
       [1, nan, 'results/2024/05.14/13-28-06/6'],
       [2, 'classifier$', 'results/2024/05.14/13-27-53/13'],
       [1, 'classifier$', 'results/2024/05.14/13-27-53/12'],
       [0, 'classifier$', 'results/2024/05.14/13-27-15/1'],
       [0, nan, 'results/2024/05.14/13-27-08/0']], dtype=object)

In [None]:
results

In [None]:
results

In [7]:
# Print every (data, task) pair with data <= task, i.e. the upper triangle
# (diagonal included) of a 5 x 5 grid, row by row.
for data in range(5):
    for task in range(data, 5):
        print(f"{data=} {task=}")

data=0 task=0
data=0 task=1
data=0 task=2
data=0 task=3
data=0 task=4
data=1 task=1
data=1 task=2
data=1 task=3
data=1 task=4
data=2 task=2
data=2 task=3
data=2 task=4
data=3 task=3
data=3 task=4
data=4 task=4


In [91]:
from matplotlib.colors import LinearSegmentedColormap
import pandas as pd
import wandb

from typing import Optional
from itertools import product
from matplotlib import pyplot as plt
import seaborn as sns


# Helpers that turn the runs of a W&B project (given as <entity/project-name>) into a DataFrame.
def get_real_name(row):
    """Return the label under which one run is reported.

    A run whose ``data.exemplars.num_exemplars`` entry is greater than zero is
    labelled ``"replay"``; any other run keeps its ``training.approach.name``
    entry.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexable by the two
            column names above.

    Returns:
        ``"replay"`` or the value stored under ``training.approach.name``.
    """
    uses_exemplars = row["data.exemplars.num_exemplars"] > 0
    return "replay" if uses_exemplars else row["training.approach.name"]


def create_df_from_api(api, project_path):
    """Build one flat DataFrame describing the runs of a W&B project.

    Args:
        api: Object exposing ``runs(project_path)``; the cells in this
            notebook pass a ``wandb.Api()`` instance.
        project_path: Project identifier in ``<entity/project-name>`` form.

    Returns:
        A DataFrame with one row per kept run: ``avg_acc_tag`` (the
        ``test/avg_acc_tag`` summary value), ``name``, one column per
        flattened config key, and ``real_name`` (see ``get_real_name``).

    Runs that are still running are skipped, and so are runs whose summary
    has no ``test/avg_acc_tag`` entry; previously such a run aborted the
    whole load with ``KeyError``.
    """
    metric_key = "test/avg_acc_tag"
    runs = api.runs(project_path)

    summary_list, config_list, name_list = [], [], []
    for run in runs:
        if run.state == "running":
            continue
        # .summary contains the output keys/values for metrics like accuracy.
        #  We read ._json_dict to omit large files.
        summary = run.summary._json_dict
        if metric_key not in summary:
            # The run never logged the final metric, so there is nothing to
            # report for it.
            continue
        summary_list.append(summary[metric_key])

        # .config contains the hyperparameters.
        #  We remove special values that start with _.
        config_list.append(
            {k: v for k, v in run.config.items() if not k.startswith("_")}
        )

        # .name is the human-readable name of the run.
        name_list.append(run.name)

    runs_df = pd.DataFrame(
        {"avg_acc_tag": summary_list, "config": config_list, "name": name_list}
    )

    # Expand the nested config dicts into dot-separated columns.
    config_df = pd.json_normalize(runs_df["config"])
    df = pd.concat([runs_df.drop(columns=["config"]), config_df], axis=1)
    # Keep only the first element of each "data.datasets" entry
    # (assumes each entry is a non-empty sequence - TODO confirm).
    df["data.datasets"] = df["data.datasets"].apply(lambda x: x[0])
    df["real_name"] = df.apply(get_real_name, axis=1)

    return df

In [92]:
api = wandb.Api()
# project_path = "tunnels-ssl/07.08"
# df1 = create_df_from_api(api, project_path)
project_path = "tunnels-ssl/07.11"
df = create_df_from_api(api, project_path)
# Drop the rows that are both on "imagenet_subset_kaggle" and use 5 tasks;
# every other row is kept.
df = df[
    ~((df["data.datasets"] == "imagenet_subset_kaggle") & (df["data.num_tasks"] == 5))
]

In [93]:
# Fixed row order shared by the result tables: every network crossed with
# every approach.
approaches_idx = ["finetuning", "replay", "ewc", "lwf"]
networks_idx = ["resnet34_skips", "convnext_tiny"]
idx = pd.MultiIndex.from_product(
    [networks_idx, approaches_idx], names=["network", "approach"]
)

# Best avg_acc_tag per (network, approach) row and (dataset, num_tasks)
# column. The aggregation is given by name ("max"): passing the builtin `max`
# is deprecated in recent pandas and emits a FutureWarning.
# reindex(idx) imposes the fixed row order; combinations without runs become
# all-NaN rows.
pivotA = df.pivot_table(
    values="avg_acc_tag",
    index=["model.network", "real_name"],
    columns=["data.datasets", "data.num_tasks"],
    aggfunc="max",
).reindex(idx)

pivotA

Unnamed: 0_level_0,data.datasets,cifar100_fixed,cifar100_fixed,cifar100_fixed,imagenet_subset_kaggle,imagenet_subset_kaggle
Unnamed: 0_level_1,data.num_tasks,5,10,20,10,20
network,approach,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
resnet34_skips,finetuning,25.74,15.81,10.35,14.92,8.26
resnet34_skips,replay,,,,41.14,38.46
resnet34_skips,ewc,29.13,18.33,12.69,20.84,13.56
resnet34_skips,lwf,50.15,38.42,19.84,37.38,21.66
convnext_tiny,finetuning,24.24,13.49,8.9,16.66,9.28
convnext_tiny,replay,,,,34.24,28.74
convnext_tiny,ewc,,,,23.82,12.76
convnext_tiny,lwf,,,,42.98,27.06


In [6]:
import pandas as pd
import numpy as np

# Step 1: Organize your raw data into a structured format
data = {
    "network": [
        "resnet34_skips",
        "resnet34_skips",
        "resnet34_skips",
        "resnet34_skips",
        "convnext_tiny",
        "convnext_tiny",
        "convnext_tiny",
        "convnext_tiny",
    ],
    "approach": [
        "finetuning",
        "replay",
        "ewc",
        "lwf",
        "finetuning",
        "replay",
        "ewc",
        "lwf",
    ],
    "cifar100_fixed_5": [
        "27.71 ±1.14",
        "43.62 ±0.14",
        "33.50 ±0.47",
        "50.30 ±0.74",
        "30.71 ±0.68",
        "37.30 ±1.14",
        "33.82 ±0.32",
        "49.38 ±0.43",
    ],
    "cifar100_fixed_10": [
        "13.58 ±0.18",
        "37.08 ±0.11",
        "17.58 ±0.37",
        "37.63 ±0.50",
        "20.46 ±0.12",
        "32.05 ±0.85",
        "24.11 ±0.42",
        "39.69 ±0.55",
    ],
    "cifar100_fixed_20": [
        "7.90 ±0.48",
        "32.45 ±0.28",
        "12.23 ±0.78",
        "20.68 ±0.64",
        "13.68 ±0.45",
        "30.22 ±0.09",
        "15.85 ±0.65",
        "31.53 ±0.91",
    ],
    "imagenet_subset_kaggle_10": [
        "12.42 ±0.90",
        "41.29 ±0.06",
        "16.67 ±0.74",
        "37.97 ±0.85",
        "23.87 ±0.63",
        "34.71 ±0.89",
        "33.90 ±0.61",
        "42.91 ±0.31",
    ],
    "imagenet_subset_kaggle_20": [
        "5.26 ±0.3",
        "37.57 ±0.42",
        "5.28 ±0.07",
        "21.17 ±0.40",
        "12.86 ±0.75",
        "29.91 ±1.19",
        "21.34 ±0.84",
        "27.56 ±0.27",
    ],
}

# Step 2: Create a DataFrame
df = pd.DataFrame(data)

# Step 3: Clean the data by removing the `±` part
# Only the first "<digits>.<digits>" match (the mean) is kept; expand=False
# makes str.extract return a Series rather than a one-column DataFrame.
for col in df.columns[2:]:
    df[col] = df[col].str.extract(r"(\d+\.\d+)", expand=False).astype(float)

# Step 4: Create a pivot table
# Each (network, approach) pair occurs once, so the default mean aggregation
# leaves the values unchanged.
pivot_table = pd.pivot_table(
    df,
    values=[
        "cifar100_fixed_5",
        "cifar100_fixed_10",
        "cifar100_fixed_20",
        "imagenet_subset_kaggle_10",
        "imagenet_subset_kaggle_20",
    ],
    index=["network", "approach"],
)

# Step 5: Adjust the pivot table format to match the desired output
# Split every "<dataset>_<num_tasks>" column name into a (dataset, int) pair.
# Deriving the labels from the names keeps each label attached to its own
# column regardless of the order pivot_table returns the columns in (it sorts
# them by name, so a hard-coded list has to mirror that order exactly).
pivot_table.columns = pd.MultiIndex.from_tuples(
    [
        (dataset, int(num_tasks))
        for dataset, num_tasks in (
            name.rsplit("_", 1) for name in pivot_table.columns
        )
    ]
)
pivot_table = pivot_table.sort_index(axis=1)
# # pivot_table = pivot_table.sort_index(level=['network', 'approach'])
# Impose the shared (network, approach) row order defined by `idx`.
mix_reg = pivot_table.reindex(idx)

In [7]:
import pandas as pd
import numpy as np

# Step 1: Organize your raw data into a structured format
data = {
    "network": [
        "resnet34_skips",
        "resnet34_skips",
        "resnet34_skips",
        "resnet34_skips",
        "convnext_tiny",
        "convnext_tiny",
        "convnext_tiny",
        "convnext_tiny",
    ],
    "approach": [
        "finetuning",
        "replay",
        "ewc",
        "lwf",
        "finetuning",
        "replay",
        "ewc",
        "lwf",
    ],
    "cifar100_fixed_5": [
        "20.55 ±0.42",
        "40.63 ±1.09",
        "23.10 ±0.63",
        "39.26 ±0.29",
        "26.21 ±0.85",
        "34.53 ±0.30",
        "29.57 ±0.03",
        "40.70 ±0.12",
    ],
    "cifar100_fixed_10": [
        "11.54 ±0.13",
        "35.42 ±2.84",
        "12.22 ±0.14",
        "25.45 ±1.18",
        "15.31 ±0.45",
        "28.59 ±0.16",
        "16.92 ±0.67",
        "29.83 ±0.25",
    ],
    "cifar100_fixed_20": [
        "5.03 ±0.75",
        "31.60 ±1.89",
        "4.91 ±2.91",
        "16.89 ±1.03",
        "8.19 ±0.33",
        "25.42 ±0.31",
        "8.87 ±0.65",
        "23.01 ±1.07",
    ],
    "imagenet_subset_kaggle_10": [
        "11.52 ±0.14",
        "38.59 ±0.88",
        "13.73 ±0.20",
        "35.47 ±1.22",
        "17.29 ±0.37",
        "34.55 ±1.05",
        "21.65 ±0.86",
        "43.73 ±0.57",
    ],
    "imagenet_subset_kaggle_20": [
        "6.08 ±0.30",
        "35.00 ±2.15",
        "7.01 ±0.15",
        "19.61 ±0.46",
        "7.37 ±0.01",
        "29.78 ±0.91",
        "10.81 ±0.48",
        "29.60 ±0.87",
    ],
}

# Step 2: Create a DataFrame
df = pd.DataFrame(data)

# Step 3: Clean the data by removing the `±` part
# Only the first "<digits>.<digits>" match (the mean) is kept; expand=False
# makes str.extract return a Series rather than a one-column DataFrame.
for col in df.columns[2:]:
    df[col] = df[col].str.extract(r"(\d+\.\d+)", expand=False).astype(float)

# Step 4: Create a pivot table
# Each (network, approach) pair occurs once, so the default mean aggregation
# leaves the values unchanged.
pivot_table = pd.pivot_table(
    df,
    values=[
        "cifar100_fixed_5",
        "cifar100_fixed_10",
        "cifar100_fixed_20",
        "imagenet_subset_kaggle_10",
        "imagenet_subset_kaggle_20",
    ],
    index=["network", "approach"],
)

# Step 5: Adjust the pivot table format to match the desired output
# Split every "<dataset>_<num_tasks>" column name into a (dataset, int) pair.
# Deriving the labels from the names keeps each label attached to its own
# column regardless of the order pivot_table returns the columns in (it sorts
# them by name, so a hard-coded list has to mirror that order exactly).
pivot_table.columns = pd.MultiIndex.from_tuples(
    [
        (dataset, int(num_tasks))
        for dataset, num_tasks in (
            name.rsplit("_", 1) for name in pivot_table.columns
        )
    ]
)
pivot_table = pivot_table.sort_index(axis=1)
# # pivot_table = pivot_table.sort_index(level=['network', 'approach'])
# Impose the shared (network, approach) row order defined by `idx`.
noreg = pivot_table.reindex(idx)
noreg

Unnamed: 0_level_0,Unnamed: 1_level_0,cifar100_fixed,cifar100_fixed,cifar100_fixed,imagenet_subset_kaggle,imagenet_subset_kaggle
Unnamed: 0_level_1,Unnamed: 1_level_1,5,10,20,10,20
network,approach,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
resnet34_skips,finetuning,20.55,11.54,5.03,11.52,6.08
resnet34_skips,replay,40.63,35.42,31.6,38.59,35.0
resnet34_skips,ewc,23.1,12.22,4.91,13.73,7.01
resnet34_skips,lwf,39.26,25.45,16.89,35.47,19.61
convnext_tiny,finetuning,26.21,15.31,8.19,17.29,7.37
convnext_tiny,replay,34.53,28.59,25.42,34.55,29.78
convnext_tiny,ewc,29.57,16.92,8.87,21.65,10.81
convnext_tiny,lwf,40.7,29.83,23.01,43.73,29.6


In [94]:
pivotA

Unnamed: 0_level_0,data.datasets,cifar100_fixed,cifar100_fixed,cifar100_fixed,imagenet_subset_kaggle,imagenet_subset_kaggle
Unnamed: 0_level_1,data.num_tasks,5,10,20,10,20
network,approach,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
resnet34_skips,finetuning,25.74,15.81,10.35,14.92,8.26
resnet34_skips,replay,,,,41.14,38.46
resnet34_skips,ewc,29.13,18.33,12.69,20.84,13.56
resnet34_skips,lwf,50.15,38.42,19.84,37.38,21.66
convnext_tiny,finetuning,24.24,13.49,8.9,16.66,9.28
convnext_tiny,replay,,,,34.24,28.74
convnext_tiny,ewc,,,,23.82,12.76
convnext_tiny,lwf,,,,42.98,27.06


In [77]:
pivotA - mix_reg

Unnamed: 0_level_0,data.datasets,cifar100_fixed,cifar100_fixed,cifar100_fixed,imagenet_subset_kaggle,imagenet_subset_kaggle
Unnamed: 0_level_1,data.num_tasks,5,10,20,10,20
network,approach,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
resnet34_skips,finetuning,-1.97,2.23,2.45,2.5,3.0
resnet34_skips,replay,,,,-0.15,0.89
resnet34_skips,ewc,-4.37,0.75,0.46,4.17,8.28
resnet34_skips,lwf,-0.15,0.79,-0.84,-0.59,0.49
convnext_tiny,finetuning,-6.47,-6.97,-4.78,-7.21,-3.58
convnext_tiny,replay,,,,-0.47,-1.17
convnext_tiny,ewc,,,,-10.08,-8.58
convnext_tiny,lwf,,,,0.07,-0.5


In [95]:
pivotA - noreg

Unnamed: 0_level_0,data.datasets,cifar100_fixed,cifar100_fixed,cifar100_fixed,imagenet_subset_kaggle,imagenet_subset_kaggle
Unnamed: 0_level_1,data.num_tasks,5,10,20,10,20
network,approach,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
resnet34_skips,finetuning,5.19,4.27,5.32,3.4,2.18
resnet34_skips,replay,,,,2.55,3.46
resnet34_skips,ewc,6.03,6.11,7.78,7.11,6.55
resnet34_skips,lwf,10.89,12.97,2.95,1.91,2.05
convnext_tiny,finetuning,-1.97,-1.82,0.71,-0.63,1.91
convnext_tiny,replay,,,,-0.31,-1.04
convnext_tiny,ewc,,,,2.17,1.95
convnext_tiny,lwf,,,,-0.75,-2.54


In [1]:
%cd /net/tscratch/people/plghpiotr/cl-adaptation

/net/tscratch/people/plghpiotr/cl-adaptation


In [2]:
import pandas as pd
from src.wandb_wrapper.wandb_wrapper import WandbWrapper


wrapper = WandbWrapper()


old_df = wrapper.create_df_from_project("tunnels-ssl/05.15")

# Keep the 5/10/20-task runs whose reg_layers string contains the literal
# two-character substring ".*" (regex=False, so this is not a wildcard).
old_df = old_df[
    (old_df["data.num_tasks"].isin([5, 10, 20]))
    & (old_df["training.vcreg.reg_layers"].str.contains(".*", regex=False))
]


# Fixed row order: every network crossed with every approach.
approaches_idx = ["finetuning", "replay", "ewc", "lwf"]
networks_idx = ["resnet34_skips", "convnext_tiny"]
idx = pd.MultiIndex.from_product(
    [networks_idx, approaches_idx], names=["network", "approach"]
)

# No aggfunc is passed, so pivot_table uses its default (mean) for cells that
# contain several runs.
old_pivot = old_df.pivot_table(
    values="avg_acc_tag",
    index=["model.network", "real_name"],
    columns=["data.datasets", "data.num_tasks"],
    # aggfunc=list,
).reindex(idx)

In [6]:
from collections import namedtuple

# One (var_weight, cov_weight) setting per (network, dataset) pair; consumed
# by filter_out_df below.
RegInfo = namedtuple("RegInfo", ["network", "dataset", "var_weight", "cov_weight"])
regs = [
    RegInfo(
        network="resnet34_skips",
        dataset="cifar100_fixed",
        var_weight=12.8,
        cov_weight=4.72,
    ),
    RegInfo(
        network="resnet34_skips",
        dataset="imagenet_subset_kaggle",
        var_weight=12.8,
        cov_weight=12.8,
    ),
    RegInfo(
        network="convnext_tiny",
        dataset="cifar100_fixed",
        var_weight=12.8,
        cov_weight=0.64,
    ),
    RegInfo(
        network="convnext_tiny",
        dataset="imagenet_subset_kaggle",
        var_weight=1.74,
        cov_weight=12.8,
    ),
]


def filter_out_df(df: pd.DataFrame) -> pd.DataFrame:
    """Keep only the rows whose setting is listed in the module-level ``regs``.

    A row is kept when its (``model.network``, ``data.datasets``,
    ``training.vcreg.var_weight``, ``training.vcreg.cov_weight``) values equal
    one of the ``RegInfo`` tuples exactly (floats are compared with ``==``).

    Args:
        df: Frame containing the four columns named above.

    Returns:
        The matching rows of ``df``, in their original order.
    """
    key_columns = [
        "model.network",
        "data.datasets",
        "training.vcreg.var_weight",
        "training.vcreg.cov_weight",
    ]
    # Temporary MultiIndex over the key columns so rows can be matched
    # against the tuples as a whole.
    row_keys = df.set_index(key_columns).index
    keep = row_keys.isin(regs)
    return df.loc[keep]

In [3]:
# NOTE: needs `wrapper`, `pd` and `filter_out_df` from earlier cells; the
# recorded output of this cell is a NameError for `wrapper`.
df = wrapper.create_df_from_project("tunnels-ssl/07.11")

# Keep 10/20-task runs whose reg_layers contains the literal substring ".*"
# and whose dataset name contains "imagenet" (both matches are non-regex).
df = df[
    (df["data.num_tasks"].isin([10, 20]))
    & (df["training.vcreg.reg_layers"].str.contains(".*", regex=False))
    & (df["data.datasets"].str.contains("imagenet", regex=False))
]

# Restrict to the settings listed in `regs`.
df = filter_out_df(df)


approaches_idx = ["finetuning", "replay", "ewc", "lwf"]
networks_idx = ["resnet34_skips", "convnext_tiny"]
idx = pd.MultiIndex.from_product(
    [networks_idx, approaches_idx], names=["network", "approach"]
)

# The identity aggfunc passes each cell's values through unchanged.
# NOTE(review): this presumably only works while every cell holds exactly one
# run - confirm before adding more seeds.
corrected_mistake_pivot = df.pivot_table(
    values="avg_acc_tag",
    index=["model.network", "real_name"],
    columns=["data.datasets", "data.num_tasks"],
    aggfunc=lambda x: x,
).reindex(idx)

NameError: name 'wrapper' is not defined

In [129]:
corrected_mistake_pivot

Unnamed: 0_level_0,data.datasets,imagenet_subset_kaggle,imagenet_subset_kaggle
Unnamed: 0_level_1,data.num_tasks,10,20
network,approach,Unnamed: 2_level_2,Unnamed: 3_level_2
resnet34_skips,finetuning,14.92,8.26
resnet34_skips,replay,40.36,35.32
resnet34_skips,ewc,20.84,13.56
resnet34_skips,lwf,36.26,18.68
convnext_tiny,finetuning,16.66,9.28
convnext_tiny,replay,33.54,28.3
convnext_tiny,ewc,21.96,11.4
convnext_tiny,lwf,41.76,26.54


In [1]:
# NOTE: needs `wrapper`, `pd` and `filter_out_df` from earlier cells; the
# recorded output of this cell is a NameError for `wrapper`.
df = wrapper.create_df_from_project("tunnels-ssl/07.17")

# Keep 5/10/20-task runs whose reg_layers contains the literal substring ".*"
# (regex=False); the dataset filter is disabled.
df = df[
    (df["data.num_tasks"].isin([5, 10, 20]))
    & (df["training.vcreg.reg_layers"].str.contains(".*", regex=False))
    # & (df["data.datasets"].str.contains("imagenet", regex=False))
]

# Restrict to the settings listed in `regs`.
df = filter_out_df(df)


approaches_idx = ["finetuning", "replay", "ewc", "lwf"]
networks_idx = ["resnet34_skips", "convnext_tiny"]
idx = pd.MultiIndex.from_product(
    [networks_idx, approaches_idx], names=["network", "approach"]
)

# The identity aggfunc passes each cell's values through unchanged.
# NOTE(review): this presumably only works while every cell holds exactly one
# run - confirm before adding more seeds.
cifar_pivot = df.pivot_table(
    values="avg_acc_tag",
    index=["model.network", "real_name"],
    columns=["data.datasets", "data.num_tasks"],
    aggfunc=lambda x: x,
).reindex(idx)
cifar_pivot

NameError: name 'wrapper' is not defined

In [None]:
corrected_mistake_pivot

Unnamed: 0_level_0,data.datasets,imagenet_subset_kaggle,imagenet_subset_kaggle
Unnamed: 0_level_1,data.num_tasks,10,20
network,approach,Unnamed: 2_level_2,Unnamed: 3_level_2
resnet34_skips,finetuning,14.92,8.26
resnet34_skips,replay,40.36,35.32
resnet34_skips,ewc,20.84,13.56
resnet34_skips,lwf,36.26,18.68
convnext_tiny,finetuning,16.66,9.28
convnext_tiny,replay,33.54,28.3
convnext_tiny,ewc,21.96,11.4
convnext_tiny,lwf,41.76,26.54


In [2]:
# Join the two pivots side by side on their shared (network, approach) index.
# pd.merge defaults to an inner join, so only index entries present in both
# inputs are kept.
corrected_pivot = pd.merge(
    cifar_pivot, corrected_mistake_pivot, left_index=True, right_index=True
)
corrected_pivot

NameError: name 'pd' is not defined

In [3]:
def color_negative_red(val):
    """Return the CSS colour rule used to style one table cell.

    Args:
        val: Value supporting ``<=`` and ``>`` comparisons with 0.

    Returns:
        ``"color: red"`` for values <= 0 (zero included),
        ``"color: green"`` for values > 0, and ``"color: black"`` when
        neither comparison holds (e.g. NaN).
    """
    if val <= 0:
        shade = "red"
    elif val > 0:
        shade = "green"
    else:
        # Reached only when both comparisons are False, as with NaN.
        shade = "black"
    return f"color: {shade}"

In [4]:
(corrected_pivot - noreg).style.applymap(color_negative_red).format("{:.2f}")

NameError: name 'corrected_pivot' is not defined

In [5]:
(corrected_pivot - mix_reg).style.applymap(color_negative_red).format("{:.2f}")

NameError: name 'corrected_pivot' is not defined

In [6]:
# NOTE: needs `wrapper` and `pd` from earlier cells; the recorded output of
# this cell is a NameError for `wrapper`.
df = wrapper.create_df_from_project("tunnels-ssl/07.18")

# Keep 5/10/20-task runs with stop_at_task == 0 whose reg_layers string
# matches the regex r"\.\*.*\$": a literal ".*", later followed by a literal "$".
df = df[
    (df["data.num_tasks"].isin([5, 10, 20]))
    & (df["data.stop_at_task"] == 0)
    & (df["training.vcreg.reg_layers"].str.contains(r"\.\*.*\$"))
    # & (df["data.datasets"].str.contains("imagenet", regex=False))
]

# df = filter_out_df(df)
# df


approaches_idx = ["finetuning", "replay", "ewc", "lwf"]
networks_idx = ["resnet34_skips", "convnext_tiny"]
idx = pd.MultiIndex.from_product(
    [networks_idx, approaches_idx], names=["network", "approach"]
)

# Best avg_acc_tag per cell. The aggregation is given by name ("max"):
# passing the builtin `max` is deprecated in recent pandas and emits a
# FutureWarning.
cifar_pivot = df.pivot_table(
    values="avg_acc_tag",
    index=["model.network", "real_name"],
    columns=["data.datasets", "data.num_tasks"],
    aggfunc="max",
).reindex(idx)
cifar_pivot

NameError: name 'wrapper' is not defined

In [7]:
(cifar_pivot - mix_reg).style.applymap(color_negative_red).format("{:.2f}")

NameError: name 'cifar_pivot' is not defined

In [9]:
(cifar_pivot - noreg).style.applymap(color_negative_red).format("{:.2f}")

Unnamed: 0_level_0,data.datasets,cifar100_fixed,cifar100_fixed,cifar100_fixed,imagenet_subset_kaggle,imagenet_subset_kaggle
Unnamed: 0_level_1,data.num_tasks,5,10,20,10,20
network,approach,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
resnet34_skips,finetuning,5.44,3.86,5.91,2.94,1.94
resnet34_skips,replay,-1.6,-5.27,-9.94,1.65,2.78
resnet34_skips,ewc,6.0,6.76,7.18,8.61,1.99
resnet34_skips,lwf,11.13,12.96,2.43,-2.37,
convnext_tiny,finetuning,2.23,2.66,4.62,2.45,4.53
convnext_tiny,replay,0.19,1.7,-0.62,0.01,
convnext_tiny,ewc,3.31,4.09,5.91,,
convnext_tiny,lwf,5.07,8.48,8.11,-1.51,-3.92


## 29.08.2024

In [3]:
%cd /net/tscratch/people/plghpiotr/cl-adaptation

/net/tscratch/people/plghpiotr/cl-adaptation


In [7]:
from src.wandb_wrapper.wandb_wrapper import WandbWrapper
import pandas as pd
import numpy as np


wrapper = WandbWrapper()


df_intermediate = wrapper.create_df_from_project("tunnels-ssl/08.16")
# Keep only the runs that have NO value in "data.nc_first_task".
df_intermediate = df_intermediate[(df_intermediate["data.nc_first_task"].isna())]

In [15]:
def parse_intermediate(df: pd.DataFrame) -> pd.DataFrame:
    """Pivot run results into a per-seed accuracy table.

    Args:
        df: One row per run with the columns ``model.network``,
            ``real_name``, ``data.datasets``, ``data.num_tasks``,
            ``misc.seed`` and ``avg_acc_tag``.

    Returns:
        A table with one row per (network, approach) pair in a fixed order
        and one column per (dataset, num_tasks, seed) combination. Pairs with
        no runs appear as all-NaN rows.
    """
    row_order = pd.MultiIndex.from_product(
        [
            ["resnet34_skips", "convnext_tiny"],
            ["finetuning", "replay", "ewc", "lwf"],
        ],
        names=["network", "approach"],
    )
    runs_by_seed = df.sort_values(by="misc.seed")
    # The identity aggfunc passes each cell's value through unchanged.
    # NOTE(review): presumably relies on one run per cell - confirm.
    per_seed = runs_by_seed.pivot_table(
        values="avg_acc_tag",
        index=["model.network", "real_name"],
        columns=["data.datasets", "data.num_tasks", "misc.seed"],
        aggfunc=lambda x: x,
    )
    return per_seed.reindex(row_order)


intermediate_pivot = parse_intermediate(df_intermediate)
intermediate_pivot

Unnamed: 0_level_0,data.datasets,cifar100_fixed,cifar100_fixed,cifar100_fixed,cifar100_fixed,cifar100_fixed,cifar100_fixed,cifar100_fixed,cifar100_fixed,cifar100_fixed,imagenet_subset_kaggle,imagenet_subset_kaggle,imagenet_subset_kaggle,imagenet_subset_kaggle,imagenet_subset_kaggle,imagenet_subset_kaggle
Unnamed: 0_level_1,data.num_tasks,5,5,5,10,10,10,20,20,20,10,10,10,20,20,20
Unnamed: 0_level_2,misc.seed,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2
network,approach,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3
resnet34_skips,finetuning,25.47,25.1,25.49,14.8,15.3,15.21,10.35,10.61,10.67,14.92,14.44,14.7,8.26,8.36,9.44
resnet34_skips,replay,38.06,38.71,37.77,30.33,29.77,30.01,21.97,21.8,20.98,40.36,40.28,40.62,35.32,36.18,35.64
resnet34_skips,ewc,29.13,29.34,28.61,17.28,17.77,17.26,12.52,12.08,12.91,20.84,20.4,21.64,13.56,13.18,12.84
resnet34_skips,lwf,50.15,51.16,50.56,38.42,38.69,38.15,19.84,19.19,19.24,36.26,34.46,36.0,18.68,18.46,19.12
convnext_tiny,finetuning,24.24,25.06,24.69,13.49,13.55,14.12,8.9,8.83,9.02,16.66,14.84,15.44,9.28,7.38,7.88
convnext_tiny,replay,34.88,34.11,34.14,28.31,27.03,27.44,21.74,21.44,21.64,33.54,31.72,33.32,28.3,27.56,27.3
convnext_tiny,ewc,30.28,30.64,30.6,17.34,18.37,18.42,10.98,9.66,10.34,21.96,22.06,21.6,11.4,10.4,11.3
convnext_tiny,lwf,49.66,48.41,48.36,40.71,37.55,40.25,31.4,31.44,31.01,41.76,44.12,42.2,26.54,26.94,27.82


In [35]:
import numpy as np

approaches_idx = ["finetuning", "replay", "ewc", "lwf"]
networks_idx = ["resnet34_skips", "convnext_tiny"]
idx = pd.MultiIndex.from_product(
    [networks_idx, approaches_idx], names=["network", "approach"]
)

# Each cell becomes a LaTeX snippet "$mean$ \tiny{$\pm std$}" over the runs
# that fall into it.
# The template is a RAW f-string: in a normal string literal "\t" (in \tiny)
# is a TAB character and "\p" is an invalid escape, so the LaTeX would be
# corrupted. pandas' table display re-escapes the tab as "\t", which hides
# the problem on screen.
# np.std uses its default ddof=0 (population std), whereas pandas' .std()
# as used in func2 defaults to ddof=1.
mean_std_intermediate = df_intermediate.pivot_table(
    values="avg_acc_tag",
    index=["model.network", "real_name"],
    columns=["data.datasets", "data.num_tasks"],
    aggfunc=lambda x: rf"${np.mean(x):.2f}$ \tiny{{$\pm{np.std(x):.2f}$}}",
).reindex(idx)


mean_std_intermediate

Unnamed: 0_level_0,data.datasets,cifar100_fixed,cifar100_fixed,cifar100_fixed,imagenet_subset_kaggle,imagenet_subset_kaggle
Unnamed: 0_level_1,data.num_tasks,5,10,20,10,20
network,approach,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
resnet34_skips,finetuning,$25.35$ \tiny{$\pm0.18$},$15.10$ \tiny{$\pm0.22$},$10.54$ \tiny{$\pm0.14$},$14.69$ \tiny{$\pm0.20$},$8.69$ \tiny{$\pm0.53$}
resnet34_skips,replay,$38.18$ \tiny{$\pm0.39$},$30.04$ \tiny{$\pm0.23$},$21.58$ \tiny{$\pm0.43$},$40.42$ \tiny{$\pm0.15$},$35.71$ \tiny{$\pm0.35$}
resnet34_skips,ewc,$29.03$ \tiny{$\pm0.31$},$17.44$ \tiny{$\pm0.24$},$12.50$ \tiny{$\pm0.34$},$20.96$ \tiny{$\pm0.51$},$13.19$ \tiny{$\pm0.29$}
resnet34_skips,lwf,$50.62$ \tiny{$\pm0.41$},$38.42$ \tiny{$\pm0.22$},$19.42$ \tiny{$\pm0.30$},$35.57$ \tiny{$\pm0.79$},$18.75$ \tiny{$\pm0.27$}
convnext_tiny,finetuning,$24.66$ \tiny{$\pm0.34$},$13.72$ \tiny{$\pm0.28$},$8.92$ \tiny{$\pm0.08$},$15.65$ \tiny{$\pm0.76$},$8.18$ \tiny{$\pm0.80$}
convnext_tiny,replay,$34.38$ \tiny{$\pm0.36$},$27.59$ \tiny{$\pm0.53$},$21.61$ \tiny{$\pm0.12$},$32.86$ \tiny{$\pm0.81$},$27.72$ \tiny{$\pm0.42$}
convnext_tiny,ewc,$30.51$ \tiny{$\pm0.16$},$18.04$ \tiny{$\pm0.50$},$10.33$ \tiny{$\pm0.54$},$21.87$ \tiny{$\pm0.20$},$11.03$ \tiny{$\pm0.45$}
convnext_tiny,lwf,$48.81$ \tiny{$\pm0.60$},$39.50$ \tiny{$\pm1.39$},$31.28$ \tiny{$\pm0.19$},$42.69$ \tiny{$\pm1.02$},$27.10$ \tiny{$\pm0.53$}


In [28]:
from pathlib import Path
from functools import reduce
from typing import List

dirs = ["resnet34_skips", "convnext_tiny"]


def get_paths(dirs: List[str]) -> List[Path]:
    """Collect the CSV files stored for the given sub-directories.

    Args:
        dirs: Names of sub-directories of ``notebooks/csvs_new`` (resolved
            relative to the current working directory).

    Returns:
        Every ``*.csv`` path found directly inside each sub-directory,
        grouped in the order of ``dirs``. Within one directory the order is
        whatever ``Path.glob`` yields.
    """
    root = Path("notebooks", "csvs_new")
    return [
        csv_path
        for dir_ in dirs
        for csv_path in (root / dir_).glob("*.csv")
    ]


def get_merged_df(paths: List[Path]):
    """Read every CSV in ``paths`` and stack the rows into one DataFrame.

    Each frame gets a ``network`` column holding the name of the directory
    that contains its file. The frames are concatenated row-wise without
    resetting the index, so index values repeat across files.

    Args:
        paths: CSV files to load.

    Returns:
        The concatenated DataFrame.
    """
    frames = [
        pd.read_csv(csv_path).assign(network=csv_path.parent.name)
        for csv_path in paths
    ]
    return pd.concat(frames, axis=0)


def apply(x, f):
    """Call ``f`` on ``x`` and return the result.

    The (value, function) argument order matches what ``functools.reduce``
    passes to its reducer, which is how ``compose`` uses it.
    """
    result = f(x)
    return result


def compose(*callables):
    """Return a function that pipes its argument through ``callables``.

    The callables run left to right: the first receives the argument and
    each later one receives the previous result. With no callables the
    argument is returned unchanged.
    """

    def pipeline(x):
        return reduce(apply, callables, x)

    return pipeline

In [38]:
df = compose(
    get_paths,
    get_merged_df,
)(dirs)
df

Unnamed: 0,num_tasks,approach,reg_layers,avg_acc_tag,dataset,seed,network
0,10,finetuning,False,11.36,imagenet_subset_kaggle,2,resnet34_skips
1,10,finetuning,True,13.44,imagenet_subset_kaggle,2,resnet34_skips
2,10,replay,False,38.90,imagenet_subset_kaggle,2,resnet34_skips
3,10,replay,True,41.24,imagenet_subset_kaggle,2,resnet34_skips
4,10,ewc,False,13.92,imagenet_subset_kaggle,2,resnet34_skips
...,...,...,...,...,...,...,...
19,20,replay,True,30.11,cifar100_fixed,2,convnext_tiny
20,20,ewc,False,8.12,cifar100_fixed,2,convnext_tiny
21,20,ewc,True,15.10,cifar100_fixed,2,convnext_tiny
22,20,lwf,False,21.78,cifar100_fixed,2,convnext_tiny


In [39]:
# Split the merged CSV rows by the boolean "reg_layers" column.
# NOTE: this rebinds `noreg`, which an earlier cell bound to a pivot table;
# cells that expect that pivot must not be re-run after this one.
reg = df[df["reg_layers"] == True]
noreg = df[df["reg_layers"] == False]

In [40]:
def func1(df: pd.DataFrame) -> pd.DataFrame:
    """Pivot CSV-derived results into a per-seed accuracy table.

    Args:
        df: One row per run with the columns ``network``, ``approach``,
            ``dataset``, ``num_tasks``, ``seed`` and ``avg_acc_tag``.

    Returns:
        A table with one row per (network, approach) pair in a fixed order
        and one column per (dataset, num_tasks, seed) combination. Pairs with
        no runs appear as all-NaN rows.
    """
    row_order = pd.MultiIndex.from_product(
        [
            ["resnet34_skips", "convnext_tiny"],
            ["finetuning", "replay", "ewc", "lwf"],
        ],
        names=["network", "approach"],
    )
    runs_by_seed = df.sort_values(by="seed")
    # The identity aggfunc passes each cell's value through unchanged.
    # NOTE(review): presumably relies on one run per cell - confirm.
    per_seed = runs_by_seed.pivot_table(
        values="avg_acc_tag",
        index=["network", "approach"],
        columns=["dataset", "num_tasks", "seed"],
        aggfunc=lambda x: x,
    )
    return per_seed.reindex(row_order)


noreg_pivot = func1(noreg)

In [82]:
# Per-seed accuracy difference between the two pivots; both are expected to
# share the same row index and (dataset, num_tasks, seed) column levels.
df = intermediate_pivot - noreg_pivot

# df = df.melt( value_name="avg_acc_tag")
# Go back to long format (one row per network/approach/dataset/num_tasks/seed)
# so the differences can be re-aggregated over seeds.
pivot_reset = df.reset_index()
df = pd.melt(
    pivot_reset,
    id_vars=["network", "approach"],
    var_name=["dataset", "num_tasks", "seed"],
    value_name="avg_acc_tag",
)
approaches_idx = ["finetuning", "replay", "ewc", "lwf"]
networks_idx = ["resnet34_skips", "convnext_tiny"]
idx = pd.MultiIndex.from_product(
    [networks_idx, approaches_idx], names=["network", "approach"]
)
# Each cell becomes the LaTeX snippet "\positive{$mean$ \tiny{$\pm std$}}".
# The template is a RAW f-string: in a normal string literal "\t" (in \tiny)
# is a TAB character and "\p" is an invalid escape, so the LaTeX would be
# corrupted. pandas' table display re-escapes the tab as "\t", which hides
# the problem on screen.
# np.std uses its default ddof=0 (population std).
# NOTE(review): "\positive" is emitted for every cell, negative means
# included - presumably adjusted by hand afterwards; confirm.
pivot = df.pivot_table(
    values="avg_acc_tag",
    index=["network", "approach"],
    columns=["dataset", "num_tasks"],
    aggfunc=lambda x: rf"\positive{{${np.mean(x):.2f}$ \tiny{{$\pm{np.std(x):.2f}$}}}}",
).reindex(idx)

pivot

Unnamed: 0_level_0,dataset,cifar100_fixed,cifar100_fixed,cifar100_fixed,imagenet_subset_kaggle,imagenet_subset_kaggle
Unnamed: 0_level_1,num_tasks,5,10,20,10,20
network,approach,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
resnet34_skips,finetuning,\positive{$4.81$ \tiny{$\pm0.44$}},\positive{$3.56$ \tiny{$\pm0.32$}},\positive{$5.51$ \tiny{$\pm0.64$}},\positive{$3.17$ \tiny{$\pm0.26$}},\positive{$2.61$ \tiny{$\pm0.60$}}
resnet34_skips,replay,\positive{$-2.45$ \tiny{$\pm0.65$}},\positive{$-5.38$ \tiny{$\pm2.11$}},\positive{$-10.02$ \tiny{$\pm1.56$}},\positive{$1.83$ \tiny{$\pm0.72$}},\positive{$0.71$ \tiny{$\pm2.02$}}
resnet34_skips,ewc,\positive{$5.93$ \tiny{$\pm0.69$}},\positive{$5.22$ \tiny{$\pm0.33$}},\positive{$7.59$ \tiny{$\pm2.07$}},\positive{$7.23$ \tiny{$\pm0.45$}},\positive{$6.18$ \tiny{$\pm0.30$}}
resnet34_skips,lwf,\positive{$11.37$ \tiny{$\pm0.47$}},\positive{$12.97$ \tiny{$\pm1.13$}},\positive{$2.65$ \tiny{$\pm0.43$}},\positive{$0.11$ \tiny{$\pm1.73$}},\positive{$-0.86$ \tiny{$\pm0.54$}}
convnext_tiny,finetuning,\positive{$-1.55$ \tiny{$\pm0.75$}},\positive{$-1.59$ \tiny{$\pm0.65$}},\positive{$0.72$ \tiny{$\pm0.20$}},\positive{$-1.64$ \tiny{$\pm0.84$}},\positive{$0.81$ \tiny{$\pm0.81$}}
convnext_tiny,replay,\positive{$-0.15$ \tiny{$\pm0.32$}},\positive{$-1.00$ \tiny{$\pm0.57$}},\positive{$-3.81$ \tiny{$\pm0.26$}},\positive{$-1.69$ \tiny{$\pm0.23$}},\positive{$-2.06$ \tiny{$\pm1.15$}}
convnext_tiny,ewc,\positive{$0.94$ \tiny{$\pm0.15$}},\positive{$1.13$ \tiny{$\pm0.51$}},\positive{$1.45$ \tiny{$\pm0.76$}},\positive{$0.23$ \tiny{$\pm0.90$}},\positive{$0.22$ \tiny{$\pm0.77$}}
convnext_tiny,lwf,\positive{$8.11$ \tiny{$\pm0.66$}},\positive{$9.67$ \tiny{$\pm1.48$}},\positive{$8.27$ \tiny{$\pm0.68$}},\positive{$-1.03$ \tiny{$\pm1.35$}},\positive{$-2.50$ \tiny{$\pm0.50$}}


In [93]:
def func2(pivot):
    """Summarise a per-seed pivot by its mean and standard deviation.

    Args:
        pivot: DataFrame whose columns are a MultiIndex containing the levels
            ``data.datasets`` and ``data.num_tasks`` plus at least one more
            level (the seed) to aggregate over.

    Returns:
        A DataFrame with the same rows whose columns carry an extra top
        level: ``"Mean"`` holds the mean over the remaining column level(s)
        and ``"Std"`` the standard deviation (pandas default, ``ddof=1``).
    """
    # Group the columns by transposing first: DataFrame.groupby(axis=1) is
    # deprecated in recent pandas and emits a FutureWarning, while grouping
    # the rows of the transpose gives the same result.
    seed_groups = pivot.T.groupby(level=["data.datasets", "data.num_tasks"])

    # Calculate the mean across the seed level
    mean_pivot = seed_groups.mean().T

    # Calculate the standard deviation across the seed level
    std_pivot = seed_groups.std().T

    # Combine the mean and std into a single DataFrame
    summary_pivot = pd.concat([mean_pivot, std_pivot], axis=1, keys=["Mean", "Std"])

    return summary_pivot


def color_negative_red(val):
    """Return a CSS ``color`` declaration chosen from the sign of ``val``.

    Values ``<= 0`` map to red, values ``> 0`` map to green, and anything
    for which both comparisons are false (e.g. NaN) maps to black.
    """
    if val <= 0:
        shade = "red"
    elif val > 0:
        shade = "green"
    else:
        # Neither comparison held, e.g. NaN.
        shade = "black"
    return f"color: {shade}"


# Show the mean/std summary of (intermediate_pivot - noreg_pivot), colouring
# every cell by sign and printing two decimals.
# ``Styler.map`` is the replacement for ``Styler.applymap``, which has been
# deprecated since pandas 2.1.
func2(intermediate_pivot - noreg_pivot).style.map(color_negative_red).format(
    "{:.2f}"
)

  std_pivot = pivot.groupby(level=["data.datasets", "data.num_tasks"], axis=1).std()


Unnamed: 0_level_0,Unnamed: 1_level_0,Mean,Mean,Mean,Mean,Mean,Std,Std,Std,Std,Std
Unnamed: 0_level_1,data.datasets,cifar100_fixed,cifar100_fixed,cifar100_fixed,imagenet_subset_kaggle,imagenet_subset_kaggle,cifar100_fixed,cifar100_fixed,cifar100_fixed,imagenet_subset_kaggle,imagenet_subset_kaggle
Unnamed: 0_level_2,data.num_tasks,5,10,20,10,20,5,10,20,10,20
network,approach,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3
resnet34_skips,finetuning,4.81,3.56,5.51,3.17,2.61,0.54,0.4,0.78,0.32,0.74
resnet34_skips,replay,-2.45,-5.38,-10.02,1.83,0.71,0.8,2.58,1.91,0.88,2.47
resnet34_skips,ewc,5.93,5.22,7.59,7.23,6.18,0.84,0.41,2.53,0.55,0.36
resnet34_skips,lwf,11.37,12.97,2.65,0.11,-0.86,0.58,1.38,0.61,2.12,0.66
convnext_tiny,finetuning,-1.55,-1.59,0.72,-1.64,0.81,0.92,0.8,0.25,1.03,0.99
convnext_tiny,replay,-0.15,-1.0,-3.81,-1.69,-2.06,0.4,0.7,0.32,0.28,1.4
convnext_tiny,ewc,0.94,1.13,1.45,0.23,0.22,0.19,0.62,0.94,1.1,0.94
convnext_tiny,lwf,8.11,9.67,8.27,-1.03,-2.5,0.8,1.82,0.83,1.65,0.61


In [1]:
## Big task

In [24]:
from src.wandb_wrapper.wandb_wrapper import WandbWrapper
import pandas as pd
import numpy as np


wrapper = WandbWrapper()


# Build the run table for project "tunnels-ssl/09.03" and keep only the rows
# whose "data.nc_first_task" value is not missing.
df_intermediate = wrapper.create_df_from_project("tunnels-ssl/09.03").loc[
    lambda runs: runs["data.nc_first_task"].notna()
]
df_intermediate

Unnamed: 0,avg_acc_tag,name,data.datasets,data.exemplars.num_exemplars,data.exemplars.exemplar_selection,data.exemplars.num_exemplars_per_class,data.extra_aug,data.num_tasks,data.batch_size,data.nc_per_task,...,training.no_learning,training.eval_on_train,training.multi_softmax,training.early_stopping.lr_min,training.early_stopping.lr_factor,training.early_stopping.lr_patience,training.gridsearch_tasks,training.scheduler_milestones,training.select_best_model_by_val_loss,real_name
0,46.303333,denim-salad-2,cifar100_fixed,0,random,0,,6,128,,...,False,True,False,0.0001,3,5,0,"[30, 60, 80]",False,lwf
1,47.006667,rural-cherry-2,cifar100_fixed,0,random,0,,6,128,,...,False,True,False,0.0001,3,5,0,"[30, 60, 80]",False,lwf
2,46.266667,hopeful-moon-1,cifar100_fixed,0,random,0,,6,128,,...,False,True,False,0.0001,3,5,0,"[30, 60, 80]",False,lwf


In [53]:
# One cell per (network, approach) x dataset, holding all "avg_acc_tag" values
# of the matching runs as a NumPy array so later cells can do array arithmetic.
# ``DataFrame.map`` replaces ``DataFrame.applymap`` (deprecated since pandas 2.1).
pivot = df_intermediate.pivot_table(
    values="avg_acc_tag",
    index=["model.network", "training.approach.name"],
    columns=["data.datasets"],
    aggfunc=list,
).map(np.array)
pivot

Unnamed: 0_level_0,data.datasets,cifar100_fixed
model.network,training.approach.name,Unnamed: 2_level_1
convnext_tiny,lwf,"[46.303333333333335, 47.00666666666666, 46.266..."


In [71]:
# Load the merged "big_task" results, keep the CIFAR / LwF rows where
# reg_layers is False, align the column names with the W&B run table, and
# set the network column to "convnext_tiny" for every remaining row.
df = (
    compose(get_paths, get_merged_df)(["big_task"])
    .loc[
        lambda runs: runs["data.datasets"].str.contains("cifar")
        & runs["real_name"].str.contains("lwf")
        & (runs["reg_layers"] == False)
    ]
    .rename(
        columns={
            "real_name": "training.approach.name",
            "network": "model.network",
            "wavg_acc_tag": "avg_acc_tag",
        }
    )
    .assign(**{"model.network": "convnext_tiny"})
)
df

Unnamed: 0,reg_layers,training.approach.name,avg_acc_tag,misc.seed,data.datasets,model.network
4,False,lwf,43.58,1,cifar100_fixed,convnext_tiny
4,False,lwf,44.83,0,cifar100_fixed,convnext_tiny
4,False,lwf,42.34,2,cifar100_fixed,convnext_tiny


In [72]:
# Same layout as the regularised pivot: one cell per (network, approach) x
# dataset, holding the "avg_acc_tag" values of the matching rows as an array.
# ``DataFrame.map`` replaces ``DataFrame.applymap`` (deprecated since pandas 2.1).
noreg_pivot = df.pivot_table(
    values="avg_acc_tag",
    index=["model.network", "training.approach.name"],
    columns=["data.datasets"],
    aggfunc=list,
).map(np.array)
noreg_pivot

Unnamed: 0_level_0,data.datasets,cifar100_fixed
model.network,training.approach.name,Unnamed: 2_level_1
convnext_tiny,lwf,"[43.58, 44.83, 42.34]"


In [None]:
# Render every cell as the LaTeX snippet "$mean$ \tiny{$\pm std$}".
# The f-string must be raw: in a normal literal "\t" (from "\tiny") is a TAB
# character and "\p" is an invalid escape sequence.
# ``DataFrame.map`` replaces ``DataFrame.applymap`` (deprecated since pandas 2.1).
pivot.map(lambda array: rf"${array.mean():.2f}$ \tiny{{$\pm{array.std():.2f}$}}")

Unnamed: 0_level_0,data.datasets,cifar100_fixed
model.network,training.approach.name,Unnamed: 2_level_1
convnext_tiny,lwf,$46.53$ \tiny{$\pm0.34$}


In [73]:
def pos_or_neg(x):
    """Return the LaTeX macro name for the sign of ``x``.

    ``\\positive`` when ``x > 0``, otherwise ``\\negative``.
    """
    # Raw strings are required: in a normal literal the "\n" of "\negative"
    # is a newline and "\p" is an invalid escape sequence.
    return r"\positive" if x > 0 else r"\negative"


# Format each cell of the element-wise difference as
# "\positive{$mean$ \tiny{$\pm std$}}" (or "\negative{...}").
# NOTE(review): the arrays are subtracted position by position, so the std of
# the difference depends on both pivots listing runs in the same order -
# confirm that this pairing is intended.
# ``DataFrame.map`` replaces ``DataFrame.applymap`` (deprecated since pandas 2.1).
res = (pivot - noreg_pivot).map(
    lambda array: (
        rf"{pos_or_neg(array.mean())}"
        rf"{{${array.mean():.2f}$ \tiny{{$\pm{array.std():.2f}$}}}}"
    )
)
# np.apply_along_axis(lambda x: ' & '.join(x), 1, res.values.astype(str))
res

Unnamed: 0_level_0,data.datasets,cifar100_fixed
model.network,training.approach.name,Unnamed: 2_level_1
convnext_tiny,lwf,\positive{$2.94$ \tiny{$\pm0.73$}}
