In [3]:
import pandas as pd
import wandb

api = wandb.Api()

# Project is specified by <entity/project-name>
runs = api.runs("tunnels-ssl/06.17")

summary_list, config_list, name_list = [], [], []
for run in runs:
    # .summary contains the output keys/values for metrics like accuracy.
    #  We call ._json_dict to omit large files
    summary_list.append(run.summary._json_dict["test/avg_acc_tag"])

    # .config contains the hyperparameters.
    #  We remove special values that start with _.
    config_list.append({k: v for k, v in run.config.items() if not k.startswith("_")})

    # .name is the human-readable name of the run.
    name_list.append(run.name)

runs_df = pd.DataFrame(
    {"avg_acc_tag": summary_list, "config": config_list, "name": name_list}
)

config_df = pd.json_normalize(runs_df["config"])
df = pd.concat([runs_df.drop(columns=["config"]), config_df], axis=1)
df["data.datasets"] = df["data.datasets"].apply(lambda x: x[0])

In [20]:
# filtered_df = df[
#     (df["model.network"].str.contains("resnet"))
#     & (df["misc.seed"] == 0)
#     & (df["data.datasets"].str.contains("imagenet"))
#     & (df["training.approach.name"] == "ewc")
#     & (df["data.num_tasks"] == 10)
# ]
# filtered_df["avg_acc_tag"]

filtered_df = df[df["data.num_tasks"] != 6]
filtered_df.pivot_table(
    index="training.approach.name",
    columns=["misc.seed", "data.num_tasks"],
    values="avg_acc_tag",
    # aggfunc=lambda x: list(round(x, 10)),
)

misc.seed,0,0,1,1,2,2
data.num_tasks,10,20,10,20,10,20
training.approach.name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
ewc,17.24,5.32,16.94,5.32,15.84,5.2
finetuning,12.06,5.4,11.76,4.9,13.44,5.48
lwf,42.68,27.58,43.26,27.82,42.78,27.28


## big First Task

In [45]:
import pandas as pd
import wandb

api = wandb.Api()

# Project is specified by <entity/project-name>
runs = api.runs("tunnels-ssl/06.17")


summary_list, config_list, name_list = [], [], []
for run in runs:
    # .summary contains the output keys/values for metrics like accuracy.
    #  We call ._json_dict to omit large files
    summary_list.append(run.summary._json_dict["test/wavg_acc_tag"])

    # .config contains the hyperparameters.
    #  We remove special values that start with _.
    config_list.append({k: v for k, v in run.config.items() if not k.startswith("_")})

    # .name is the human-readable name of the run.
    name_list.append(run.name)

runs_df = pd.DataFrame(
    {"wavg_acc_tag": summary_list, "config": config_list, "name": name_list}
)
config_df = pd.json_normalize(runs_df["config"])
df = pd.concat([runs_df.drop(columns=["config"]), config_df], axis=1)
# fmt: off
filtered_df = df[
    (df["data.nc_first_task"] == 50) 
    ]
# fmt: on
filtered_df["reg_layers"] = filtered_df["training.vcreg.reg_layers"].notna()
filtered_df[["training.approach.name", "data.exemplars.num_exemplars"]].shape


def get_real_name(row):
    if row["data.exemplars.num_exemplars"] > 0:
        return "replay"
    return row["training.approach.name"]


# Apply the function to create the real_name column
filtered_df["real_name"] = filtered_df.apply(get_real_name, axis=1)
filtered_df["data.datasets"] = filtered_df["data.datasets"].apply(lambda x: x[0])
filtered_df[filtered_df["misc.seed"] == 2][
    ["reg_layers", "real_name", "wavg_acc_tag", "misc.seed", "data.datasets"]
].to_csv("big_task_2.csv", index=False)
to_dump = filtered_df[
    ["reg_layers", "real_name", "wavg_acc_tag", "misc.seed", "data.datasets"]
]
to_dump

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["reg_layers"] = filtered_df["training.vcreg.reg_layers"].notna()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["real_name"] = filtered_df.apply(get_real_name, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["data.datasets"] = filtered_df["data.datasets"].

Unnamed: 0,reg_layers,real_name,wavg_acc_tag,misc.seed,data.datasets
0,False,replay,35.18,0,imagenet_subset_kaggle
1,True,replay,36.26,1,imagenet_subset_kaggle
2,False,replay,34.8,1,imagenet_subset_kaggle
3,True,finetuning,28.4,2,imagenet_subset_kaggle
4,True,lwf,55.02,1,imagenet_subset_kaggle
5,False,lwf,53.5,2,imagenet_subset_kaggle
6,True,finetuning,29.82,1,imagenet_subset_kaggle
7,True,lwf,54.3,2,imagenet_subset_kaggle
8,True,ewc,43.6,2,imagenet_subset_kaggle
9,True,ewc,46.54,0,imagenet_subset_kaggle


In [46]:
to_dump.to_csv("imagenet_bigtask.csv", index=False)

In [151]:
import numpy as np

pivot = to_dump
pivot = pivot.pivot_table(
    values="wavg_acc_tag",
    columns=["reg_layers"],
    index="real_name",
    aggfunc=lambda x: list(x),
)

In [152]:
def elementwise_difference(row):
    return [t - f for f, t in zip(row[False], row[True])]


# Apply the function to each row to create the 'Difference' column
pivot["Difference"] = pivot.apply(elementwise_difference, axis=1)

In [153]:
pivot = pivot.applymap(lambda x: (np.mean(x), np.std(x)))
pivot = pivot.reindex(index=["finetuning", "replay", "ewc", "lwf"])

In [154]:
pivot

reg_layers,False,True,Difference
real_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
finetuning,"(20.313333333333333, 0.5007549855523723)","(29.80666666666667, 1.1431340934271723)","(9.493333333333336, 1.4957123906538874)"
replay,"(34.653333333333336, 0.500754985552373)","(36.81333333333333, 0.42089850980438737)","(2.1600000000000015, 0.7845168364456323)"
ewc,"(34.059999999999995, 0.5238320341483516)","(45.06666666666667, 1.2002592312589089)","(11.006666666666673, 0.7717224601860162)"
lwf,"(53.76666666666666, 0.3771236166328207)","(54.706666666666656, 0.3012566274051965)","(0.9399999999999954, 0.6707210050882971)"


In [159]:
def label(mean_diff):
    if mean_diff >= 0:
        return "positive", "+"
    else:
        return "negative", ""


latex_rows = ""
for name, noreg, reg, diff in pivot.itertuples():
    latex_rows += f"{name} & ${noreg[0]:.2f}$ \\tiny{{$\\pm$ {noreg[1]:.2f}}}\\\\\n"
    latex_rows += (
        f"\\quad + \\ours{{}} & ${reg[0]:.2f}$ \\tiny{{$\\pm$ {reg[1]:.2f}}}\\\\\n"
    )

    p_label, sign = label(diff[0])
    latex_rows += f"\\quad $\\Delta$ & \\{p_label}{{${sign}{diff[0]:.2f}$ \\tiny{{$\\pm$ {diff[1]:.2f}}}}}\\\\\\midrule\n"

In [160]:
print(latex_rows)

finetuning & $20.31$ \tiny{$\pm$ 0.50}\\
\quad + \ours{} & $29.81$ \tiny{$\pm$ 1.14}\\
\quad $\Delta$ & \positive{$+9.49$ \tiny{$\pm$ 1.50}}\\\midrule
replay & $34.65$ \tiny{$\pm$ 0.50}\\
\quad + \ours{} & $36.81$ \tiny{$\pm$ 0.42}\\
\quad $\Delta$ & \positive{$+2.16$ \tiny{$\pm$ 0.78}}\\\midrule
ewc & $34.06$ \tiny{$\pm$ 0.52}\\
\quad + \ours{} & $45.07$ \tiny{$\pm$ 1.20}\\
\quad $\Delta$ & \positive{$+11.01$ \tiny{$\pm$ 0.77}}\\\midrule
lwf & $53.77$ \tiny{$\pm$ 0.38}\\
\quad + \ours{} & $54.71$ \tiny{$\pm$ 0.30}\\
\quad $\Delta$ & \positive{$+0.94$ \tiny{$\pm$ 0.67}}\\\midrule



In [None]:
import numpy as np

pivot = to_dump
pivot = pivot.pivot_table(
    values="wavg_acc_tag",
    columns=["reg_layers"],
    index="real_name",
    aggfunc=lambda x: list(x),
)

In [101]:
pivot

reg_layers,False,True,Difference
real_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ewc,34.06,45.066667,11.006667
finetuning,20.313333,29.806667,9.493333
lwf,53.766667,54.706667,0.94
replay,34.653333,36.813333,2.16


In [64]:
pivot_mean_std = pivot.applymap(lambda x: (np.mean(x), np.std(x)))
differences = pivot.apply(
    lambda row: np.array(row[True]) - np.array(row[False]), axis=1
)
differences_mean_std = differences.apply(lambda x: (np.mean(x), np.std(x)))

In [65]:
pivot_mean_std
differences_mean_std

real_name
ewc           (11.006666666666673, 0.7717224601860162)
finetuning     (9.493333333333336, 1.4957123906538874)
lwf           (0.9399999999999954, 0.6707210050882971)
replay        (2.1600000000000015, 0.7845168364456323)
dtype: object

In [66]:
differences_df = pd.DataFrame(
    differences_mean_std.tolist(),
    index=differences_mean_std.index,
    columns=["Mean Difference", "Std Difference"],
)

In [67]:
differences_df

Unnamed: 0_level_0,Mean Difference,Std Difference
real_name,Unnamed: 1_level_1,Unnamed: 2_level_1
ewc,11.006667,0.771722
finetuning,9.493333,1.495712
lwf,0.94,0.670721
replay,2.16,0.784517
