In [1]:
from collections import defaultdict
import os
from pathlib import Path
from pprint import pprint
import re

import yaml

# Directory that is searched recursively, and the literal stderr text that marks a broken run.
directory_path = "results/2024/04.19/13-45-48"
search_text = "Exception ignored in: <function Logger.__del__"

# Every file below directory_path whose name starts with "stderr".
files = Path(directory_path).rglob(pattern="stderr*")

# search_text is a literal message, not a regular expression: escape it so that
# characters such as "." only match themselves.
pattern = re.compile(re.escape(search_text))

# One entry per broken run: the parsed content of that run's .hydra/overrides.yaml.
broken_params = []
for file_path in files:
    with file_path.open("r") as file:
        for line_number, line in enumerate(file, start=1):
            if pattern.search(line):
                print(
                    f"Found in file: {file_path}, line: {line_number}, content: {line.strip()}"
                )
                # The overrides file sits two directory levels above the stderr file.
                yaml_file = file_path.parent.parent / ".hydra" / "overrides.yaml"
                # Context manager so the YAML file handle is closed right after parsing.
                with yaml_file.open("r") as overrides_file:
                    broken_params.append(yaml.safe_load(overrides_file))
                # The first hit is enough to flag the run; skip the rest of the file.
                break
# pprint(broken_params)

# grouped_data = defaultdict(list)

# # Iterate through the data and group it based on the first element of each sublist
# for sublist in broken_params:
#     key = sublist[0]
#     grouped_data[key].append(sublist[1:])

# # Convert defaultdict to dict
# grouped_data = dict(grouped_data)

# pprint(grouped_data)

Found in file: results/2024/04.19/13-45-48/4/cifar100_fixed_ewc/stderr-2024-04-19-16-49.txt, line: 8, content: Exception ignored in: <function Logger.__del__ at 0x14c6d9302560>
Found in file: results/2024/04.19/13-45-48/3/cifar100_fixed_ewc/stderr-2024-04-19-16-48.txt, line: 8, content: Exception ignored in: <function Logger.__del__ at 0x1490da6ea560>


In [2]:
import pandas as pd
import numpy as np

# Stack the collected override lists into an array so that one override position can be
# sliced across all broken runs (see the `params[:, idx]` indexing below).
# NOTE(review): assumes every overrides.yaml holds the same number of entries, so the
# result is 2-D — confirm.
params = np.asarray(broken_params)


def delete_to_pandas(name, arr):
    """Remove ``name`` from every string in ``arr``.

    Args:
        name: Substring to strip; every occurrence is removed (``str.replace``).
        arr: Iterable of strings.

    Returns:
        A list holding one stripped string per element of ``arr``, in order.
    """
    # Return a concrete list instead of a lazy ``map``: a map object is exhausted
    # after one pass, so iterating or displaying the result twice would yield nothing.
    return [item.replace(name, "") for item in arr]


# Override prefixes, listed in the position each one is read from in `params`.
names = [
    "training.vcreg.reg_layers=",
    "training.vcreg.var_weight=",
    "training.vcreg.cov_weight=",
]

# Strip each prefix from its column of `params`; every result is one row here,
# so the frame is transposed afterwards to get one row per broken run.
stripped_rows = []
for position, prefix in enumerate(names):
    stripped_rows.append(delete_to_pandas(prefix, params[:, position]))
df = pd.DataFrame(stripped_rows).T

In [3]:
# Give the positional columns 0, 1, 2 readable names, in that order.
column_labels = ["reg_layers", "var_weight", "cov_weight"]
df = df.rename(columns=dict(enumerate(column_labels)))

In [4]:
# One dict per row, keyed by column name. orient="records" reads the rows directly,
# so no transpose is needed (a transpose upcasts mixed dtypes and would collapse
# rows that share an index label).
df.to_dict(orient="records")

[{'reg_layers': 'fc$', 'var_weight': '1.74', 'cov_weight': '0.64'},
 {'reg_layers': 'fc$', 'var_weight': '0.64', 'cov_weight': '12.8'}]

In [19]:
len(broken_params)

11

In [83]:
import pandas as pd
import wandb

api = wandb.Api()

# Project is specified by <entity/project-name>
runs = api.runs("tunnels-ssl/05.20")

summary_list, config_list, name_list = [], [], []
for run in runs:
    # .summary contains the output keys/values for metrics like accuracy.
    #  We call ._json_dict to omit large files.
    #  .get() keeps runs that never logged the metric as a missing value instead of
    #  aborting the whole loop with a KeyError.
    summary_list.append(run.summary._json_dict.get("test/avg_acc_tag"))

    # .config contains the hyperparameters.
    #  We remove special values that start with _.
    config_list.append({k: v for k, v in run.config.items() if not k.startswith("_")})

    # .name is the human-readable name of the run.
    name_list.append(run.name)

runs_df = pd.DataFrame(
    {"avg_acc_tag": summary_list, "config": config_list, "name": name_list}
)
# Flatten the nested config dicts into dotted column names (e.g. "model.network").
config_df = pd.json_normalize(config_list)
df = pd.concat([runs_df.drop(columns=["config"]), config_df], axis=1)
# Keep only the first entry of each dataset list; values that are not a non-empty
# list/tuple (e.g. NaN for runs without this config key) are left untouched.
df["data.datasets"] = df["data.datasets"].apply(
    lambda x: x[0] if isinstance(x, (list, tuple)) and x else x
)

In [85]:
# Boolean row mask: every condition below must hold for a run to be kept.
selection = (
    df["model.network"].eq("resnet34_skips")
    & df["training.vcreg.var_weight"].eq(0.64)
    & df["data.datasets"].str.contains("imagenet")
    & df["training.approach.name"].eq("ewc")
    & df["data.stop_at_task"].eq(0)
    & df["training.nepochs"].eq(100)
    & df["training.vcreg.n_first_task"].eq(-1)
)
filtered_df = df[selection]

# Columns shown in the summary table.
report_columns = [
    "training.approach.name",
    "training.vcreg.reg_layers",
    "training.vcreg.var_weight",
    "training.vcreg.cov_weight",
    "data.num_tasks",
    "model.network",
    "misc.seed",
    "avg_acc_tag",
]
filtered = filtered_df[report_columns]

# filtered.to_csv("ewc_imagent_seeds.csv", mode="a", index=False)

filtered

Unnamed: 0,training.approach.name,training.vcreg.reg_layers,training.vcreg.var_weight,training.vcreg.cov_weight,data.num_tasks,model.network,misc.seed,avg_acc_tag
4,ewc,.*after_relu,0.64,12.8,20,resnet34_skips,2,3.9
5,ewc,.*after_relu,0.64,12.8,20,resnet34_skips,1,5.4
6,ewc,.*after_relu,0.64,12.8,10,resnet34_skips,2,15.64
7,ewc,.*after_relu,0.64,12.8,10,resnet34_skips,1,14.74


In [68]:
import wandb
import pandas as pd
import matplotlib.pyplot as plt

# Connect to the W&B public API.
api = wandb.Api()

# List the runs of the project and work with the first one returned.
runs = api.runs("tunnels-ssl/05.14")
run = runs[0]
# run = api.run("username/project_name/run_id")

# Request only the "test_acc_taw/t_0" metric, with "_step" as the x axis.
metric_keys = ["test_acc_taw/t_0"]
history = run.history(keys=metric_keys, x_axis="_step")

# Convert to DataFrame for easier handling
# df = pd.DataFrame(history)

# # Plot the metric
# plt.figure(figsize=(10, 5))
# plt.plot(df["_step"], df["test_acc_taw/t_0"], label="test_acc_taw/t_0")
# plt.xlabel("Step")
# plt.ylabel("Test Accuracy")
# plt.title("Test Accuracy over Steps")
# plt.legend()
# plt.show()

In [25]:
history

Unnamed: 0,_step,test_acc_taw/t_0
0,1003,70.7
1,2277,70.9
2,3551,71.0
3,4825,71.6
4,6099,70.8
5,7373,71.3
6,8647,70.4
7,9921,70.4
8,11195,69.8
9,12469,68.9


In [11]:
hist = run.history()

In [18]:
hist[hist["test_acc_taw/t_0"].notna()]["test_acc_taw/t_0"]

148    73.8
489    35.0
Name: test_acc_taw/t_0, dtype: float64

In [26]:
# Boolean row mask: every condition below must hold for a run to be kept.
finetuning_mask = (
    df["model.network"].str.contains("resnet")
    & df["data.datasets"].str.contains("cifar")
    & df["training.approach.name"].eq("finetuning")
    & df["data.exemplars.num_exemplars"].eq(0)
    & df["data.num_tasks"].eq(5)
    # & (df["training.vcreg.n_first_task"] == 1)
)
filtered_df = df[finetuning_mask]

# Per-run fields, exported as a plain array with one row per run.
value_columns = [
    "misc.seed",
    "training.vcreg.reg_layers",
    "misc.results_path",
    "training.vcreg.n_first_task",
]
vals = filtered_df[value_columns].to_numpy()

In [27]:
vals

array([], shape=(0, 4), dtype=object)

In [None]:
{
    "finetuning_first_task_reg": "results/2024/04.24/13-35-10/1",
    "resnet_finetuning_s:0_reg:True": "results/2024/04.24/13-35-10/0",
    "resnet_finetuning_s:0_reg:False": "results/2024/04.17/18-13-25/0",
    "resnet_finetuning_s:2_reg:False": "results/2024/05.14/13-28-06/1",
    "resnet_finetuning_s:1_reg:False": "results/2024/05.14/13-28-06/0",
    "resnet_finetuning_s:2_reg:True": "results/2024/05.14/13-27-53/19",
    "resnet_finetuning_s:1_reg:True": "results/2024/05.14/13-27-53/18",
}

In [18]:
import numpy as np

# Map a readable run label to its results path. Each row of `vals` starts with
# (seed, reg_layers, results_path); any further columns are ignored.
# pd.notna() is used for the "reg" flag so that every kind of missing value
# (NaN or None) counts as "no regularised layers", not only the ones whose
# string form happens to be "nan".
results = {
    f"resnet_finetuning_s:{seed}_reg:{pd.notna(reg_layers)}": results_path
    for seed, reg_layers, results_path, *_ in vals
}

In [19]:
results
{
    "resnet_finetuning_s:2_reg:False": "results/2024/05.14/13-28-06/1",
    "resnet_finetuning_s:1_reg:False": "results/2024/05.14/13-28-06/0",
    "resnet_finetuning_s:2_reg:True": "results/2024/05.14/13-27-53/19",
    "resnet_finetuning_s:1_reg:True": "results/2024/05.14/13-27-53/18",
}

{'resnet_finetuning_s:2_reg:False': 'results/2024/05.14/13-28-06/1',
 'resnet_finetuning_s:1_reg:False': 'results/2024/05.14/13-28-06/0',
 'resnet_finetuning_s:2_reg:True': 'results/2024/05.14/13-27-53/19',
 'resnet_finetuning_s:1_reg:True': 'results/2024/05.14/13-27-53/18'}

In [43]:
vals

array([[2, nan, 'results/2024/05.14/13-28-06/7'],
       [1, nan, 'results/2024/05.14/13-28-06/6'],
       [2, 'classifier$', 'results/2024/05.14/13-27-53/13'],
       [1, 'classifier$', 'results/2024/05.14/13-27-53/12'],
       [0, 'classifier$', 'results/2024/05.14/13-27-15/1'],
       [0, nan, 'results/2024/05.14/13-27-08/0']], dtype=object)

In [None]:
results

In [None]:
results

In [7]:
# Print every (data, task) pair with data <= task, for indices 0..4.
for data in range(5):
    # Starting the inner range at `data` visits exactly the pairs where task >= data.
    for task in range(data, 5):
        print(f"{data=} {task=}")

data=0 task=0
data=0 task=1
data=0 task=2
data=0 task=3
data=0 task=4
data=1 task=1
data=1 task=2
data=1 task=3
data=1 task=4
data=2 task=2
data=2 task=3
data=2 task=4
data=3 task=3
data=3 task=4
data=4 task=4
