In [10]:
import wandb
import pandas as pd
import os
import shutil
import json
import pickle
import seaborn as sns

from matplotlib import pyplot as plt

In [4]:
# Maps each reported option name to a function that extracts (or derives) its
# value from a run configuration dictionary. Insertion order matters because
# config_to_string() fills its template positionally from these values.
config_of_interest = {
    'regime': lambda cfg: cfg['train'],
    'dataset': lambda cfg: "KANDY_2" if cfg['use_global_concepts'] else "KANDY_1",
    'model': lambda cfg: cfg['model'],
    'triplet_lambda': lambda cfg: cfg['triplet_lambda'],
    # The mask type is reported only when the triplet term is active (lambda > 0).
    'concept_intersection': lambda cfg: "no" if not (cfg['triplet_lambda'] > 0.) else cfg['use_mask'],
    # The margin is reported as 0 when the triplet term is switched off.
    'hamming_margin': lambda cfg: cfg['hamming_margin'] if cfg['triplet_lambda'] != 0. else 0,
    'replay_lambda': lambda cfg: cfg['replay_lambda'],
    # "no" without decorrelation, "bn" with exactly one group, "dbn" otherwise.
    'concept_normalization': lambda cfg: (
        ("bn" if cfg['decorrelation_groups'] == 1 else "dbn")
        if cfg['decorrelate_concepts']
        else "no"
    ),
}
# History keys collected as one value per logged row (turned into pandas Series
# by get_history()).
series_of_interest = [
    'avg_accuracy-val',
    'avg_accuracy-test',
    'avg_forgetting-test',
    'backward_transfer-test',
    'cas-test',
    'cas_extended-test',
    'ccs-test',
    'ccs_extended-test',
    'cvs-test',
    'cvs_extended-test',
    'forward_transfer-test',
    'tas-test',
    'tas_extended-test',
    'tcs-test',
    'tcs_extended-test',
    'tvs-test',
    'tvs_extended-test',
]

# History keys whose last non-None value is expanded into per-task DataFrames
# by build_matrices().
matrices_of_interest = [
    'concept_correlation_phi_pp_continual-tab-test',
    'concept_correlation_phi_pp_continual_extended-tab-test',
    'concept_correlation_phi_pt_continual-tab-test',
    'concept_correlation_phi_pt_continual_extended-tab-test',
]

# History keys that point to files; download_files() fetches each of them.
downloads_of_interest = [
    'acc_matrix-test',
    'acc_matrix-val',
    'concept_correlation_phi_pt-test-fig',
    'concept_correlation_phi_tt-test-fig',
    'concept_correlation_phi_pp-test-fig',
    'concept_correlation_phi_pt_continual_extended-train',
    'concept_correlation_phi_pt_continual_extended-val',
    'concept_correlation_phi_pt_continual_extended-test',
]


def config_to_string(cfg):
    """Build the identifier string of a run configuration.

    Every extractor in ``config_of_interest`` is applied to ``cfg`` (in
    dictionary order) and the results are substituted positionally into a
    dash-separated template with eight slots.

    Args:
        cfg: Run configuration mapping read by the extractors.

    Returns:
        The formatted configuration name.
    """
    template = "{}-{}-{}-t{}-{}-h{}-r{}-{}"
    fields = [extract(cfg) for extract in config_of_interest.values()]
    return template.format(*fields)

def get_history(run):
    """Collect the values of interest from a run's logged history.

    Args:
        run: Object exposing ``scan_history()``, an iterable of rows that can
            be indexed by every key in ``matrices_of_interest``,
            ``downloads_of_interest`` and ``series_of_interest``.

    Returns:
        A ``(collected, n_points)`` tuple. ``collected`` maps matrix and
        download keys to their last non-None value and series keys to a
        ``pd.Series`` of all their non-None values. ``n_points`` is the number
        of non-None values seen for the first series key encountered.

    Raises:
        KeyError: If a series key never had a non-None value, or if a row
            lacks one of the keys.
    """
    collected = {}
    n_points = 0
    reference_key = None  # first series key observed; its length defines n_points

    for row in run.scan_history():
        # Snapshot entries: a later non-None value overwrites an earlier one.
        for key in (*matrices_of_interest, *downloads_of_interest):
            value = row[key]
            if value is not None:
                collected[key] = value

        # Series entries: accumulate every non-None value in row order.
        for key in series_of_interest:
            value = row[key]
            if value is None:
                continue
            if key not in collected:
                collected[key] = []
                if reference_key is None:
                    reference_key = key
            collected[key].append(value)
            if key == reference_key:
                n_points += 1

    for key in series_of_interest:
        collected[key] = pd.Series(collected[key])
    return collected, n_points

def download_files(run, name, history):
    """Download the files referenced in ``history`` into ``data/<name>/``.

    For each key in ``downloads_of_interest`` the file at
    ``history[key]["path"]`` is downloaded through ``run.file(...)``, moved to
    ``data/<name>/<key>.<ext>`` and the history entry is replaced by that
    local path.

    Args:
        run: Object exposing ``file(path)`` whose result has ``download()``.
        name: Configuration name, used as the sub-folder of ``data``.
        history: Mapping updated in place.

    Returns:
        The same ``history`` mapping, with download entries now holding local
        paths.
    """
    os.makedirs("data/{}".format(name), exist_ok=True)
    for key in downloads_of_interest:
        print("Downloading {}/{}".format(name, key))
        remote_path = history[key]["path"]
        # The download lands at the same relative path, from where it is moved.
        run.file(remote_path).download(exist_ok=True)
        extension = remote_path.split(".")[-1]
        local_path = "data/{}/{}.{}".format(name, key, extension)
        shutil.move(remote_path, local_path)
        history[key] = local_path

    return history

def build_matrices(history, n_tasks):
    """Convert raw matrix entries of ``history`` into pandas objects, in place.

    * Keys in ``matrices_of_interest``: the value is indexed by
      ``range(n_tasks)``; each element supplies ``"data"``, ``"x_label"`` and
      ``"y_label"`` and becomes one DataFrame. The frames are stored as a
      ``pd.Series``.
    * Keys in ``downloads_of_interest`` starting with ``"acc_matrix"``: the
      value is the path of a JSON file with ``"columns"`` and ``"data"``. The
      first column holds the row labels, the rest become the DataFrame body.

    Args:
        history: Mapping produced by the previous import steps.
        n_tasks: Number of per-task tables to read for each matrix key.

    Returns:
        The same ``history`` mapping with the converted entries.
    """
    # Only existing keys are reassigned, so the dict size is stable while iterating.
    for key, value in history.items():
        if key in matrices_of_interest:
            frames = [
                pd.DataFrame(
                    value[task]["data"],
                    index=value[task]["x_label"],
                    columns=value[task]["y_label"],
                )
                for task in range(n_tasks)
            ]
            history[key] = pd.Series(frames)
        if key in downloads_of_interest and key.startswith("acc_matrix"):
            with open(history[key]) as handle:
                table = json.load(handle)
            header = table["columns"][1:]
            row_labels = [record[0] for record in table["data"]]
            cells = [record[1:] for record in table["data"]]

            history[key] = pd.DataFrame(cells, index=row_labels, columns=header)
    return history

# Create the root folder under which download_files() stores each configuration's files (data/<name>/...).
os.makedirs("data", exist_ok=True)


In [16]:
# Load the imported results from the local cache when it exists; otherwise
# rebuild them from the W&B sweeps and write the cache for the next run.
if os.path.exists("models.pkl"):
    # NOTE: pickle.load can execute arbitrary code. Only load a models.pkl
    # that was produced by this notebook.
    with open("models.pkl", "rb") as file:
        models = pickle.load(file)
else:

    api = wandb.Api()
    sweeps = ["l-lorello/kandy-cem-experiments/hsp0mjp6", "l-lorello/kandy-cem-experiments/eafdwfzq", 
              "l-lorello/kandy-cem-experiments/fq3hi0qu","l-lorello/kandy-cem-experiments/k11az40k"]
    
    models = {}
    for s in sweeps:
        sweep = api.sweep(s)
        for r in sweep.runs:
            name = config_to_string(r.config)
            # Only the first run of each configuration string is imported;
            # later runs with the same name are skipped.
            if name not in models:
                print("Importing data from configuration {}".format(name))
                history, n_tasks = get_history(r)
                history = download_files(r, name, history)
                history = build_matrices(history, n_tasks)
                models[name] = history
    
    # Persist the dictionary built above. The original dumped an undefined
    # name here, which raised NameError after all downloads had completed.
    with open("models.pkl", "wb") as file:
        pickle.dump(models, file)


Importing data from configuration continual_task-KANDY_2-vit_head_only-t1-fuzzy-h4-r10-bn
Downloading continual_task-KANDY_2-vit_head_only-t1-fuzzy-h4-r10-bn/acc_matrix-test
Downloading continual_task-KANDY_2-vit_head_only-t1-fuzzy-h4-r10-bn/acc_matrix-val
Downloading continual_task-KANDY_2-vit_head_only-t1-fuzzy-h4-r10-bn/concept_correlation_phi_pt-test-fig
Downloading continual_task-KANDY_2-vit_head_only-t1-fuzzy-h4-r10-bn/concept_correlation_phi_tt-test-fig


KeyboardInterrupt: 

In [None]:
DA QUA IN POI PRIMA MODEL SELECTION E POI PLOT

best_joint = ???
best_continual = ???

In [13]:
# Scratch check: print the training regime of the first run of `sweep`, then show,
# row by row, the value logged under one history key.
# NOTE(review): relies on `sweep` left over from the import cell, which only assigns it
# when models.pkl was absent.
print(sweep.runs[0].config["train"])
h = sweep.runs[0].scan_history()
for task_id, log in enumerate(h):
    print(task_id, log["concept_correlation_phi_pt_continual-val"])# alternative key to inspect: log["cas_extended-val"]

# For metrics logged with a step, the index runs from 0 to task_id; for metrics logged without a step, it is an index whose value is not None

continual_task
0 None
1 None
2 None
3 None
4 None
5 None
6 None
7 None
8 None
9 None
10 None
11 None
12 None
13 None
14 None
15 None
16 None
17 None
18 None
19 None
20 None
21 None
22 None
23 None
24 None
25 None
26 None
27 None
28 None
29 None
30 None
31 None
32 None
33 None
34 None
35 None
36 None
37 None
38 None
39 None
40 None
41 None
42 None
43 None
44 None
45 None
46 None
47 None
48 None
49 None
50 None
51 None
52 None
53 None
54 None
55 None
56 None
57 None
58 {'height': 1000, 'sha256': '6e0c3d4c20e61ce5f40534acd466e9b70f30a40a534ddf94bfdcf1bec2f38f36', 'path': 'media/videos/concept_correlation_phi_pt_continual-val_58_6e0c3d4c20e61ce5f405.gif', 'size': 3613045.0, '_type': 'video-file', 'width': 1000}
59 None
60 None
61 None
62 None
63 None
64 None
65 None


In [4]:
# Scratch check: print every option derived by config_of_interest for the run at index 100 of `sweep`.
# NOTE(review): needs `config_of_interest` and `sweep` to be defined by earlier cells in the current kernel.
for k, v in config_of_interest.items():
    print("{}: {}".format(k, v(sweep.runs[100].config)))

NameError: name 'config_of_interest' is not defined

In [26]:
# Scratch check: list the configuration string of every run in `sweep` (repeated names reveal duplicate runs).
# NOTE(review): needs `sweep` to be defined by an earlier cell in the current kernel.
for r in sweep.runs:
    print(config_to_string(r.config))

continual_task-KANDY-2-vit_head_only-1-fuzzy-4-10-bn
continual_task-KANDY-2-vit_head_only-1-fuzzy-4-10-no
continual_task-KANDY-2-vit_head_only-1-fuzzy-4-10-no
continual_task-KANDY-2-vit_head_only-1-fuzzy-4-1-dbn
continual_task-KANDY-2-vit_head_only-1-fuzzy-4-1-dbn
continual_task-KANDY-2-vit_head_only-1-fuzzy-4-1-bn
continual_task-KANDY-2-vit_head_only-1-fuzzy-4-1-bn
continual_task-KANDY-2-vit_head_only-1-fuzzy-4-1-no
continual_task-KANDY-2-vit_head_only-1-fuzzy-1-10-dbn
continual_task-KANDY-2-vit_head_only-1-fuzzy-1-10-dbn
continual_task-KANDY-2-vit_head_only-1-fuzzy-1-10-bn
continual_task-KANDY-2-vit_head_only-1-fuzzy-1-10-bn
continual_task-KANDY-2-vit_head_only-1-fuzzy-1-10-no
continual_task-KANDY-2-vit_head_only-1-fuzzy-1-10-no
continual_task-KANDY-2-vit_head_only-1-fuzzy-1-1-dbn
continual_task-KANDY-2-vit_head_only-1-fuzzy-1-1-dbn
continual_task-KANDY-2-vit_head_only-1-fuzzy-1-1-bn
continual_task-KANDY-2-vit_head_only-1-fuzzy-1-1-bn
continual_task-KANDY-2-vit_head_only-1-fuzzy-1-1-

In [None]:
BUG BUG BUG
Continual NON ha passato nessuna use_mask (bug in run_experiments.py), quindi ha fatto due repliche con mask fuzzy per ogni combinazione
(filtrarle con un dizionario sulla stringa di configurazione) -> for r in sweep.runs: if config_to_string(r.config) not in runs: ...

Ripetere le run (solo continual) cambiando in run_experiments d2.items in m2.items

In [40]:
import shutil
import os
# Scratch check: download one logged figure from the run at index 100 of `sweep`
# and store it under plots/test, named after the run configuration.
os.makedirs("plots/test", exist_ok=True) # plots/{DATASET}/{REGIME}/{SPLIT}/{MODEL}

# Take the first history row in which the figure was actually logged.
file = None
for h in sweep.runs[100].scan_history():
    if h["concept_correlation_phi_pt-test-fig"] is not None:
        file = h["concept_correlation_phi_pt-test-fig"]
        break

if file is None:
    # Without this guard the lookups below fail with an opaque TypeError on None.
    raise LookupError("No 'concept_correlation_phi_pt-test-fig' entry was logged for this run")

print(file)
sweep.runs[100].file(file["path"]).download(exist_ok=True) # The file is downloaded into the same folder (./media/images)
shutil.move(file["path"], "{}/{}.{}".format("plots/test", config_to_string(sweep.runs[100].config), file["format"]))

NameError: name 'ERROR' is not defined

In [None]:
dataset in figure separate a prescindere.
    
Single contribution:
Boxplot fino a 3 variabili:
1)hue = joint vs continual
2)x = model
3)y = VARIABILE -> concept_intersection/(hamming_margin+triplet_lambda combinati)/concept_normalization/replay_lambda
I valori sono avg_accuracy/cas_extended[-1]/tas_extended[-1]

2-variable interaction:
???

Barplot:
x=task_id
y=cas/tas

Ridgeplot con confidence interval (min/max): TUTTI continual [ https://seaborn.pydata.org/examples/kde_ridgeplot oppure https://stackoverflow.com/questions/66009115/3d-plot-of-multiple-time-series-in-python ]
# COME AGGIUNGERE MIN/MAX?
x=task_id
y=cas_extended/tas_extended/loss
z=VARIABILE -> concept_intersection/(hamming_margin+triplet_lambda combinati)/concept_normalization/replay_lambda
hue = model


Heatmap: tt/pt/pp cherry picked
