In [1]:
import os
import pandas as pd
import yaml

In [2]:
with open("../../enriched_experiments.yml", "r") as f:
    exps = yaml.safe_load(f)

In [3]:
experiments = exps["experiments"]

In [4]:
experiments[-1].keys()

dict_keys(['slurm_id', 'cache', 'experiment_nm', 'test_nm', 'wandb_urls', 'test/vaec_test4_vit_2/loss', 'test/vaec_test3_vit_2/loss', 'train/vaec_vit/loss_epoch', 'val/vaec_vit/loss', 'train/vasr_vit/loss_epoch', 'test/vaec_test5_vit_2/loss', 'test/vaec_test2_vit_2/loss', 'val/vasr_vit/loss', 'test/vaec_test1_vit_2/loss', 'val/loss', 'test/vaec_test4_vit_2/accuracy', 'train/vasr_vit/accuracy_epoch', 'test/vaec_test5_vit_2/accuracy', 'test/vaec_test2_vit_2/accuracy', 'train/vaec_vit/accuracy_epoch', 'val/vasr_vit/accuracy', 'test/vaec_test3_vit_2/accuracy', 'test/vaec_test1_vit_2/accuracy', 'val/vaec_vit/accuracy', 'max_epoch', 'loss/best_ckpt', 'datasets'])

In [5]:
list(filter(lambda x: "accuracy" in x, experiments[-1].keys()))

['test/vaec_test4_vit_2/accuracy',
 'train/vasr_vit/accuracy_epoch',
 'test/vaec_test5_vit_2/accuracy',
 'test/vaec_test2_vit_2/accuracy',
 'train/vaec_vit/accuracy_epoch',
 'val/vasr_vit/accuracy',
 'test/vaec_test3_vit_2/accuracy',
 'test/vaec_test1_vit_2/accuracy',
 'val/vaec_vit/accuracy']

In [6]:
# Get all runs that produced some accuracy results
unprocessed_count = 0
processed_count = 0
structured_results = []
for experiment in experiments:
    accuracy_cols = [key for key in experiment.keys() if "accuracy" in key]
    if len(accuracy_cols) == 0:
        # TODO: later collect STSN runs
        unprocessed_count += 1
        continue
    structured_results.append(
        {col: experiment[col]["best_loss"] for col in accuracy_cols}
        | {
            "loss/best_ckpt": experiment.get("loss/best_ckpt"),
            "wandb_urls": ",".join(experiment.get("wandb_urls")),
            "experiment_nm": experiment.get("experiment_nm"),
            "test_nm": experiment.get("test_nm"),
            "based_on/slurm_id": experiment.get("based_on", {}).get("slurm_id"),
            "max_epoch": experiment.get("max_epoch"),
        }
    )
    # TODO: enrich with information from "additional_inforamations" (TODO: add relevant fields in enrich_experiments.py)
    processed_count += 1
print(f"Processed {processed_count} experiments")
print(f"Unprocessed {unprocessed_count} experiments")

Processed 114 experiments
Unprocessed 34 experiments


In [7]:
# TODO: split by dataset (not mutually exclusive -- lets duplicate the information)
# VAEC/VASR/HOI/LOGO/LOGO+HOI/VAEC+VASR/LOGO+VAEC/HOI+VASR

# might be nice to add identifiers different than slurm_ids
# (autoincrement value would work but could be hard to
# automatically assign it so that it makes sense in paper)

In [8]:
df = pd.DataFrame(structured_results)

In [9]:
# normalize columns

In [10]:
cols = df.columns

In [11]:
cols

Index(['val/bongard_hoi/accuracy', 'train/bongard_hoi/accuracy_epoch',
       'loss/best_ckpt', 'wandb_urls', 'experiment_nm', 'test_nm',
       'based_on/slurm_id', 'max_epoch', 'train/vasr/accuracy_epoch',
       'val/vasr/accuracy', 'test/bongard_hoi_seen-seen/accuracy',
       'test/bongard_hoi_unseen-seen/accuracy',
       'test/bongard_hoi_seen-unseen/accuracy',
       'test/bongard_hoi_unseen-unseen/accuracy', 'test/vasr/accuracy',
       'val/bongard_logo/accuracy', 'train/bongard_logo/accuracy_epoch',
       'train/vaec/accuracy_epoch', 'val/vaec/accuracy',
       'test/bongard_logo_test_hd_novel/accuracy',
       'test/bongard_logo_test_ff/accuracy',
       'test/bongard_logo_test_bd/accuracy',
       'test/bongard_logo_test_hd_comb/accuracy', 'test/vaec_test3/accuracy',
       'test/vaec_test2/accuracy', 'test/vaec_test4/accuracy',
       'test/vaec_test5/accuracy', 'test/vaec/accuracy',
       'test/vaec_test1/accuracy', 'test/bongard_hoi_unseen-seen_vit/accuracy',
       '

In [12]:
train_acc = [col for col in cols if "train" in col and "accuracy" in col]
val_acc = [col for col in cols if "val" in col and "accuracy" in col]

In [13]:
datasets = ["bongard_logo", "bongard_hoi", "vaec", "vasr"]

In [14]:
for dataset in datasets:
    _cols = [col for col in train_acc if dataset in col]
    df[f"train/{dataset}/accuracy"] = df[_cols].bfill(axis=1).iloc[:, 0]

    _cols = [col for col in val_acc if dataset in col]
    df[f"val/{dataset}/accuracy"] = df[_cols].bfill(axis=1).iloc[:, 0]

In [15]:
df = df.drop(
    columns=set(train_acc) - {f"train/{dataset}/accuracy" for dataset in datasets}
)
df = df.drop(columns=set(val_acc) - {f"val/{dataset}/accuracy" for dataset in datasets})

In [16]:
test_acc = [col for col in cols if "test" in col and "accuracy" in col]

In [17]:
test_types = [
    ("bongard_logo", "test_bd"),
    ("bongard_logo", "test_ff"),
    ("bongard_logo", "test_hd_comb"),
    ("bongard_logo", "test_hd_novel"),
    ("vaec", "test1"),
    ("vaec", "test2"),
    ("vaec", "test3"),
    ("vaec", "test4"),
    ("vaec", "test5"),
    ("bongard_hoi", "seen-seen"),
    ("bongard_hoi", "seen-unseen"),
    ("bongard_hoi", "unseen-seen"),
    ("bongard_hoi", "unseen-unseen"),
    ("vasr", "vasr"),
]

In [18]:
for dataset, test_type in test_types:
    _cols = [col for col in test_acc if test_type in col]
    df[f"test/{dataset}/{test_type}"] = df[_cols].bfill(axis=1).iloc[:, 0]

In [19]:
df = df.drop(columns=test_acc)

In [20]:
df.columns

Index(['val/bongard_hoi/accuracy', 'loss/best_ckpt', 'wandb_urls',
       'experiment_nm', 'test_nm', 'based_on/slurm_id', 'max_epoch',
       'val/vasr/accuracy', 'val/bongard_logo/accuracy', 'val/vaec/accuracy',
       'train/bongard_logo/accuracy', 'train/bongard_hoi/accuracy',
       'train/vaec/accuracy', 'train/vasr/accuracy',
       'test/bongard_logo/test_bd', 'test/bongard_logo/test_ff',
       'test/bongard_logo/test_hd_comb', 'test/bongard_logo/test_hd_novel',
       'test/vaec/test1', 'test/vaec/test2', 'test/vaec/test3',
       'test/vaec/test4', 'test/vaec/test5', 'test/bongard_hoi/seen-seen',
       'test/bongard_hoi/seen-unseen', 'test/bongard_hoi/unseen-seen',
       'test/bongard_hoi/unseen-unseen', 'test/vasr/vasr'],
      dtype='object')

In [21]:
df.head()

Unnamed: 0,val/bongard_hoi/accuracy,loss/best_ckpt,wandb_urls,experiment_nm,test_nm,based_on/slurm_id,max_epoch,val/vasr/accuracy,val/bongard_logo/accuracy,val/vaec/accuracy,...,test/vaec/test1,test/vaec/test2,test/vaec/test3,test/vaec/test4,test/vaec/test5,test/bongard_hoi/seen-seen,test/bongard_hoi/seen-unseen,test/bongard_hoi/unseen-seen,test/bongard_hoi/unseen-unseen,test/vasr/vasr
0,0.604056,/mnt/evafs/groups/mandziuk-lab/akaminski/model...,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_hoi_scoring,frozen_slot,839928.0,43,,,,...,,,,,,,,,,
1,0.580541,/mnt/evafs/groups/mandziuk-lab/akaminski/model...,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_hoi_scoring_full_train,trained_slot_aux_test,839928.0,42,,,,...,,,,,,,,,,
2,,/mnt/evafs/groups/mandziuk-lab/akaminski/model...,https://wandb.ai/avr_universal/AVR_universal/r...,vasr_scoring_full_train,trained_slot_aux_test,839928.0,5,0.25228,,,...,,,,,,,,,,
3,0.607584,/mnt/evafs/groups/mandziuk-lab/akaminski/model...,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_hoi_scoring_full_train,trained_slot_aux_test,839928.0,34,,,,...,,,,,,,,,,
4,,/mnt/evafs/groups/mandziuk-lab/akaminski/model...,https://wandb.ai/avr_universal/AVR_universal/r...,vasr_scoring_full_train,trained_slot_aux_test,839928.0,5,0.286448,,,...,,,,,,,,,,


In [22]:
for dataset, test_type in test_types:
    _dataset_cols = [col for col in df.columns if dataset in col and "test" not in col]
    cols = [
        "wandb_urls",
        "experiment_nm",
        "test_nm",
        "based_on/slurm_id",
        "max_epoch",
        *_dataset_cols,
    ]
    display(
        df[[f"test/{dataset}/{test_type}"] + cols]
        .sort_values(f"test/{dataset}/{test_type}", ascending=False)
        .head(5)
    )

Unnamed: 0,test/bongard_logo/test_bd,wandb_urls,experiment_nm,test_nm,based_on/slurm_id,max_epoch,val/bongard_logo/accuracy,train/bongard_logo/accuracy
25,0.88125,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,frozen_slot,871045.0,413,0.807778,0.822796
54,0.864583,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,trained_slot_aux_test,871045.0,174,0.773333,0.764194
41,0.833333,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,trained_slot_aux_test,871039.0,172,0.748889,0.766237
42,0.827083,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,trained_slot_aux_test,871039.0,137,0.751111,0.753441
53,0.825,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,trained_slot_aux_test,871045.0,255,0.755556,0.764839


Unnamed: 0,test/bongard_logo/test_ff,wandb_urls,experiment_nm,test_nm,based_on/slurm_id,max_epoch,val/bongard_logo/accuracy,train/bongard_logo/accuracy
25,0.765,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,frozen_slot,871045.0,413,0.807778,0.822796
41,0.755,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,trained_slot_aux_test,871039.0,172,0.748889,0.766237
54,0.725,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,trained_slot_aux_test,871045.0,174,0.773333,0.764194
53,0.721667,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,trained_slot_aux_test,871045.0,255,0.755556,0.764839
42,0.711667,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,trained_slot_aux_test,871039.0,137,0.751111,0.753441


Unnamed: 0,test/bongard_logo/test_hd_comb,wandb_urls,experiment_nm,test_nm,based_on/slurm_id,max_epoch,val/bongard_logo/accuracy,train/bongard_logo/accuracy
25,0.71,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,frozen_slot,871045.0,413,0.807778,0.822796
54,0.695,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,trained_slot_aux_test,871045.0,174,0.773333,0.764194
42,0.6925,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,trained_slot_aux_test,871039.0,137,0.751111,0.753441
53,0.6775,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,trained_slot_aux_test,871045.0,255,0.755556,0.764839
41,0.6725,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,trained_slot_aux_test,871039.0,172,0.748889,0.766237


Unnamed: 0,test/bongard_logo/test_hd_novel,wandb_urls,experiment_nm,test_nm,based_on/slurm_id,max_epoch,val/bongard_logo/accuracy,train/bongard_logo/accuracy
42,0.7,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,trained_slot_aux_test,871039.0,137,0.751111,0.753441
25,0.69375,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,frozen_slot,871045.0,413,0.807778,0.822796
53,0.675,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,trained_slot_aux_test,871045.0,255,0.755556,0.764839
54,0.653125,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,trained_slot_aux_test,871045.0,174,0.773333,0.764194
41,0.65,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_scoring,trained_slot_aux_test,871039.0,172,0.748889,0.766237


Unnamed: 0,test/vaec/test1,wandb_urls,experiment_nm,test_nm,based_on/slurm_id,max_epoch,val/vaec/accuracy,train/vaec/accuracy
61,0.996722,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring,trained_slot_aux_test,871052.0,114,0.997057,1.0
33,0.994131,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring,train_slot_no_aux,871037.0,73,0.993889,1.0
37,0.988054,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring,train_slot_no_aux,871052.0,115,0.988237,1.0
46,0.986088,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring,trained_slot_aux_test,871039.0,208,0.987817,1.0
62,0.97619,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring,trained_slot_aux_test,871052.0,115,0.983356,1.0


Unnamed: 0,test/vaec/test2,wandb_urls,experiment_nm,test_nm,based_on/slurm_id,max_epoch,val/vaec/accuracy,train/vaec/accuracy
33,0.868881,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring,train_slot_no_aux,871037.0,73,0.993889,1.0
46,0.864647,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring,trained_slot_aux_test,871039.0,208,0.987817,1.0
61,0.813528,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring,trained_slot_aux_test,871052.0,114,0.997057,1.0
47,0.782212,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring,trained_slot_aux_test,871039.0,37,0.959347,0.997632
62,0.778461,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring,trained_slot_aux_test,871052.0,115,0.983356,1.0


Unnamed: 0,test/vaec/test3,wandb_urls,experiment_nm,test_nm,based_on/slurm_id,max_epoch,val/vaec/accuracy,train/vaec/accuracy
33,0.799392,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring,train_slot_no_aux,871037.0,73,0.993889,1.0
61,0.521531,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring,trained_slot_aux_test,871052.0,114,0.997057,1.0
47,0.519244,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring,trained_slot_aux_test,871039.0,37,0.959347,0.997632
46,0.517974,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring,trained_slot_aux_test,871039.0,208,0.987817,1.0
98,0.458799,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring_esnb_vit-2,relation_esnbv2_single,,499,0.456343,0.753042


Unnamed: 0,test/vaec/test4,wandb_urls,experiment_nm,test_nm,based_on/slurm_id,max_epoch,val/vaec/accuracy,train/vaec/accuracy
98,0.426579,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring_esnb_vit-2,relation_esnbv2_single,,499,0.456343,0.753042
107,0.411176,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_vasr_scoring_esnb_vit-2,relation_esnbv2_dual-lower_lr,,112,0.448634,0.863958
101,0.410693,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_vasr_scoring_esnb_vit-2,relation_esnbv2_dual,,112,0.464097,0.91863
47,0.405407,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring,trained_slot_aux_test,871039.0,37,0.959347,0.997632
110,0.401738,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring_esnb_vit-2,relation_esnbv2_single-smaller_scoring,,499,0.454768,0.713183


Unnamed: 0,test/vaec/test5,wandb_urls,experiment_nm,test_nm,based_on/slurm_id,max_epoch,val/vaec/accuracy,train/vaec/accuracy
37,0.582979,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring,train_slot_no_aux,871052.0,115,0.988237,1.0
61,0.455198,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring,trained_slot_aux_test,871052.0,114,0.997057,1.0
98,0.454434,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_scoring_esnb_vit-2,relation_esnbv2_single,,499,0.456343,0.753042
101,0.444742,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_vasr_scoring_esnb_vit-2,relation_esnbv2_dual,,112,0.464097,0.91863
107,0.441873,https://wandb.ai/avr_universal/AVR_universal/r...,vaec_vasr_scoring_esnb_vit-2,relation_esnbv2_dual-lower_lr,,112,0.448634,0.863958


Unnamed: 0,test/bongard_hoi/seen-seen,wandb_urls,experiment_nm,test_nm,based_on/slurm_id,max_epoch,val/bongard_hoi/accuracy,train/bongard_hoi/accuracy
72,0.633494,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_hoi_scoring_esnb,relation_esnb_single,,216,0.782775,0.860813
109,0.620123,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_hoi_scoring_esnb-2,relation_esnbv2_single-smaller_scoring,,499,0.664315,0.999566
85,0.619465,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_hoi_scoring_esnb-2,relation_esnb_single_2,,499,0.663727,1.0
106,0.61552,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_hoi_scoring_esnb_vit-2,relation_esnbv2_dual-lower_lr,,499,0.653733,0.996834
112,0.614862,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_hoi_scoring_esnb_vit-2,relation_esnbv2_dual-smaller_scoring,,499,0.644033,0.998301


Unnamed: 0,test/bongard_hoi/seen-unseen,wandb_urls,experiment_nm,test_nm,based_on/slurm_id,max_epoch,val/bongard_hoi/accuracy,train/bongard_hoi/accuracy
72,0.78973,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_hoi_scoring_esnb,relation_esnb_single,,216,0.782775,0.860813
76,0.671832,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_hoi_scoring_esnb_vit,relation_esnb_dual,,17,0.669606,0.663079
89,0.650124,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_hoi_scoring_esnb-2_larger,relation_esnb_single_2,,499,0.665197,1.0
106,0.64206,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_hoi_scoring_esnb_vit-2,relation_esnbv2_dual-lower_lr,,499,0.653733,0.996834
112,0.641439,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_hoi_scoring_esnb_vit-2,relation_esnbv2_dual-smaller_scoring,,499,0.644033,0.998301


Unnamed: 0,test/bongard_hoi/unseen-seen,wandb_urls,experiment_nm,test_nm,based_on/slurm_id,max_epoch,val/bongard_hoi/accuracy,train/bongard_hoi/accuracy
72,0.633494,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_hoi_scoring_esnb,relation_esnb_single,,216,0.782775,0.860813
109,0.620123,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_hoi_scoring_esnb-2,relation_esnbv2_single-smaller_scoring,,499,0.664315,0.999566
85,0.619465,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_hoi_scoring_esnb-2,relation_esnb_single_2,,499,0.663727,1.0
106,0.61552,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_hoi_scoring_esnb_vit-2,relation_esnbv2_dual-lower_lr,,499,0.653733,0.996834
112,0.614862,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_hoi_scoring_esnb_vit-2,relation_esnbv2_dual-smaller_scoring,,499,0.644033,0.998301


Unnamed: 0,test/bongard_hoi/unseen-unseen,wandb_urls,experiment_nm,test_nm,based_on/slurm_id,max_epoch,val/bongard_hoi/accuracy,train/bongard_hoi/accuracy
72,0.665012,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_hoi_scoring_esnb,relation_esnb_single,,216,0.782775,0.860813
89,0.650124,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_hoi_scoring_esnb-2_larger,relation_esnb_single_2,,499,0.665197,1.0
106,0.64206,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_hoi_scoring_esnb_vit-2,relation_esnbv2_dual-lower_lr,,499,0.653733,0.996834
112,0.641439,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_logo_hoi_scoring_esnb_vit-2,relation_esnbv2_dual-smaller_scoring,,499,0.644033,0.998301
85,0.640819,https://wandb.ai/avr_universal/AVR_universal/r...,bongard_hoi_scoring_esnb-2,relation_esnb_single_2,,499,0.663727,1.0


Unnamed: 0,test/vasr/vasr,wandb_urls,experiment_nm,test_nm,based_on/slurm_id,max_epoch,val/vasr/accuracy,train/vasr/accuracy
87,0.393305,https://wandb.ai/avr_universal/AVR_universal/r...,vasr_scoring_esnb-2,relation_esnb_single_2,,499,0.402501,0.881502
79,0.388606,https://wandb.ai/avr_universal/AVR_universal/r...,vasr_scoring_esnb,relation_esnb_single,871052.0,50,0.392536,0.560199
91,0.385082,https://wandb.ai/avr_universal/AVR_universal/r...,vasr_scoring_esnb-2_larger,relation_esnb_single_2,,466,0.400776,0.995999
111,0.383923,https://wandb.ai/avr_universal/AVR_universal/r...,vasr_scoring_esnb-2,relation_esnbv2_single-smaller_scoring,,499,0.395654,0.51417
105,0.382457,https://wandb.ai/avr_universal/AVR_universal/r...,vasr_scoring_esnb-2,relation_esnbv2_single-lower_lr,,216,0.388706,0.444127


In [23]:
df.to_csv("../../structured_results.csv", index=False)

In [None]:
# split by dataset 