In [4]:
import wandb
import pandas as pd
from loguru import logger
from tqdm import tqdm

# Shared W&B API client; a later cell calls `api.runs(...)` on it to list runs.
api = wandb.Api()

In [5]:
def filter_runs(runs, filters: dict):
    """Return the runs whose attributes equal every value in ``filters``.

    Args:
        runs: Iterable of run objects; attributes are read with ``getattr``.
        filters: Mapping of attribute name to the value it must equal. An
            attribute that is missing on a run is compared as ``None``.

    Returns:
        A new list of the matching runs in their original order. An empty
        ``filters`` mapping keeps every run.
    """

    def satisfies_all(candidate) -> bool:
        # Stop at the first mismatch so later attributes are never read.
        for attr_name, wanted in filters.items():
            if getattr(candidate, attr_name, None) == wanted:
                continue
            return False
        return True

    return list(filter(satisfies_all, runs))


def summarize_run(run):
    """Flatten one run's settings and best AUROC values into a flat dict.

    Args:
        run: Object exposing ``config`` and ``summary`` mappings with the
            nested keys read below.

    Returns:
        A dict with the keys ``target``, ``train_dataset``, ``test_dataset``,
        ``model``, ``feature_extractor``, ``augmentations``, ``seed``,
        ``train_auroc``, ``val_auroc`` and ``test_auroc`` (in that order).

    Raises:
        KeyError: If any of the expected config or summary keys is missing.
    """
    config = run.config
    dataset_cfg = config["dataset"]
    # Only the first configured target is summarised; its column name also
    # selects which metric entries are read from the summary.
    column = dataset_cfg["targets"][0]["column"]

    record = {
        "target": column,
        "train_dataset": dataset_cfg["name"],
        "test_dataset": config["test"]["dataset"]["name"],
        # Keep only the final dotted component of the `_target_` path.
        "model": config["model"]["_target_"].split(".")[-1],
        "feature_extractor": config["settings"]["feature_extractor"],
        "augmentations": dataset_cfg["augmentations"]["name"],
        "seed": config["seed"],
    }

    summary = run.summary
    record["train_auroc"] = summary[f"train/{column}/auroc"]["best"]
    record["val_auroc"] = summary[f"val/{column}/auroc"]["best"]
    record["test_auroc"] = summary[f"test/{column}/auroc"]["best"]
    return record


# Materialise everything `api.runs` returns for "histaug" and keep only the
# runs whose `state` attribute equals "finished".
runs = filter_runs(list(api.runs("histaug")), {"state": "finished"})
# Replace each run object by its flat summary dict (one record per run).
runs = [summarize_run(finished_run) for finished_run in tqdm(runs, desc="Loading run data")]

Loading run data:  46%|████▌     | 240/521 [07:53<09:14,  1.97s/it]
Loading run data: 100%|██████████| 520/520 [00:00<00:00, 10481.28it/s]


In [7]:
# Tabulate the run summaries with the experiment settings as a sorted
# MultiIndex, keep only the two augmentation settings being compared, and
# write the result to CSV.
# A narrower alternative query (single train dataset and model), kept for reference:
#   "train_dataset == 'tcga_brca_subtype' and model == 'AttentionMIL' and augmentations in ['none', 'Macenko_patchwise']"
df = (
    pd.DataFrame(runs)
    .set_index(["target", "train_dataset", "test_dataset", "model", "feature_extractor", "augmentations", "seed"])
    .sort_index()
    .query("augmentations in ['none', 'Macenko_patchwise']")
)
df.to_csv("/app/results.csv")

In [53]:
# Per-run gap between test and validation AUROC (stored on `df` itself).
df["diff"] = df["test_auroc"].sub(df["val_auroc"])

# Average over seeds (every index level except "seed" is a group key), then
# restrict to the AttentionMIL / ctranspath configuration.
d = (
    df.groupby(
        level=["target", "train_dataset", "test_dataset", "model", "feature_extractor", "augmentations"]
    )
    .mean()
    .query("model == 'AttentionMIL' and feature_extractor == 'ctranspath'")
)

# Split by augmentation setting; dropping that level lets the two frames
# align on the remaining index levels when subtracted.
d_macenko = d.query("augmentations == 'Macenko_patchwise'").droplevel("augmentations")
d_none = d.query("augmentations == 'none'").droplevel("augmentations")

print(
    "Mean relative difference [none - Macenko_patchwise] (if >0, no augmentation is better):",
    d_none["diff"].sub(d_macenko["diff"]).mean(),
)
print(d_none.mean())
d

Mean relative difference [none - Macenko_patchwise] (if >0, no augmentation is better): 0.01899754703044891
train_auroc    0.925642
val_auroc      0.798824
test_auroc     0.763793
diff          -0.035032
dtype: float64


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,train_auroc,val_auroc,test_auroc,diff
target,train_dataset,test_dataset,model,feature_extractor,augmentations,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
CDH1,tcga_brca_CDH1,cptac_brca_CDH1,AttentionMIL,ctranspath,Macenko_patchwise,0.939841,0.819002,0.781154,-0.037848
CDH1,tcga_brca_CDH1,cptac_brca_CDH1,AttentionMIL,ctranspath,none,0.943016,0.824408,0.809138,-0.01527
PIK3CA,tcga_brca_PIK3CA,cptac_brca_PIK3CA,AttentionMIL,ctranspath,Macenko_patchwise,0.857404,0.668728,0.604686,-0.064042
PIK3CA,tcga_brca_PIK3CA,cptac_brca_PIK3CA,AttentionMIL,ctranspath,none,0.858131,0.651474,0.6218,-0.029674
TP53,tcga_brca_TP53,cptac_brca_TP53,AttentionMIL,ctranspath,Macenko_patchwise,0.955682,0.857276,0.78165,-0.075627
TP53,tcga_brca_TP53,cptac_brca_TP53,AttentionMIL,ctranspath,none,0.933527,0.861501,0.801178,-0.060322
subtype,tcga_brca_subtype,cptac_brca_subtype,AttentionMIL,ctranspath,Macenko_patchwise,0.95478,0.850429,0.811828,-0.038601
subtype,tcga_brca_subtype,cptac_brca_subtype,AttentionMIL,ctranspath,none,0.967895,0.857915,0.823055,-0.034861
