In [1]:
import wandb
import pandas as pd
from loguru import logger
from tqdm import tqdm

api = wandb.Api()

INDEX_COLS = ["target", "train_dataset", "test_dataset", "model", "feature_extractor", "augmentations", "seed"]

In [2]:
def filter_runs(runs, filters: dict):
    return [run for run in runs if all(getattr(run, key, None) == value for key, value in filters.items())]


def summarize_run(run):
    # return dict(
    #     target=(column := run.config["dataset"]["targets"][0]["column"]),
    #     train_dataset=run.config["dataset"]["name"],
    #     test_dataset=run.config["test"]["dataset"]["name"],
    #     model=run.config["model"]["_target_"].split(".")[-1],
    #     feature_extractor=run.config["settings"]["feature_extractor"],
    #     augmentations=run.config["dataset"]["augmentations"]["name"],
    #     seed=run.config["seed"],
    #     train_auroc=run.summary[f"train/{column}/auroc"]["best"],
    #     val_auroc=run.summary[f"val/{column}/auroc"]["best"],
    #     test_auroc=run.summary[f"test/{column}/auroc"]["best"],
    # )

    hist = run.history().groupby("epoch").first()
    hist = hist[~hist.index.isna()]
    best = hist.sort_values("val/loss", ascending=True).iloc[0]
    return dict(
        target=(column := run.config["dataset"]["targets"][0]["column"]),
        train_dataset=run.config["dataset"]["name"],
        test_dataset=run.config["test"]["dataset"]["name"],
        model=run.config["model"]["_target_"].split(".")[-1],
        feature_extractor=run.config["settings"]["feature_extractor"],
        augmentations=run.config["dataset"]["augmentations"]["name"],
        seed=run.config["seed"],
        train_auroc=best[f"train/{column}/auroc"],
        val_auroc=best[f"val/{column}/auroc"],
        test_auroc=run.summary[f"test/{column}/auroc"]["best"],
    )


runs = list(api.runs("histaug"))
runs = filter_runs(runs, {"state": "finished"})
runs = [summarize_run(run) for run in tqdm(runs, desc="Loading run data")]

Loading run data: 100%|██████████| 1160/1160 [08:28<00:00,  2.28it/s]


In [3]:
df = pd.DataFrame(runs)
df = df.set_index(INDEX_COLS).sort_index()
# df = df.query(
#     "train_dataset == 'tcga_brca_subtype' and model == 'AttentionMIL' and augmentations in ['none', 'Macenko_patchwise']"
# )
# df = df.query("augment  ations in ['none', 'Macenko_patchwise']")
df.to_csv("/app/results.csv")

In [2]:
df = pd.read_csv("/app/results.csv").set_index(INDEX_COLS).sort_index()
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,train_auroc,val_auroc,test_auroc
target,train_dataset,test_dataset,model,feature_extractor,augmentations,seed,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
CDH1,tcga_brca_CDH1,cptac_brca_CDH1,AttentionMIL,bt,Macenko_patchwise,0,0.795646,0.819653,0.776128
CDH1,tcga_brca_CDH1,cptac_brca_CDH1,AttentionMIL,bt,Macenko_patchwise,1,0.828331,0.805249,0.760137
CDH1,tcga_brca_CDH1,cptac_brca_CDH1,AttentionMIL,bt,Macenko_patchwise,2,0.795870,0.829321,0.743575
CDH1,tcga_brca_CDH1,cptac_brca_CDH1,AttentionMIL,bt,Macenko_patchwise,3,0.810821,0.811168,0.762421
CDH1,tcga_brca_CDH1,cptac_brca_CDH1,AttentionMIL,bt,Macenko_patchwise,4,0.823637,0.814917,0.766990
...,...,...,...,...,...,...,...,...,...
subtype,tcga_brca_subtype,cptac_brca_subtype,MeanAveragePooling,vit,simple_rotate,0,0.900774,0.809816,0.745174
subtype,tcga_brca_subtype,cptac_brca_subtype,MeanAveragePooling,vit,simple_rotate,1,0.891602,0.812212,0.755641
subtype,tcga_brca_subtype,cptac_brca_subtype,MeanAveragePooling,vit,simple_rotate,2,0.907711,0.810231,0.766543
subtype,tcga_brca_subtype,cptac_brca_subtype,MeanAveragePooling,vit,simple_rotate,3,0.879893,0.812215,0.755432


In [3]:
df.index.unique("augmentations")

Index(['Macenko_patchwise', 'all', 'none', 'simple_rotate'], dtype='object', name='augmentations')

In [4]:
macenko = df.query("augmentations == 'Macenko_patchwise'")["test_auroc"].droplevel("augmentations")
orig = df.query("augmentations == 'none'")["test_auroc"].droplevel("augmentations")
# Mean diff across seeds
d = (
    (macenko - orig)
    .rename("test_auroc_diff")
    .reset_index()
    .drop(columns="seed")
    .groupby(["target", "train_dataset", "test_dataset", "model", "feature_extractor"])
    .agg(["mean", "std"])
)
o = (
    orig.rename("test_auroc_orig")
    .reset_index()
    .drop(columns="seed")
    .groupby(["target", "train_dataset", "test_dataset", "model", "feature_extractor"])
    .agg(["mean", "std"])
)
d = pd.concat([d, o], axis=1)
d.query("model == 'AttentionMIL'")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,test_auroc_diff,test_auroc_diff,test_auroc_orig,test_auroc_orig
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,mean,std,mean,std
target,train_dataset,test_dataset,model,feature_extractor,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
CDH1,tcga_brca_CDH1,cptac_brca_CDH1,AttentionMIL,bt,0.010737,0.03596,0.751114,0.036792
CDH1,tcga_brca_CDH1,cptac_brca_CDH1,AttentionMIL,ctranspath,-0.027984,0.044912,0.809138,0.01612
CDH1,tcga_brca_CDH1,cptac_brca_CDH1,AttentionMIL,dino_p16,0.010851,0.040105,0.781382,0.04396
CDH1,tcga_brca_CDH1,cptac_brca_CDH1,AttentionMIL,owkin,0.031411,0.016264,0.726214,0.019111
CDH1,tcga_brca_CDH1,cptac_brca_CDH1,AttentionMIL,resnet50,-0.109138,0.043949,0.727813,0.040233
CDH1,tcga_brca_CDH1,cptac_brca_CDH1,AttentionMIL,retccl,-0.010737,0.019674,0.77807,0.008963
CDH1,tcga_brca_CDH1,cptac_brca_CDH1,AttentionMIL,swav,0.024557,0.052704,0.751342,0.029276
CDH1,tcga_brca_CDH1,cptac_brca_CDH1,AttentionMIL,swin,-0.041405,0.064137,0.64723,0.022377
CDH1,tcga_brca_CDH1,cptac_brca_CDH1,AttentionMIL,vit,-0.018047,0.025253,0.704969,0.014207
PIK3CA,tcga_brca_PIK3CA,cptac_brca_PIK3CA,AttentionMIL,bt,-0.025886,0.010879,0.536114,0.025031


# What is the best feature extractor?

In [5]:
d = df.query("augmentations == 'none'").droplevel("augmentations")["test_auroc"].reset_index()
d = df.query("augmentations == 'Macenko_patchwise'").droplevel("augmentations")["test_auroc"].reset_index()
d.drop(columns=["train_dataset", "test_dataset"]).to_csv("/app/res.csv", index=False)

In [54]:
from itertools import product
import numpy as np
from tqdm import tqdm


# Function to compute average ranks for a given subset of data
def compute_average_ranks(sub_df):
    pivot_data = sub_df.pivot(index="seed", columns="feature_extractor", values="test_auroc")
    feature_extractors = pivot_data.columns.values
    seeds = pivot_data.index.values
    combinations = product(*pivot_data.values.T)
    n_combinations = int(len(seeds) ** len(feature_extractors))
    ranks_array = np.zeros(len(feature_extractors))

    for i, auroc_values in enumerate(tqdm(combinations, total=n_combinations)):
        # sorted_indices = np.argsort(auroc_values)[::-1]
        # ranks_array[sorted_indices] += np.arange(1, len(feature_extractors) + 1)
        diffs = np.array(auroc_values).max() - np.array(auroc_values)
        ranks_array += diffs

    return {feature_extractors[i]: rank / n_combinations for i, rank in enumerate(ranks_array)}


# Compute average ranks for each (target, model) pair
results = {}
unique_pairs = d[["target", "model"]].drop_duplicates().values

for target, model in unique_pairs:
    sub_data = d[(d["target"] == target) & (d["model"] == model)]  # & ~d["feature_extractor"].isin(["bt", "swav"])]
    results[(target, model)] = compute_average_ranks(sub_data)
    print(
        f"{target:10s} {model:20s}:",
        ", ".join(
            f"{k}={v:.2f}" for (k, v) in sorted(results[(target, model)].items(), key=lambda x: x[1], reverse=False)
        ),
    )

  0%|          | 0/1953125 [00:00<?, ?it/s]

100%|██████████| 1953125/1953125 [00:23<00:00, 81394.39it/s]


CDH1       AttentionMIL        : dino_p16=0.01, ctranspath=0.02, swav=0.03, retccl=0.03, bt=0.04, owkin=0.04, vit=0.12, resnet50=0.18, swin=0.20


100%|██████████| 1953125/1953125 [00:22<00:00, 87615.32it/s]


CDH1       MeanAveragePooling  : ctranspath=0.01, owkin=0.01, dino_p16=0.02, retccl=0.02, swav=0.02, bt=0.02, vit=0.09, resnet50=0.11, swin=0.12


100%|██████████| 1953125/1953125 [00:23<00:00, 82126.23it/s]


PIK3CA     AttentionMIL        : resnet50=0.01, dino_p16=0.02, retccl=0.02, ctranspath=0.04, vit=0.04, swin=0.05, swav=0.05, owkin=0.05, bt=0.13


100%|██████████| 1953125/1953125 [00:24<00:00, 79381.93it/s]


PIK3CA     MeanAveragePooling  : ctranspath=0.01, swin=0.01, resnet50=0.03, retccl=0.03, vit=0.03, dino_p16=0.03, bt=0.05, owkin=0.08, swav=0.11


100%|██████████| 1953125/1953125 [00:22<00:00, 85841.95it/s]


TP53       AttentionMIL        : bt=0.02, ctranspath=0.03, retccl=0.03, swav=0.04, dino_p16=0.04, owkin=0.09, vit=0.12, resnet50=0.17, swin=0.24


100%|██████████| 1953125/1953125 [00:23<00:00, 83772.52it/s]


TP53       MeanAveragePooling  : ctranspath=0.02, bt=0.03, swav=0.04, dino_p16=0.04, retccl=0.05, vit=0.07, resnet50=0.07, swin=0.11, owkin=0.11


100%|██████████| 1953125/1953125 [00:22<00:00, 87082.59it/s]


subtype    AttentionMIL        : ctranspath=0.00, dino_p16=0.04, swav=0.05, swin=0.08, retccl=0.09, vit=0.11, owkin=0.11, bt=0.11, resnet50=0.17


100%|██████████| 1953125/1953125 [00:22<00:00, 85703.96it/s]

subtype    MeanAveragePooling  : ctranspath=0.00, retccl=0.01, vit=0.03, dino_p16=0.05, bt=0.06, swav=0.06, swin=0.06, resnet50=0.08, owkin=0.11





In [44]:
pd.DataFrame(results).T.rename_axis(["target", "model"]).query("model == 'AttentionMIL'").mean().sort_values()

ctranspath    0.019920
dino_p16      0.026628
retccl        0.042122
owkin         0.072018
vit           0.094241
resnet50      0.132303
swin          0.137736
dtype: float64

In [51]:
scores = pd.DataFrame(results).T.rename_axis(["target", "model"]).T.swaplevel(axis=1).sort_index(axis=1)

formatters = {  # make bold if best
    col: (lambda c: lambda x: f"\\textbf{{{x:.2f}}}" if x == scores[c].min() else f"{x:.2f}")(col)
    for col in scores.columns
}
scores.to_latex("/app/scores.tex", formatters=formatters)

In [48]:
scores[scores.columns[0]].min()

0.009998592586517334