# Plotting results for paper

Run all inference notebooks first: the cells below read their result CSV files from `../outputs/`.

In [None]:
import sys

sys.path.append("/vol/biomedic3/mb121/causal-contrastive")

import matplotlib
import re
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from evaluation.helper_functions import (
    extract_train_label_prop,
)


def extract_finetuning_type(run_name):
    """Return the classifier training regime encoded in a run name.

    Only the part of ``run_name`` before the first ``-`` is inspected: if it
    contains ``"head"`` the run is labelled ``"Linear Probing"``, otherwise
    ``"Finetuning"``.
    """
    prefix = re.split(r"-", run_name, maxsplit=1)[0]
    return "Linear Probing" if "head" in prefix else "Finetuning"


def extract_pretraining_type(run_name):
    """Return the pretraining strategy label encoded in a run name.

    Only the part of ``run_name`` before the first ``-`` is inspected. The
    markers are tested from most to least specific, because every marker
    contains ``"simclr"`` and the counterfactual ones all contain
    ``"simclrcf"``. Returns ``None`` when no marker is found.
    """
    prefix = re.split(r"-", run_name, maxsplit=1)[0]
    # Order matters: longer markers must be checked before their substrings.
    marker_to_label = (
        ("simclrcfbad", "BadCF"),
        ("simclrcffine", "CF+"),
        ("simclrcf", "BaseCF"),
        ("simclr", "No CF"),
    )
    for marker, label in marker_to_label:
        if marker in prefix:
            return label
    return None


# Bar colour for each pretraining strategy; handed to seaborn as the palette.
color_dict = {
    "CF+": "red",
    "BadCF": "#232f23ff",
    "BaseCF": "orange",
    "No CF": "blue",
}

# Rank of each pretraining strategy, used to build the sort key that fixes
# the order in which the strategies are drawn.
order_dict = {
    "CF+": 3,
    "BadCF": 1,
    "BaseCF": 2,
    "No CF": 0,
}

# Value passed as seaborn's ``errorbar`` argument.
type_error = ("se", 1)

# Keyword arguments shared by every seaborn plotting call in this notebook.
plt_kwargs = dict(
    errorbar=type_error,
    palette=color_dict,
    linewidth=3,
)

# Percentage of the in-distribution train set per scanner (shown in titles).
training_prop = {
    "Selenia Dimensions": 89,
    "Senograph 2000D ADS_17.5": 4.4,
    "Lorad Selenia": 3.5,
    "Clearview CSm": 2.7,
    "Senographe Pristina": 0.2,
}

# Percentage of the VinDr (OOD) train set per scanner (shown in titles).
vindr_training_prop = {
    "(OOD) VinDr\nMammomat Inspiration": 80,
    "(OOD) VinDr\nPlanmed Nuance": 20,
}

# Style value per classifier type — presumably line dash patterns; TODO confirm.
style_dict = {
    "Finetuning": [2, 2],
    "Linear Probing": "",
}

# Difference plots

In [None]:
# Per-scanner figure: ROC difference of each counterfactual pretraining variant
# with respect to the mean "No CF" baseline, one panel per scanner
# (EMBED ID scanners, the EMBED OOD scanner, then the VinDr OOD scanners).
sns.set_theme(context="paper", style="whitegrid", font_scale=0.7)
matplotlib.rcParams["font.family"] = "serif"
rotation = 90


def _load_ablation_results(csv_path, xticks):
    """Load one results CSV and keep only the rows that can be plotted.

    Adds the ``ctrain_label_prop``, ``Pretraining`` and ``Classifier`` columns
    derived from ``run_name``, drops rows without a ROC value or label
    proportion, and keeps only the label proportions listed in ``xticks``.
    """
    results = pd.read_csv(csv_path)
    results["ctrain_label_prop"] = results.run_name.apply(extract_train_label_prop)
    results["Pretraining"] = results.run_name.apply(extract_pretraining_type)
    results["Classifier"] = results.run_name.apply(extract_finetuning_type)
    results = results.dropna(subset=["ROC", "ctrain_label_prop"])
    return results.loc[results.ctrain_label_prop.isin(xticks)]


def _draw_diff_panel(
    axis, df_model, mode, xticks, n_label_train_total, rotation, legend
):
    """Draw the ROC-difference bars for one panel and label its x ticks.

    For every (model, label proportion, classifier, N_test) group the mean of
    the "No CF" runs is used as baseline; each run's ROC minus that baseline
    is plotted for the classifier type ``mode``, excluding the baseline runs
    themselves.

    Args:
        axis: matplotlib axes to draw on.
        df_model: rows to plot (not modified; a copy is taken).
        mode: value of the ``Classifier`` column to keep.
        xticks: label proportions, in ascending order, used for tick labels.
        n_label_train_total: number of labels at proportion 1.0.
        rotation: rotation of the x tick labels, in degrees.
        legend: forwarded to ``sns.barplot``.
    """
    keys = ["Model Name", "ctrain_label_prop", "Classifier", "N_test"]
    # Work on a copy: df_model is a filtered slice and adding a column to it
    # in place triggers pandas' SettingWithCopyWarning.
    df_model = df_model.copy()
    # Sort key: pretraining rank, with finetuning runs placed after probing.
    df_model["o"] = df_model.Pretraining.apply(
        lambda x: order_dict[x]
    ) + df_model.Classifier.apply(lambda x: 0 if x == "Linear Probing" else 10)
    # NOTE(review): .mean() runs over every remaining column; this assumes
    # they are all numeric — confirm against the CSV schema.
    df_baseline = (
        df_model.loc[df_model.Pretraining == "No CF"]
        .drop(columns=["o", "run_name", "Pretraining"])
        .groupby(keys)
        .mean()
    )
    df_with_base = pd.merge(df_model, df_baseline, on=keys, suffixes=("", "_base"))
    df_with_base[f"diff_{mode}"] = df_with_base["ROC"] - df_with_base["ROC_base"]
    df_with_base = df_with_base.loc[df_with_base["Classifier"] == mode]
    df_with_base = df_with_base.loc[
        ~df_with_base["Pretraining"].isin(["No CF", "ImageNet"])
    ]
    sns.barplot(
        data=df_with_base.sort_values(by="o"),
        x="ctrain_label_prop",
        y=f"diff_{mode}",
        hue="Pretraining",
        ax=axis,
        legend=legend,
        **plt_kwargs,
    )
    # Tick labels are built from xticks, not from the data: this assumes every
    # proportion in xticks is present in the plotted rows — TODO confirm.
    xtickslabels = [
        f"N={int(n_label_train_total * x)}\n({x * 100}%)" for x in xticks
    ]
    axis.set_xticklabels(xtickslabels, rotation=rotation)


f, ax = plt.subplots(2, 4, figsize=(12, 6), facecolor="none")
ax = ax.ravel()
plt.subplots_adjust(left=0.1, right=0.9, bottom=0.1, top=0.9, wspace=0.2, hspace=0.58)
mode = "Linear Probing"

# EMBED in-distribution scanners: panels 0 onwards, largest test set first.
xticks = [0.01, 0.05, 0.1, 0.25, 1.0]
df2 = _load_ablation_results(
    "../outputs/classification_tissueden_results_ablation.csv", xticks
)
for i, model in enumerate(
    df2.sort_values(by="N_test", ascending=False)["Model Name"].unique(), 0
):
    df3 = df2.loc[df2["Model Name"] == model]
    _draw_diff_panel(ax[i], df3, mode, xticks, 223086, rotation, legend=i == 0)
    ax[i].set_title(
        r"$\bf{(ID)}$"
        + f" {model}\n{training_prop[model]}% ID train set, N test = {df3.N_test.unique()[0]}"
    )
    ax[i].set_xlabel("Total number of EMBED labels\n(proportion of training set)")
    # Only the first column of the 2x4 grid keeps its y label.
    if i % 4 > 0:
        ax[i].set_ylabel("")

# EMBED out-of-distribution scanner: panels from index 5.
xticks = [0.05, 0.1, 0.25, 1.0]
df2 = _load_ablation_results("../outputs/embed_results_ood_ablation.csv", xticks)
for i, model in enumerate(
    df2.sort_values(by="N_test", ascending=False)["Model Name"].unique(), 5
):
    df3 = df2.loc[df2["Model Name"] == model]
    _draw_diff_panel(ax[i], df3, mode, xticks, 10927, rotation, legend=False)
    ax[i].set_title(
        r"$\bf{(OOD)}$"
        + f" Senographe Essential\n100% OOD train set, N test = {df3.N_test.unique()[0]}"
    )
    ax[i].set_xlabel("Total number of Senograph labels\n(proportion of training set)")

# VinDr out-of-distribution scanners: panels from index 6. The aggregate
# "(OOD) VinDr" rows are excluded so only per-scanner rows remain.
xticks = [0.05, 0.1, 0.25, 1.0]
df2 = _load_ablation_results("../outputs/vindr_ablation.csv", xticks)
df2 = df2.loc[df2["Model Name"] != "(OOD) VinDr"]
# `model` is deliberately left as a module-level loop variable: the following
# notebook cell reads its last value.
for i, model in enumerate(
    df2.sort_values(by="N_test", ascending=False)["Model Name"].unique(), 6
):
    df3 = df2.loc[df2["Model Name"] == model]
    _draw_diff_panel(ax[i], df3, mode, xticks, 11212, rotation, legend=False)
    modelname = model.replace("\n", " - ")
    modelname = modelname.replace("(OOD)", r"$\bf{(OOD)}$")
    ax[i].set_title(
        f"{modelname}\n{vindr_training_prop[model]}% OOD train set, N test = {df3.N_test.unique()[0]}"
    )
    ax[i].set_xlabel("Total number of VinDR labels\n(proportion of training set)")

f.tight_layout()
plt.savefig("figures/simclr_embed_diff_ablation.pdf", bbox_inches="tight", dpi=300)
plt.show()

In [None]:
# Macro figure: ROC difference with respect to the mean "No CF" baseline,
# pooled over all EMBED scanners (left) and all VinDr scanners (right).
sns.set_theme(context="paper", style="whitegrid", font_scale=0.7)
matplotlib.rcParams["font.family"] = "serif"
rotation = 90


def _load_macro_results(csv_path, xticks):
    """Load one results CSV and keep only the rows that can be plotted.

    Adds the ``ctrain_label_prop``, ``Pretraining`` and ``Classifier`` columns
    derived from ``run_name``, drops rows without a ROC value or label
    proportion, and keeps only the label proportions listed in ``xticks``.
    """
    results = pd.read_csv(csv_path)
    results["ctrain_label_prop"] = results.run_name.apply(extract_train_label_prop)
    results["Pretraining"] = results.run_name.apply(extract_pretraining_type)
    results["Classifier"] = results.run_name.apply(extract_finetuning_type)
    results = results.dropna(subset=["ROC", "ctrain_label_prop"])
    return results.loc[results.ctrain_label_prop.isin(xticks)]


def _draw_macro_diff_panel(
    axis, df_all, mode, xticks, n_label_train_total, xlabel, rotation, legend
):
    """Draw pooled ROC-difference bars on ``axis`` and label its x axis.

    The baseline is still the mean of the "No CF" runs within each
    (model, label proportion, classifier, N_test) group; the bars pool the
    resulting differences over every model in ``df_all``.

    Args:
        axis: matplotlib axes to draw on.
        df_all: rows to plot (not modified; a copy is taken).
        mode: value of the ``Classifier`` column to keep.
        xticks: label proportions, in ascending order, used for tick labels.
        n_label_train_total: number of labels at proportion 1.0.
        xlabel: x-axis label.
        rotation: rotation of the x tick labels, in degrees.
        legend: forwarded to ``sns.barplot``.
    """
    keys = ["Model Name", "ctrain_label_prop", "Classifier", "N_test"]
    # Work on a copy: df_all is a filtered slice and adding a column to it
    # in place triggers pandas' SettingWithCopyWarning.
    df_all = df_all.copy()
    # Sort key: pretraining rank, with finetuning runs placed after probing.
    df_all["o"] = df_all.Pretraining.apply(
        lambda x: order_dict[x]
    ) + df_all.Classifier.apply(lambda x: 0 if x == "Linear Probing" else 10)
    # NOTE(review): .mean() runs over every remaining column; this assumes
    # they are all numeric — confirm against the CSV schema.
    df_baseline = (
        df_all.loc[df_all.Pretraining == "No CF"]
        .drop(columns=["o", "run_name", "Pretraining"])
        .groupby(keys)
        .mean()
    )
    df_with_base = pd.merge(df_all, df_baseline, on=keys, suffixes=("", "_base"))
    df_with_base[f"diff_{mode}"] = df_with_base["ROC"] - df_with_base["ROC_base"]
    df_with_base = df_with_base.loc[df_with_base["Classifier"] == mode]
    df_with_base = df_with_base.loc[
        ~df_with_base["Pretraining"].isin(["No CF", "ImageNet"])
    ]
    sns.barplot(
        data=df_with_base.sort_values(by="o"),
        x="ctrain_label_prop",
        y=f"diff_{mode}",
        hue="Pretraining",
        ax=axis,
        legend=legend,
        **plt_kwargs,
    )
    axis.set_xlabel(xlabel)
    # Tick labels are built from xticks, not from the data: this assumes every
    # proportion in xticks is present in the plotted rows — TODO confirm.
    xtickslabels = [f"N={int(n_label_train_total * x)}\n({x * 100}%)" for x in xticks]
    axis.set_xticklabels(xtickslabels, rotation=rotation)


f, ax = plt.subplots(1, 2, figsize=(10, 3), facecolor="none")
ax = ax.reshape((1, -1))
plt.subplots_adjust(left=0.1, right=0.9, bottom=0.1, top=0.9, wspace=0.2, hspace=0.58)
mode = "Linear Probing"

# Left panel: all EMBED (in-distribution) scanners pooled; carries the legend.
xticks = [0.01, 0.05, 0.1, 0.25, 1.0]
df2 = _load_macro_results(
    "../outputs/classification_tissueden_results_ablation.csv", xticks
)
_draw_macro_diff_panel(
    ax[0, 0],
    df2,
    mode,
    xticks,
    223086,
    "Total number of EMBED labels\n(proportion of training set)",
    rotation,
    legend=True,
)

# Right panel: all VinDr (OOD) scanners pooled. The aggregate "(OOD) VinDr"
# rows are excluded so only per-scanner rows contribute.
xticks = [0.05, 0.1, 0.25, 1.0]
df2 = _load_macro_results("../outputs/vindr_ablation.csv", xticks)
df2 = df2.loc[df2["Model Name"] != "(OOD) VinDr"]
_draw_macro_diff_panel(
    ax[0, 1],
    df2,
    mode,
    xticks,
    11212,
    "Total number of VinDR labels\n(proportion of training set)",
    rotation,
    legend=False,
)
# The y label is shown on the left panel only.
ax[0, 1].set_ylabel("")

ax[0, 0].set_title("Average performance difference over all EMBED scanners (ID)")
ax[0, 1].set_title("Average performance difference over all VinDr scanners (OOD)")

f.tight_layout()
plt.savefig(
    "figures/simclr_embed_diff_ablation_macro.pdf", bbox_inches="tight", dpi=300
)
plt.show()