In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


## Load and transform data

In [None]:
def _read_indexed_csv(csv_path):
    """Read a CSV file, using its first column as the row index."""
    return pd.read_csv(csv_path, index_col=0)


# Location of the conversion-matrix CSV for each tissue / dataset.
conversion_matrix = {
    "mibi_decidua": "data/mibi_decidua/conversion_matrix.csv",
    "codex_colon": "data/codex_colon/conversion_matrix.csv",
    "mibi_breast": "data/mibi_breast/TONIC_conversion_matrix.csv",
}

# One DataFrame per tissue, keyed exactly like `conversion_matrix`.
conversion_df = {
    tissue: _read_indexed_csv(csv_path)
    for tissue, csv_path in conversion_matrix.items()
}

# Table of experiment results that the bar-plot cells below slice by column.
experiments_df = _read_indexed_csv(os.path.join("data", "experimental_results.csv"))

## Plot supplement figures

In [None]:
# Draw one clustered heatmap per tissue conversion matrix and save each as a PNG.

# Make sure the output folder exists so this cell also works on a fresh checkout
# (Restart Kernel -> Run All), independent of the later cells that create it.
out_dir = "figures/supplement"
os.makedirs(out_dir, exist_ok=True)

# Three discrete colors: darkcyan, cyan, lightgray. The colorbar below is split
# into three equal bands labelled, in ascending order, negative / positive / undefined.
# (assumes the matrices only hold three distinct values, e.g. 0/1/2 - TODO confirm)
cmap = sns.color_palette(["darkcyan", "cyan", "lightgray"])
n = 3  # number of color bands on the colorbar

for key in conversion_df.keys():
    clustergrid = sns.clustermap(conversion_df[key], cmap=cmap)
    # Keep the clustered ordering of rows/columns but hide the dendrograms.
    clustergrid.ax_row_dendrogram.set_visible(False)
    clustergrid.ax_col_dendrogram.set_visible(False)
    ax = clustergrid.ax_heatmap

    # NOTE(review): plt.title / plt.yticks act on pyplot's *current* axes, which
    # may not be the heatmap axes `ax` - confirm that the title and the tick
    # rotation end up where intended.
    plt.title(key)
    plt.yticks(rotation=0)
    ax.set(xlabel="Marker activity", ylabel="Cell Type")

    # Place one tick at the centre of each of the n equal-width bands of the
    # colorbar range and give it a categorical label.
    colorbar = ax.collections[0].colorbar
    r = colorbar.vmax - colorbar.vmin
    colorbar.set_ticks([colorbar.vmin + (i + 0.5) * r / n for i in range(n)])
    colorbar.set_ticklabels(["negative", "positive", "undefined"])
    # Reset the side on which the colorbar ticks are drawn to matplotlib's default.
    colorbar.ax.yaxis.set_ticks_position("default")

    plt.savefig(os.path.join(out_dir, f"conversion_matrix_{key}.png"), dpi=300)
    plt.show()

In [None]:

# Grouped bar chart comparing the "noise_robust" and "naive" result columns.
out_dir = "figures/supplement"
fig_name = "naive_vs_noise_robust.svg"
os.makedirs(out_dir, exist_ok=True)

# Pick the two columns and order the rows by descending noise-robust value.
df = experiments_df[["noise_robust", "naive"]].sort_values(
    by="noise_robust", ascending=False
)

# Side-by-side bars, one group per row of `df`.
ax = df.plot(
    kind="bar",
    stacked=False,
    figsize=(4, 4),
    rot=0,
    legend=False,
    color=["Darkblue", "Gray"],
)
ax.set_ylim(0.0, 1)
ax.set_title("Performance of Naive vs. Noise-robust training")
ax.set_xlabel("Metric")
ax.set_ylabel("Value")
# Legend entries follow the column order selected above.
ax.legend(["Noise robust", "Naive"], loc="upper right")

# Enlarge the current figure before writing it to disk.
plt.gcf().set_size_inches(5, 5)
plt.savefig(os.path.join(out_dir, fig_name), format="svg")
plt.show()

In [None]:
# Grouped bar chart comparing the "two_channel" and "four_channel" result columns.
out_dir = "figures/supplement"
fig_name = "two_vs_four_channel.svg"
os.makedirs(out_dir, exist_ok=True)

# Pick the two columns and order the rows by descending two-channel value.
df = experiments_df[["two_channel", "four_channel"]].sort_values(
    by="two_channel", ascending=False
)

# Side-by-side bars, one group per row of `df`.
ax = df.plot(
    kind="bar",
    stacked=False,
    figsize=(4, 4),
    rot=0,
    legend=False,
    color=["Darkblue", "Gray"],
)
ax.set_ylim(0.0, 1)
ax.set_title("Performance of two vs. four channel baseline models")
ax.set_xlabel("Metric")
ax.set_ylabel("Value")
# Legend entries follow the column order selected above.
ax.legend(["two channels", "four channels"], loc="upper right")

# Enlarge the current figure before writing it to disk.
plt.gcf().set_size_inches(5, 5)
plt.savefig(os.path.join(out_dir, fig_name), format="svg")
plt.show()

In [None]:
# Two figures comparing the "full_resolution" and "half_resolution" result columns:
# (1) the four quality metrics, (2) throughput.
out_dir = "figures/supplement"
os.makedirs(out_dir, exist_ok=True)
fig_name = "full_vs_half_resolution.svg"

# NOTE(review): the column is called "half_resolution" while the legends below
# say "quarter resolution" - confirm which wording is intended.
df = experiments_df[["full_resolution", "half_resolution"]]

df = df.sort_values(by="half_resolution", ascending=False)
# Throughput values are hard-coded and appended as an extra row, in column
# order (full_resolution, half_resolution). TODO: document where they come from.
df.loc["Throughput\n per second"] = [0.834, 1.341]

# --- Figure 1: quality metrics as grouped bars ---
fig, ax = plt.subplots()

df.loc[["Specificity", "Precision", "F1 score", "Recall"]].plot(kind='bar', stacked=False, figsize=(4,4), rot=0, legend=False, color=["Darkblue", "Gray"], ax=ax)

ax.set_ylim(0.0,1)
# The title was previously copy-pasted from the two-vs-four-channel figure.
ax.set_title("Performance of full vs. quarter resolution models")
ax.set(xlabel='Metric', ylabel='Value')

# add legend to top right
ax.legend(["full resolution", "quarter resolution"], loc="upper right")
# make plot larger
plt.gcf().set_size_inches(5, 5)
plt.savefig(os.path.join(out_dir, fig_name), format='svg')
plt.show()

# --- Figure 2: throughput on its own axis, since it exceeds the 0-1 metric range ---
fig_name = "throughput.svg"
fig, ax = plt.subplots()

df.loc[["Throughput\n per second"]].plot(kind='bar', stacked=False, figsize=(4,4), rot=0, legend=False, color=["Darkblue", "Gray"], ax=ax)

ax.set_ylim(0.0,1.4)
# The title was previously copy-pasted from the two-vs-four-channel figure.
ax.set_title("Throughput of full vs. quarter resolution models")
ax.legend(["full resolution", "quarter resolution"], loc="upper right")

plt.gcf().set_size_inches(5, 5)
plt.savefig(os.path.join(out_dir, fig_name), format='svg')
plt.show()


In [None]:
# Grouped bar chart of every metric for each backbone architecture.
out_dir = "figures/supplement"
fig_name = "different_backbones.svg"
os.makedirs(out_dir, exist_ok=True)

# Select the backbone columns and transpose so that each backbone becomes a row.
df = experiments_df[
    [
        'Res-U-Net',
        'EfficientNetV2-S',
        'EfficientNetV2-M',
        'EfficientNetV2-L',
        'NASNet-M',
        'NASNet-L',
        'ResNet50',
        'ResNet101',
        'ResNet152',
    ]
].T
# Long format for seaborn: one row per (backbone, metric) pair.
df = df.assign(backbone=df.index).melt(
    id_vars="backbone", var_name="metric", value_name="value"
)

# One group of bars per backbone, one bar per metric in a fixed hue order.
ax = sns.barplot(
    data=df,
    x="backbone",
    y="value",
    hue="metric",
    palette="tab10",
    hue_order=["F1 score", "Recall", "Precision", "Specificity"],
    saturation=1,
)
# Tilt the backbone names on the x axis.
plt.xticks(rotation=45)
ax.set_ylim(0.0, 1)
ax.set_title("Performance of different backbone architectures vs. gold standard")
ax.set_xlabel('Backbone architectures - noise naive training')
ax.set_ylabel('Score')

plt.savefig(os.path.join(out_dir, fig_name), format='svg')
plt.show()

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score

# Compute ROC curves and AUC of the "nimbus" scores against the "gt_proofread"
# labels: once for all rows together ("all") and once per dataset.
merged_df = pd.read_csv("data/gt_pred_ie_consolidated.csv")

# Keep only rows whose proofread AND noisy labels are both below 2.
# (presumably 2 marks an undefined / ambiguous label - TODO confirm)
merged_subset = merged_df[(merged_df["gt_proofread"] < 2) & (merged_df["gt_noisy"] < 2)]

# Parallel lists: entry i of every list describes the same curve.
roc_curves = {
    "dataset": [],
    "fpr": [],
    "tpr": [],
    "thresholds": [],
    "roc_auc_score": []
}

# "all" comes first, followed by one subset per dataset in order of first
# appearance in merged_df. The per-dataset mask is built from merged_subset
# itself rather than from the unfiltered merged_df, so frame and mask share the
# same index and pandas does not have to silently re-align the boolean key.
subsets = [("all", merged_subset)]
subsets += [
    (name, merged_subset[merged_subset["dataset"] == name])
    for name in merged_df["dataset"].unique()
]

for dataset, subset in subsets:
    fpr, tpr, thresholds = roc_curve(subset["gt_proofread"], subset["nimbus"])
    auc = roc_auc_score(subset["gt_proofread"], subset["nimbus"])
    roc_curves["dataset"].append(dataset)
    roc_curves["fpr"].append(fpr)
    roc_curves["tpr"].append(tpr)
    roc_curves["thresholds"].append(thresholds)
    roc_curves["roc_auc_score"].append(auc)


In [None]:
# Plot every ROC curve collected in `roc_curves` into one figure and save it as SVG.

# Display names for the legend; datasets missing from this map fall back to
# their raw name instead of raising a KeyError.
rename_dataset = {'all': "Pan-M",
 'codex_colon': "Codex Colon",
 'vectra_colon': "Vectra Colon",
 'vectra_pancreas': "Vectra Pancreas",
 'mibi_breast': "MIBI-TOF Breast",
 'mibi_decidua': "MIBI-TOF Decidua"}

# Make sure the output folder exists even if this cell is run on its own.
os.makedirs("figures/supplement", exist_ok=True)

# plot roc curves
plt.figure(figsize=(10, 10))
for i, dataset in enumerate(roc_curves["dataset"]):
    label = f"{rename_dataset.get(dataset, dataset)} (AUC = {roc_curves['roc_auc_score'][i]:.2f})"
    # The pooled curve is drawn in solid black; per-dataset curves are
    # semi-transparent and use matplotlib's default color cycle.
    line_style = {"color": "black"} if dataset == "all" else {"alpha": 0.5}
    plt.plot(roc_curves["fpr"][i], roc_curves["tpr"][i], label=label, **line_style)
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], color='black', linestyle='--')
# increase font size of axis labels, title and ticks
# (the x label previously used 24 while the y label and title used 20)
plt.xlabel("False Positive Rate", fontsize=20)
plt.ylabel("True Positive Rate", fontsize=20)
plt.title("ROC Curve", fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
# set the x and y axis limits
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.legend()
plt.savefig("figures/supplement/roc_curve.svg")