In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
from skimage.segmentation import find_boundaries
from imageio import imread
from ct_assignment import (classify_vectra_colon, classify_vectra_pancreas, codex_colon_assignment,
                           mibi_decidua_assignment, mibi_breast_assignment, marker_localization,
                           reverse_dict)

## 1 Load and transform data

In [None]:
base_dir = "data"

# File name of the full cell table inside each dataset folder.
cell_table_files = {
    "mibi_decidua": "Supplementary_table_3_single_cells_updated.csv",
    "codex_colon": "cell_table.csv",
    "vectra_colon": "cell_table.csv",
    "vectra_pancreas": "cell_table.csv",
    "mibi_breast": "combined_cell_table_normalized_cell_labels_updated.csv",
}

cell_table_paths = {
    dataset: os.path.join(base_dir, dataset, file_name)
    for dataset, file_name in cell_table_files.items()
}
# The proofread table is stored as ground_truth.csv in every dataset folder.
proofread_cell_table_paths = {
    dataset: os.path.join(base_dir, dataset, "ground_truth.csv")
    for dataset in cell_table_files
}

# One DataFrame per dataset, keyed by dataset name.
df_dict = {dataset: pd.read_csv(path) for dataset, path in cell_table_paths.items()}
proofread_df_dict = {
    dataset: pd.read_csv(path) for dataset, path in proofread_cell_table_paths.items()
}

In [None]:
# Number of rows in each proofread table, and the remaining rows of each full table.
proofread_cells_per_dataset = {
    dataset: table.shape[0] for dataset, table in proofread_df_dict.items()
}
cells_per_dataset = {
    dataset: table.shape[0] - proofread_cells_per_dataset[dataset]
    for dataset, table in df_dict.items()
}

# dataset -> (imaging platform, tissue type, number of channels)
dataset_metadata = {
    "mibi_decidua": ("MIBI-TOF", "Decidua", 23),
    "codex_colon": ("CODEX", "Colon", 36),
    "vectra_colon": ("Vectra", "Colon", 6),
    "vectra_pancreas": ("Vectra", "Pancreas", 6),
    "mibi_breast": ("MIBI-TOF", "Breast", 21),
}
imaging_plattforms = {dataset: meta[0] for dataset, meta in dataset_metadata.items()}
tissue_type = {dataset: meta[1] for dataset, meta in dataset_metadata.items()}
chan_per_dataset = {dataset: meta[2] for dataset, meta in dataset_metadata.items()}

In [None]:
# One row per (dataset, annotation standard): the non-proofread cells are labelled
# "Silver", the proofread cells "Gold".
num_cells_df = pd.DataFrame(cells_per_dataset, index=["number of cells"]).T
proofread_num_cells_df = pd.DataFrame(proofread_cells_per_dataset, index=["number of cells"]).T
num_cells_df = pd.concat([num_cells_df, proofread_num_cells_df], axis=0)
# Label counts follow the number of datasets instead of a hard-coded 5.
num_cells_df["Annotations"] = (
    ["Silver"] * len(cells_per_dataset) + ["Gold"] * len(proofread_cells_per_dataset)
)
num_cells_df["imaging_platform"] = [imaging_plattforms[key] for key in num_cells_df.index]
num_cells_df["tissue_type"] = [tissue_type[key] for key in num_cells_df.index]
num_cells_df = num_cells_df.sort_values(by="number of cells", ascending=False)
num_cells_df["dataset"] = num_cells_df.index
# annotations = cells x channels of the dataset (element-wise product, no row-wise apply)
channels_per_row = [chan_per_dataset[key] for key in num_cells_df["dataset"]]
num_cells_df["num_annotations"] = num_cells_df["number of cells"] * channels_per_row

In [None]:
df = pd.read_csv(os.path.join(base_dir,"gt_pred_ie_consolidated.csv"), index_col=0)
# Rows where either annotation column equals 2 are excluded from the evaluation.
# .copy() makes df_subset an independent frame, so the columns added below are not
# written to a slice of df (avoids SettingWithCopyWarning).
df_subset = df[(df["gt_noisy"] != 2) & (df["gt_proofread"] != 2)].copy()

# Per-row confusion-matrix indicators of the nimbus score against the proofread label.
threshold = 0.5
is_positive = df_subset["gt_proofread"] == 1
is_negative = df_subset["gt_proofread"] == 0
df_subset["tp"] = (is_positive & (df_subset["nimbus"] > threshold)).astype(int)
df_subset["fp"] = (is_negative & (df_subset["nimbus"] > threshold)).astype(int)
df_subset["fn"] = (is_positive & (df_subset["nimbus"] <= threshold)).astype(int)
df_subset["tn"] = (is_negative & (df_subset["nimbus"] <= threshold)).astype(int)

# Sum the indicators once per dataset. The columns are selected explicitly: the first
# positional argument of GroupBy.sum is numeric_only, not a column list.
confusion = df_subset.groupby("dataset")[["tp", "fp", "fn", "tn"]].sum()
precision = (confusion["tp"] / (confusion["tp"] + confusion["fp"])).sort_values(ascending=False)
recall = (confusion["tp"] / (confusion["tp"] + confusion["fn"])).sort_values(ascending=False)
specificity = (confusion["tn"] / (confusion["tn"] + confusion["fp"])).sort_values(ascending=False)
f1 = 2 * precision * recall / (precision + recall)
nimbus_metric_df = pd.DataFrame({"precision": precision, "recall": recall, "specificity": specificity, "f1": f1})


## 2 Figures

### Figure 2 b

In [None]:
# This is the first cell that writes a figure, so make sure the target folder exists.
out_dir = "figures/figure_2"
os.makedirs(out_dir, exist_ok=True)

# Total annotations per imaging platform (all rows of num_cells_df combined).
annotations_per_platform = (
    num_cells_df.groupby("imaging_platform")["num_annotations"].sum()
    .loc[["MIBI-TOF", "Vectra", "CODEX"]]  # fixed bar order
)

fig, ax = plt.subplots()
ax.bar(annotations_per_platform.index, annotations_per_platform / 1e6, width=0.8, color="darkblue")
ax.set_yticks(np.arange(0, 110, 10))
ax.set_ylabel("Number of annotations in millions")
ax.set_xlabel("Imaging platform")
ax.set_title("Annotations per imaging platform")
plt.savefig(os.path.join(out_dir, "number_of_annotations_per_imaging_platform.svg"), dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Total annotations per tissue type (all rows of num_cells_df combined).
annotations_per_tissue = (
    num_cells_df.groupby("tissue_type")["num_annotations"].sum()
    .loc[["Breast", "Colon", "Pancreas", "Decidua"]]  # fixed bar order
)

fig, ax = plt.subplots()
ax.bar(annotations_per_tissue.index, annotations_per_tissue / 1e6, width=0.8, color="darkblue")
ax.set_yticks(np.arange(0, 100, 10))
ax.set_ylabel("Number of annotations in millions")
ax.set_xlabel("Tissue type")
ax.set_title("Annotations per tissue type")
plt.savefig("figures/figure_2/number_of_annotations_per_tissue_type.svg", dpi=300, bbox_inches="tight")
plt.show()

### Figure 2 c

In [None]:
def reverse_dict(dictionary):
    """Invert a ``{group: members}`` mapping into ``{member: group}``.

    Args:
        dictionary: mapping whose values are iterables of members.

    Returns:
        dict mapping every member to the key it was listed under. A member listed
        under several keys is mapped to the last such key in iteration order.

    Note: this definition shadows the ``reverse_dict`` imported from
    ``ct_assignment`` at the top of the notebook.
    """
    return {member: group for group, members in dictionary.items() for member in members}

# Map the dataset-specific cell type / cluster names to a common "plot_lineage" column.
codex_colon_assignment_rev = reverse_dict(codex_colon_assignment)
df_dict["codex_colon"]["plot_lineage"] = df_dict["codex_colon"]["Cell Type"].astype('str').map(codex_colon_assignment_rev)
mibi_decidua_assignment_rev = reverse_dict(mibi_decidua_assignment)
df_dict["mibi_decidua"]["plot_lineage"] = df_dict["mibi_decidua"]["lineage"].astype('str').map(mibi_decidua_assignment_rev)
mibi_breast_assignment_rev = reverse_dict(mibi_breast_assignment)
df_dict["mibi_breast"]["plot_lineage"] = df_dict["mibi_breast"]["cell_meta_cluster"].astype('str').map(mibi_breast_assignment_rev)
# The Vectra tables are classified row by row with the imported helper functions.
df_dict["vectra_colon"]["plot_lineage"] = df_dict["vectra_colon"].apply(classify_vectra_colon, axis=1)
df_dict["vectra_pancreas"]["plot_lineage"] = df_dict["vectra_pancreas"].apply(classify_vectra_pancreas, axis=1)

# Harmonise the key columns to "fov" / "labels" so every table can be merged the same way.
df_dict["mibi_decidua"].rename(columns={"Point": "fov", "cell_ID_in_Point": "labels"}, inplace=True)
df_dict["mibi_breast"].rename(columns={"label":"labels"}, inplace=True)

# Attach plot_lineage to the proofread tables; the inner merge keeps only cells that
# are present in both tables.
for dataset in ["codex_colon", "mibi_decidua", "mibi_breast", "vectra_colon", "vectra_pancreas"]:
    proofread_df_dict[dataset] = proofread_df_dict[dataset].merge(
        df_dict[dataset][["fov", "labels", "plot_lineage"]], how="inner", on=["fov", "labels"]
    )

for dataset in ["vectra_colon", "vectra_pancreas"]:
    # If the proofread table already had a plot_lineage column, the merge suffixes it
    # with _x; that column is the one kept as plot_lineage.
    proofread_df_dict[dataset].rename(columns={"plot_lineage_x": "plot_lineage"}, inplace=True)
    # Spelled "Pan-Negative" (with hyphen) to match the lineage index of the count
    # tables built in the next cell; a different spelling is dropped when the counts
    # are aligned on that index.
    proofread_df_dict[dataset].replace({"Other": "Pan-Negative"}, inplace=True)

In [None]:
def _lineage_counts(frames):
    """Count cells per plot_lineage for every dataset in ``frames``.

    Args:
        frames: dict of DataFrames keyed by dataset name; each frame needs the
            columns "labels" and "plot_lineage".

    Returns:
        DataFrame indexed by lineage with one count column per dataset (lineages
        absent from a dataset are 0) plus a "lineage" column copying the index.
    """
    counts = pd.DataFrame(
        columns=["codex_colon", "mibi_breast", "mibi_decidua", "vectra_colon", "vectra_pancreas"],
        index=["Epithelial", "Lymphocytes", "Muscle", "Myeloids", "Other", "Other Immune", "Precursors", "Stroma", "Vasculature", "Pan-Negative"]
    )
    for dataset in list(counts.columns):
        per_lineage = frames[dataset][["labels", "plot_lineage"]].groupby("plot_lineage").count()["labels"]
        if dataset == "mibi_breast":
            # An explicit Epithelial=0 entry is appended for this dataset.
            # NOTE(review): this would duplicate the label if mibi_breast ever
            # contains Epithelial cells - confirm it never does.
            per_lineage = pd.concat([per_lineage, pd.Series({"Epithelial":0})], axis=0)
        # Column assignment aligns on the lineage index; other lineage names are dropped.
        counts[dataset] = per_lineage
    counts = counts.fillna(0)
    counts["lineage"] = counts.index
    return counts


count_df = _lineage_counts(df_dict)
proofread_count_df = _lineage_counts(proofread_df_dict)

In [None]:
# Total cells per lineage, summed over both count tables and all datasets.
df_temp = pd.concat([count_df , proofread_count_df])
df_temp = df_temp.groupby("lineage").sum().sum(1)

# fixed display order of the bars
loc_list = ["Pan-Negative", 'Epithelial', 'Lymphocytes', 'Other Immune', "Stroma", 'Myeloids', "Vasculature", 'Muscle', 'Other', 'Precursors']
df_temp = df_temp.loc[loc_list]

fig, ax = plt.subplots()
# The bars carry no labels, so no legend is drawn (it would be an empty titled box).
ax.bar(df_temp.index, df_temp.values/1e6, width=0.8, color="darkblue")
plt.xticks(rotation=45)
ax.set_yticks(np.arange(0, 3.5, 1))
ax.set_ylabel("Number of cells in millions")
ax.set_xlabel("Lineage")
ax.set_title("Number of cells per lineage")
plt.savefig("figures/figure_2/number_of_cells_per_lineage.svg", dpi=300, bbox_inches="tight")
plt.show()

### Figure 2 d

In [None]:
df = pd.read_csv(os.path.join(base_dir,"gt_pred_ie_consolidated.csv"), index_col=0)
# Rows where either annotation column equals 2 are excluded; .copy() gives an
# independent frame so the new column is not written to a slice of df.
df_subset = df[(df["gt_noisy"] != 2) & (df["gt_proofread"] != 2)].copy()
# Build the channel -> localization lookup once instead of once per row.
localization_by_channel = reverse_dict(marker_localization)
df_subset["marker localization"] = df_subset["channel"].apply(lambda x: localization_by_channel[x])



In [None]:
# Marker names grouped by localization category.
marker_dict = {
    "ECM": [
        'aSMA', 'CD31', 'MUC1', 'MUC2', 'Vimentin', 'Collagen1', 'FAP',
        'Fibronectin', 'SMA', 'VIM',
    ],
    "membrane": [
        'aDefensin5', 'BCL2', 'CD117', 'CD11c', 'CD138', 'CD15', 'CD16',
        'CD161', 'CD163', 'CD19', 'CD206', 'CD21', 'CD3', 'CD34', 'CD36',
        'CD38', 'CD4', 'CD45', 'CD56', 'CD57', 'CD66', 'CD68', 'CD7',
        'CD8', 'CHGA', 'Cytokeratin', 'HLADR', 'Podoplanin',
        'Synaptophysin', 'CD90', 'ICOS', 'panCK+CK7+CAM5.2', 'PD-L1',
        'CD40', 'CD40-L', 'panCK', 'PD-1', 'Calprotectin', 'CD14', 'CD20',
        'ChyTr', 'CK17', 'ECAD', 'CK7', 'DCSIGN', 'HLAG',
    ],
    "nuclear": ['SOX9', 'FoxP3'],
}

# One row per localization category with the number of markers listed under it.
df_tmp = pd.DataFrame({
    "Marker Localization": list(marker_dict),
    "Counts": [len(markers) for markers in marker_dict.values()],
})


In [None]:
out_dir = "figures/figure_2"
fig_name = "number_of_markers_per_loc.svg"

# marker_dict and df_tmp (number of markers per localization) are built in the
# preceding cell; they are reused here instead of being defined a second time.
df_tmp = df_tmp.sort_values(by="Counts", ascending=False)

# x/y are passed explicitly so the bars are labelled with the localization names.
ax = df_tmp.plot(
    kind='bar', x="Marker Localization", y="Counts",
    figsize=(4,6), rot=0, color="Darkblue", legend=False,
)
ax.set_xlabel("Marker Localization")
ax.set_ylabel("Number of Markers")
ax.set_title("Number of Markers per location")

plt.savefig(os.path.join(out_dir, fig_name), format='svg')
plt.show()

### Figure 2 e

In [None]:
# Number of gold-standard annotations in this work compared with earlier publications.
out_dir = "figures/figure_2"
fig_name = "num_expert_annotations_vs_other_publications.svg"
os.makedirs(out_dir, exist_ok=True)

# 68180 + 5658 combines the two counts previously listed separately in this cell
# as DeepLIIF (68180) and CellSighter (5658).
annotation_counts = pd.Series(
    {
        "All previously\npublished annotations": 68180+5658,
        "Nimbus": 1071428,
    },
    name="Nimbus",
)
df = annotation_counts.to_frame().sort_values(by="Nimbus", ascending=False)

ax = df.plot(kind='bar', stacked=False, figsize=(4,4), rot=0, legend=False, color="Darkblue")
# y ticks every 200,000 from 0 to 1,000,000
ax.set_yticks(np.arange(0, 1200000, 200000))
ax.set_title("Number of Gold-Standard Annotations")
ax.set(xlabel='Publication', ylabel='Number of Annotations in millions')

plt.savefig(os.path.join(out_dir, fig_name), format='svg')
plt.show()

### Figure 2 f

In [None]:
# Gold standard:   208 h for 1.1m annotations
# Silver standard:  39 h for 197m annotations
# 1.1m / 208h ≈ 5288 annotations per hour
# 197m / 39h ≈ 5051282 annotations per hour

In [None]:
out_dir = "figures/figure_2"
fig_name = "gold_vs_silver_annotation_effort.svg"

# Annotation effort in hours per dataset, one column per annotation standard.
df = pd.DataFrame(
    {
        "Gold": [24+16,36+12,20+16,20+16,36+12],
        "Silver": [6, 6, 9, 9, 9,],
    },
    index=["decidua", "hickey colon", "msk colon", "msk pancreas", "tonic tnbc"],
).sort_values(by="Gold", ascending=False)

# Only the column totals (hours summed over all datasets) are plotted.
total_hours = df.sum()
total_hours.plot(kind='bar', stacked=False, figsize=(8,6), rot=0, color="Darkblue")
plt.xlabel("Standard")
plt.ylabel("Annotation effort in hours")

plt.savefig(os.path.join(out_dir, fig_name), format='svg')
plt.show()

## 3 Overlays

#### Figure 2 a

In [None]:

from skimage.segmentation import find_boundaries
from matplotlib.colors import ListedColormap

# Consolidated table; the cells below read its fov, channel, labels, gt_noisy,
# gt_proofread and plot_lineage columns.
df = pd.read_csv(os.path.join(base_dir,"gt_pred_ie_consolidated.csv"), index_col=0)
# Segmentation image of FOV TONIC_TMA10_R5C4, squeezed to drop singleton axes.
seg_img = np.squeeze(imread(os.path.join(base_dir, "mibi_breast", "segmentation_data", "deepcell_output", "TONIC_TMA10_R5C4_feature_0.tif")))
def relabel_segmentation(seg_img, df_example, key="plot_lineage_cat", add=0):
    """Paint every segmented cell with a per-cell value taken from a table.

    Args:
        seg_img: label image; pixels equal to a value in ``df_example["labels"]``
            are overwritten.
        df_example: table with a "labels" column and the column named by ``key``.
        key: column whose value is written into the pixels of the matching cell.
        add: constant added to every written value.

    Returns:
        Array with the shape and dtype of ``seg_img``; pixels whose label does not
        occur in the table stay 0.
    """
    relabelled = np.zeros_like(seg_img)
    # Rows are applied in table order, so a later row overwrites an earlier one
    # that carries the same label.
    for record in df_example.to_dict("records"):
        relabelled[seg_img == record["labels"]] = record[key] + add
    return relabelled

# Everything that does not depend on the channel is computed once, outside the loop.
out_dir = "figures/figure_2"
fov_name = "TONIC_TMA10_R5C4"
df_fov = df[df.fov == fov_name]
boundaries = find_boundaries(seg_img, mode='inner')

# Four-entry colormap: rows 0 and 1 are black, row 2 is red, row 3 is green.
cmap = np.zeros([4,3])
cmap[2,:] = [1,0,0]
cmap[3,:] = [0,1,0]
cmap = ListedColormap(cmap)

for chan in ["ECAD", "CD45", "CD8"]:
    image = np.squeeze(imread(os.path.join(base_dir, "mibi_breast", "image_data", "samples", fov_name, f"{chan}.tiff")))
    df_example = df_fov[df_fov.channel == chan]
    # add=1 shifts the annotation values so that 0 is left for pixels outside any listed cell
    gold_standard_img = relabel_segmentation(seg_img, df_example, key="gt_proofread", add=1)
    silver_standard_img = relabel_segmentation(seg_img, df_example, key="gt_noisy", add=1)

    # blank the cell outlines so neighbouring cells are visually separated
    gold_standard_img[boundaries] = 0
    silver_standard_img[boundaries] = 0

    # NOTE(review): imshow is called without vmin/vmax, so which colormap row a value
    # lands on depends on the value range present in each image - confirm the colours
    # come out as intended for every channel.
    fig, ax = plt.subplots(1,1,figsize=(10,10))
    ax.imshow(silver_standard_img, cmap=cmap, interpolation='none')
    ax.axis("off")
    plt.savefig(os.path.join(out_dir, f"silver_standard_{chan}.svg"), format='svg', bbox_inches='tight', dpi=300, transparent=True)
    plt.show()

    fig, ax = plt.subplots(1,1,figsize=(10,10))
    ax.imshow(np.clip(gold_standard_img, 0, 2), cmap=cmap, interpolation='none')
    ax.axis("off")
    # file name now follows the silver_standard_{chan} pattern (underscore before the channel)
    plt.savefig(os.path.join(out_dir, f"gold_standard_{chan}.svg"), format='svg', bbox_inches='tight', dpi=300, transparent=True)
    plt.show()

    # raw channel image, displayed up to its 99th percentile
    fig, ax = plt.subplots(1,1,figsize=(10,10))
    ax.imshow(image, cmap="Greys_r", vmax=np.quantile(image, 0.99), interpolation='none')
    ax.axis("off")
    plt.savefig(os.path.join(out_dir, f"{chan}.svg"), format='svg', bbox_inches='tight', dpi=300, transparent=True)
    plt.show()

In [None]:
import matplotlib.patches as mpatches

# Base colour of every lineage shown in the overlay and its legend.
colors = {
 'Other': [0.25, 0.25, 0.25],
 'Stroma': [0.5, 0.3, 0.3],
 'Cancer': [0.0, 0.85, 0.0],
 "Lymphocytes": [0.85, 0.3, 0.3],
 "Other Immune": [0.3, 0.85, 0.3],
 "Myeloids": [0.8, 0.6, 0.3],
 "Muscle": [0.3, 0.3, 0.65],
 "Vasculature": [0.3, 0.3, 0.85],
}
lineage_names = list(colors.keys())

# Row 0 is the background (black); rows 1..n are the lineage colours with a small
# random jitter. A fixed seed keeps the jitter identical between runs.
rng = np.random.default_rng(0)
base_colors = np.stack([[0,0,0]] + list(colors.values()))
ccmap_plot = np.clip(base_colors + (rng.random(base_colors.shape) - 0.5) * 0.2, 0, 1)
ccmap_plot[0,:] = 0
ccmap = ListedColormap(ccmap_plot)

# lineage name -> integer code (1-based; 0 is reserved for the background)
name_to_int = {k: v+1 for v,k in enumerate(lineage_names)}
# .copy() so the new column is not written to a slice of df
df_example = df[(df.fov == "TONIC_TMA10_R5C4") & (df.channel == "ECAD")].copy()
df_example["plot_lineage_cat"] = df_example["plot_lineage"].apply(lambda x: name_to_int[x])
lineage_img = relabel_segmentation(seg_img, df_example, key="plot_lineage_cat", add=0)
boundaries = find_boundaries(seg_img, mode='inner')
lineage_img[boundaries] = 0

out_dir = os.path.join("figures", "figure_2")
fig, ax =  plt.subplots(1,1,figsize=(10,10))
# vmin/vmax pin integer code i to colormap row i even when a lineage is absent from
# this FOV; without them imshow rescales to the data range and the image colours can
# drift away from the legend.
ax.imshow(np.squeeze(lineage_img), cmap=ccmap, interpolation='none', vmin=-0.5, vmax=len(colors) + 0.5)
# one proxy patch per lineage, used as the legend handles
patches = [mpatches.Patch(color=ccmap_plot[i+1], label=lineage_names[i]) for i in range(len(lineage_names))]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0. )
ax.axis("off")
plt.savefig(os.path.join(out_dir, "cell_type.svg"), format='svg', bbox_inches='tight', dpi=300, transparent=True)
plt.show()

In [None]:
df = pd.read_csv(os.path.join(base_dir,"gt_pred_ie_consolidated.csv"), index_col=0)
# Rows where either annotation column equals 2 are excluded; .copy() gives an
# independent frame so the columns added below are not written to a slice of df.
df_subset = df[(df["gt_noisy"] != 2) & (df["gt_proofread"] != 2)].copy()
# Build the channel -> localization lookup once instead of once per row.
localization_by_channel = reverse_dict(marker_localization)
df_subset["marker localization"] = df_subset["channel"].apply(lambda x: localization_by_channel[x])

# Per-row confusion-matrix indicators of the nimbus score against the proofread label.
# NOTE(review): this cell thresholds at 0.41 while the metrics cell in section 1
# uses 0.5 - confirm the difference is intended.
threshold = 0.41
is_positive = df_subset["gt_proofread"] == 1
is_negative = df_subset["gt_proofread"] == 0
df_subset["tp"] = (is_positive & (df_subset["nimbus"] > threshold)).astype(int)
df_subset["fp"] = (is_negative & (df_subset["nimbus"] > threshold)).astype(int)
df_subset["fn"] = (is_positive & (df_subset["nimbus"] <= threshold)).astype(int)
df_subset["tn"] = (is_negative & (df_subset["nimbus"] <= threshold)).astype(int)

# Sum the indicators once per dataset. The columns are selected explicitly: the first
# positional argument of GroupBy.sum is numeric_only, not a column list.
confusion = df_subset.groupby("dataset")[["tp", "fp", "fn", "tn"]].sum()
precision = (confusion["tp"] / (confusion["tp"] + confusion["fp"])).sort_values(ascending=False)
recall = (confusion["tp"] / (confusion["tp"] + confusion["fn"])).sort_values(ascending=False)
specificity = (confusion["tn"] / (confusion["tn"] + confusion["fp"])).sort_values(ascending=False)
f1 = 2 * precision * recall / (precision + recall)
nimbus_metric_df = pd.DataFrame({"precision": precision, "recall": recall, "specificity": specificity, "f1": f1})
nimbus_metric_df

#### Figure 2 g

In [None]:
from skimage.io import imread

# Load the panCK, CD8, CD40 and PD-1 channels of one Vectra pancreas FOV and build a
# gold-standard mask per channel.

img_dir = os.path.join(base_dir, "vectra_pancreas", "raw_structured")
seg_dir = os.path.join(base_dir, "vectra_pancreas", "segmentation")

fovs = ["0852a4103bed Pancreas_PANEL7-10_CD40L,_CD40,_PD1,_PDL1,CD8,CK_[61352,11423]_component_data.tif_image"]


def segmentation_naming_convention(fname):
    """Return the path of the segmentation mask of FOV ``fname`` inside ``seg_dir``.

    NOTE(review): the FOV name and "feature_0.ome.tif" are joined without a
    separator - confirm the mask files are really named that way.
    """
    return os.path.join(
        seg_dir,
        fname + "feature_0.ome.tif"
    )


p = os.path.join(img_dir, fovs[0])
df_gold = pd.read_csv(os.path.join(base_dir, "vectra_pancreas", "ground_truth.csv"))
instance_mask_path = segmentation_naming_convention(fovs[0])
instance_mask = imread(instance_mask_path)
df_tmp = df_gold[df_gold.fov == fovs[0]]

# nuclear channel, scaled so that its 99th percentile equals 1
p_nuclei = os.path.join(p, "DAPI.ome.tif")
nuclei_img = imread(p_nuclei).astype(np.float32)
nuclei_img /= np.quantile(nuclei_img, 0.99)

imgs = []
gt_imgs = []
for c in ["panCK", "CD8", "CD40", "PD-1"]:
    fpath = os.path.join(p, c + ".ome.tif")
    image = imread(fpath)
    imgs.append(image)
    # Mask values: 1 for cells annotated 1 in column c, 0.25 for all other cells of
    # the table, 0 for pixels that belong to no listed cell.
    out_img = np.zeros_like(instance_mask, dtype=np.float32)
    idx = np.isin(instance_mask, df_tmp.labels[df_tmp[c] == 1].values)
    out_img[idx] = 1
    idx = np.isin(instance_mask, df_tmp.labels[df_tmp[c] != 1].values)
    out_img[idx] = 0.25
    gt_imgs.append(out_img)

In [None]:
# make overlay plots
out_dir = "figures/figure_2"

# 3x3 colour mixing matrix and its inverse. The matrix has its own name so that the
# function rgb_to_cmyk defined below does not overwrite it (cmyk_to_rgb needs it).
rgb_to_cmyk_matrix = np.array([[0.0, 1.0, 1.0],
                               [1.0, 0.0, 1.0],
                               [1.0, 1.0, 0.0]])
cmyk_from_rgb = np.linalg.inv(rgb_to_cmyk_matrix)


def rgb_to_cmyk(rgb):
    """Multiply the last axis of ``rgb`` with the inverse mixing matrix."""
    return np.dot(rgb, cmyk_from_rgb)


def cmyk_to_rgb(cmyk):
    """Inverse of ``rgb_to_cmyk``: multiply the last axis with the mixing matrix."""
    return np.dot(cmyk, rgb_to_cmyk_matrix)


# Three-channel composite of CD8, panCK and CD40. The float cast makes the in-place
# division valid for integer images as well.
composite = np.stack([imgs[1], imgs[0], imgs[2]], -1).astype(np.float32)
composite /= np.quantile(composite, 0.99, axis=(0,1))
composite = rgb_to_cmyk(composite)
fig, ax =  plt.subplots(1,1,figsize=(10,10))
ax.imshow(nuclei_img, cmap="gray", vmin=0, vmax=.75, interpolation="none")
ax.imshow(composite, vmin=0, vmax=1.0, interpolation="none", alpha=0.7)
ax.axis("off")
plt.savefig(os.path.join(out_dir, "vectra_pancreas_composite.svg"), format='svg', bbox_inches='tight', dpi=300, transparent=True)
plt.show()

# one gold-standard mask per channel, in the order the masks were built
for i,c in enumerate(["panCK", "CD8", "CD40", "PD-1"]):
    fig, ax =  plt.subplots(1,1,figsize=(10,10))
    ax.imshow(np.squeeze(gt_imgs[i]), cmap="Greys_r")
    ax.axis("off")
    plt.savefig(os.path.join(out_dir, "vectra_pancreas_{c}.svg").format(c=c), format='svg', bbox_inches='tight', dpi=300, transparent=True)
    plt.show()

#### Figure 2 h

In [None]:
# Load the Cytokeratin, CD45, CD163 and CD68 channels of one CODEX colon FOV (cropped
# to a fixed window) and build a gold-standard mask per channel.
fovs = ["B011B_reg001_X01_Y01_Z01"]
# crop window per FOV: [row slice, column slice]
slices = {
    "B011B_reg001_X01_Y01_Z01": [slice(2000,4048), slice(4000,6048)],
}

img_dir = os.path.join(base_dir, "codex_colon", "raw_structured")
seg_dir = os.path.join(base_dir, "codex_colon", "masks")


def segmentation_naming_convention(fov_path):
    """Return the path of the labelled mask that belongs to ``fov_path``.

    The first two underscore-separated parts of the base name are used as folder
    name inside ``seg_dir`` and as region tag. The first file in that folder whose
    name contains both "_labeled" and the region tag is returned; an IndexError is
    raised if there is none.
    """
    fname = os.path.basename(fov_path)
    fov, reg = fname.split("_")[:2]
    fov_path = os.path.join(seg_dir, fov)
    images = os.listdir(fov_path)
    labels = [img for img in images if "_labeled" in img]
    labels = [img for img in labels if reg in img]
    label_fname = labels[0]
    return os.path.join(os.path.normpath(fov_path), label_fname)


row_slice, col_slice = slices[fovs[0]]

p = os.path.join(img_dir, fovs[0])
df_gold = pd.read_csv(os.path.join(base_dir, "codex_colon", "ground_truth.csv"))
instance_mask_path = segmentation_naming_convention(fovs[0])
instance_mask = imread(instance_mask_path)
instance_mask = instance_mask[row_slice, col_slice]
df_tmp = df_gold[df_gold.fov == fovs[0]]

# nuclear channel, cropped and scaled so that its 99th percentile equals 1
p_nuclei = os.path.join(img_dir, fovs[0], "DRAQ5.ome.tif")
nuclei_img = imread(p_nuclei).astype(np.float32)
nuclei_img = nuclei_img[row_slice, col_slice]
nuclei_img /= np.quantile(nuclei_img, 0.99)

imgs = []
gt_imgs = []
for c in ["Cytokeratin", "CD45", "CD163", "CD68"]:
    fpath = os.path.join(p, c)
    image = imread(fpath + ".ome.tif")
    image = image[row_slice, col_slice]
    imgs.append(image)
    # Mask values: 1 for cells annotated 1 in column c, 0.25 for all other cells of
    # the table, 0 for pixels that belong to no listed cell.
    out_img = np.zeros_like(instance_mask, dtype=np.float32)
    idx = np.isin(instance_mask, df_tmp.labels[df_tmp[c] == 1].values)
    out_img[idx] = 1
    idx = np.isin(instance_mask, df_tmp.labels[df_tmp[c] != 1].values)
    out_img[idx] = 0.25
    gt_imgs.append(out_img)

In [None]:
# make overlay plots
# 3x3 colour mixing matrix and its inverse. The matrix has its own name so that the
# function rgb_to_cmyk defined below does not overwrite it (cmyk_to_rgb needs it).
rgb_to_cmyk_matrix = np.array([[0.0, 1.0, 1.0],
                               [1.0, 0.0, 1.0],
                               [1.0, 1.0, 0.0]])
cmyk_from_rgb = np.linalg.inv(rgb_to_cmyk_matrix)


def rgb_to_cmyk(rgb):
    """Multiply the last axis of ``rgb`` with the inverse mixing matrix."""
    return np.dot(rgb, cmyk_from_rgb)


def cmyk_to_rgb(cmyk):
    """Inverse of ``rgb_to_cmyk``: multiply the last axis with the mixing matrix."""
    return np.dot(cmyk, rgb_to_cmyk_matrix)


# Three-channel composite of CD45, Cytokeratin and CD163, each channel scaled by its
# own 99th percentile.
composite = np.stack([imgs[1], imgs[0], imgs[2]], -1).astype(np.float32)
composite /= np.quantile(composite, 0.99, axis=(0,1))
composite = rgb_to_cmyk(composite)
fig, ax =  plt.subplots(1,1,figsize=(10,10))
ax.imshow(nuclei_img, cmap="gray", vmin=0, vmax=.75, interpolation="none")
ax.imshow(composite, vmin=0, vmax=1.0, interpolation="none", alpha=0.7)
ax.axis("off")
plt.savefig(os.path.join(out_dir, "codex_colon_composite.svg"), format='svg', bbox_inches='tight', dpi=300, transparent=True)
plt.show()

# one gold-standard mask per channel, in the order the masks were built
for i,c in enumerate(["Cytokeratin", "CD45", "CD163", "CD68"]):
    fig, ax =  plt.subplots(1,1,figsize=(10,10))
    ax.imshow(np.squeeze(gt_imgs[i]), cmap="Greys_r")
    ax.axis("off")
    plt.savefig(os.path.join(out_dir, "codex_colon_{c}.svg").format(c=c), format='svg', bbox_inches='tight', dpi=300, transparent=True)
    plt.show()

#### Figure 2 i

In [None]:
# Load the ECAD, CD45 and CD163 channels of one MIBI breast FOV and build a
# gold-standard mask per channel.
fovs = ["TONIC_TMA10_R5C4"]

img_dir = os.path.join(base_dir, "mibi_breast", "image_data", "samples")


df_gold = pd.read_csv(os.path.join(base_dir, "mibi_breast", "ground_truth.csv"))
instance_mask = np.squeeze(imread(os.path.join(base_dir, "mibi_breast", "segmentation_data", "deepcell_output", f"{fovs[0]}_feature_0.tif")))
df_tmp = df_gold[df_gold.fov == fovs[0]]

# The nuclear image is used as read from disk. A percentile normalisation used to be
# computed here, but its result was overwritten by a second imread right away, so
# only the raw read is kept.
# NOTE(review): the other overlays normalise the nuclear channel by its 99th
# percentile - confirm the raw image (plotted with vmax=.08 in the next cell) is intended.
p_nuclei = os.path.join(img_dir, fovs[0], "H3K9ac.tiff")
nuclei_img = imread(p_nuclei)

imgs = []
gt_imgs = []
for c in ["ECAD", "CD45", "CD163"]:
    image = np.squeeze(imread(os.path.join(img_dir, fovs[0], f"{c}.tiff")))
    imgs.append(image)
    # Mask values: 1 for cells annotated 1 in column c, 0.25 for all other cells of
    # the table, 0 for pixels that belong to no listed cell.
    out_img = np.zeros_like(instance_mask, dtype=np.float32)
    idx = np.isin(instance_mask, df_tmp.labels[df_tmp[c] == 1].values)
    out_img[idx] = 1
    idx = np.isin(instance_mask, df_tmp.labels[df_tmp[c] != 1].values)
    out_img[idx] = 0.25
    gt_imgs.append(out_img)

In [None]:
# make overlay plots
# 3x3 colour mixing matrix and its inverse. The matrix has its own name so that the
# function rgb_to_cmyk defined below does not overwrite it (cmyk_to_rgb needs it).
rgb_to_cmyk_matrix = np.array([[0.0, 1.0, 1.0],
                               [1.0, 0.0, 1.0],
                               [1.0, 1.0, 0.0]])
cmyk_from_rgb = np.linalg.inv(rgb_to_cmyk_matrix)


def rgb_to_cmyk(rgb):
    """Multiply the last axis of ``rgb`` with the inverse mixing matrix."""
    return np.dot(rgb, cmyk_from_rgb)


def cmyk_to_rgb(cmyk):
    """Inverse of ``rgb_to_cmyk``: multiply the last axis with the mixing matrix."""
    return np.dot(cmyk, rgb_to_cmyk_matrix)


# Three-channel composite of CD45, ECAD and CD163. The float cast makes the in-place
# division valid for integer images as well.
composite = np.stack([imgs[1], imgs[0], imgs[2]], -1).astype(np.float32)
composite /= np.quantile(composite, 0.99, axis=(0,1))
composite = rgb_to_cmyk(composite)
fig, ax =  plt.subplots(1,1,figsize=(10,10))
ax.imshow(nuclei_img, cmap="gray", vmin=0, vmax=.08, interpolation="none")
ax.imshow(composite, vmin=0, vmax=1.0, interpolation="none", alpha=0.7)
ax.axis("off")
plt.savefig(os.path.join(out_dir, "mibi_breast_composite.svg"), format='svg', bbox_inches='tight', dpi=300, transparent=True)
plt.show()

# The masks in gt_imgs were built for ECAD, CD45 and CD163 (in that order), so the
# files are named after those channels; the first one was previously saved as "CK17".
for i,c in enumerate(["ECAD", "CD45", "CD163"]):
    fig, ax =  plt.subplots(1,1,figsize=(10,10))
    ax.imshow(np.squeeze(gt_imgs[i]), cmap="Greys_r")
    ax.axis("off")
    plt.savefig(os.path.join(out_dir, "mibi_breast_{c}.svg").format(c=c), format='svg', bbox_inches='tight', dpi=300, transparent=True)
    plt.show()
