In [None]:
# import required packages
import sys
sys.path.append("../src")
import os
import numpy as np
import matplotlib.pyplot as plt
from skimage.segmentation import find_boundaries
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
import pandas as pd
from tqdm.notebook import tqdm
from copy import copy
import imageio as io

## 1. Load and transform data

In [None]:
base_dir = "data"

# table with one row per scored entry; the columns "gt_proofread" and "nimbus"
# are the ones used in this cell
merged_df = pd.read_csv(os.path.join(base_dir, "gt_pred_ie_consolidated.csv"))

# Confusion-matrix indicator columns at a fixed 0.5 cut-off on the nimbus score.
# Rows whose gt_proofread is neither 0 nor 1 get a 0 in all four columns.
gt_is_positive = merged_df["gt_proofread"] == 1
gt_is_negative = merged_df["gt_proofread"] == 0
pred_is_positive = merged_df["nimbus"] > 0.5
pred_is_negative = merged_df["nimbus"] <= 0.5

merged_df["tp"] = (gt_is_positive & pred_is_positive).astype(int)
merged_df["fp"] = (gt_is_negative & pred_is_positive).astype(int)
merged_df["fn"] = (gt_is_positive & pred_is_negative).astype(int)
merged_df["tn"] = (gt_is_negative & pred_is_negative).astype(int)

## 2. Prepare plots

#### Figure 3 a, b, c, d, e

In [None]:
def mibi_breast_naming_conv(fov_path):
    """Return the segmentation mask path for a mibi_breast FOV.

    Args:
        fov_path (str): FOV name or path; only its basename is used.

    Returns:
        str: path below ``base_dir`` of the form
        ``mibi_breast/segmentation_data/deepcell_output/<fov>_feature_0.tif``.
    """
    fov = os.path.basename(fov_path)
    mask_dir = os.path.join(
        base_dir, "mibi_breast", "segmentation_data", "deepcell_output"
    )
    return os.path.join(mask_dir, fov + "_feature_0.tif")

def mibi_decidua_naming_conv(fov_path):
    """Return the segmentation mask path for a mibi_decidua FOV.

    Args:
        fov_path (str): FOV name or path; only its basename is used.

    Returns:
        str: path below ``base_dir`` of the form
        ``mibi_decidua/segmentation_data/<fov>_segmentation_labels.tiff``.
    """
    fov = os.path.basename(fov_path)
    mask_dir = os.path.join(base_dir, "mibi_decidua", "segmentation_data")
    return os.path.join(mask_dir, fov + "_segmentation_labels.tiff")

def vectra_colon_naming_conv(fname):
    """Return the segmentation mask path for a vectra_colon FOV.

    Args:
        fname (str): FOV name; "feature_0.ome.tif" is appended to it without
            any separator.

    Returns:
        str: path below ``base_dir`` inside ``vectra_colon/segmentation``.
    """
    mask_dir = os.path.join(base_dir, "vectra_colon", "segmentation")
    mask_name = fname + "feature_0.ome.tif"
    return os.path.join(mask_dir, mask_name)

def vectra_pancreas_naming_conv(fname):
    """Return the segmentation mask path for a vectra_pancreas FOV.

    Args:
        fname (str): FOV name; "feature_0.ome.tif" is appended to it without
            any separator.

    Returns:
        str: path below ``base_dir`` inside ``vectra_pancreas/segmentation``.
    """
    mask_dir = os.path.join(base_dir, "vectra_pancreas", "segmentation")
    mask_name = fname + "feature_0.ome.tif"
    return os.path.join(mask_dir, mask_name)

def codex_colon_naming_conv(fname):
    """Return the labeled segmentation mask path for a codex_colon FOV.

    The first two underscore-separated tokens of ``fname`` are taken as the
    FOV directory name and the region identifier. The mask is looked up in
    ``<base_dir>/codex_colon/masks/<fov>`` as a file whose name contains both
    "_labeled" and the region identifier.

    Args:
        fname (str): FOV name with at least two underscore-separated tokens.

    Returns:
        str: normalised path of the matching mask file. If several files
        match, the alphabetically first one is returned.

    Raises:
        ValueError: if ``fname`` contains no underscore.
        FileNotFoundError: if the FOV directory is missing or holds no
            matching mask file.
    """
    fov, reg = fname.split("_")[:2]
    fov_path = os.path.join(base_dir, "codex_colon", "masks", fov)
    # os.listdir returns entries in arbitrary order; sort so that the chosen
    # file is the same on every run and every platform
    candidates = sorted(
        entry for entry in os.listdir(fov_path)
        if "_labeled" in entry and reg in entry
    )
    if not candidates:
        raise FileNotFoundError(
            f"no '_labeled' mask containing '{reg}' found in {fov_path}"
        )
    return os.path.join(os.path.normpath(fov_path), candidates[0])

# dataset name -> function that turns a FOV name into its segmentation mask path
name_conv_dict = dict(
    mibi_breast=mibi_breast_naming_conv,
    mibi_decidua=mibi_decidua_naming_conv,
    vectra_colon=vectra_colon_naming_conv,
    vectra_pancreas=vectra_pancreas_naming_conv,
    codex_colon=codex_colon_naming_conv,
)

# FOV name -> [row slice, column slice]; FOVs listed here are cropped to this
# window before plotting, all other FOVs are plotted in full
slices = {
    "B010A_reg003_X01_Y01_Z01": [
        slice(0, 2048),
        slice(6000, 8048),
    ],
    "B011B_reg003_X01_Y01_Z01": [
        slice(5000, 7048),
        slice(-2049, -1),
    ],
    "B011B_reg001_X01_Y01_Z01": [
        slice(2000, 4048),
        slice(4000, 6048),
    ],
}
# Render one per-cell F1 map per FOV: every cell of the segmentation mask is
# painted with its F1 score (red = 0, white = 1), background stays black.
out_dir = os.path.join("figures", "figure_3")
os.makedirs(out_dir, exist_ok=True)

for dset in merged_df.dataset.unique():
    dset_df = merged_df[merged_df.dataset == dset]
    for fov in dset_df.fov.unique():
        # explicit copy: the column writes below must neither warn about nor
        # depend on dset_df_fov being a view of merged_df
        dset_df_fov = dset_df[dset_df.fov == fov].copy()
        dset_df_fov["accuracy"] = (
            dset_df_fov["gt_proofread"] == (dset_df_fov["nimbus"] > 0.41).astype(int)
        ).astype(int)  # threshold determined via f1 score
        # NOTE(review): the tp/fp/fn columns used for f1_score below were
        # computed with a 0.5 cut-off, not 0.41 - confirm this is intended
        # .loc instead of chained indexing, which is a silent no-op under
        # pandas Copy-on-Write
        dset_df_fov.loc[dset_df_fov["gt_proofread"] == 2, "accuracy"] = 2
        # rows with gt_proofread == 2 are left out of both per-cell scores
        share_correct = dset_df_fov[dset_df_fov["accuracy"] != 2].groupby("labels").accuracy.mean()
        f1_score = dset_df_fov[dset_df_fov["gt_proofread"] != 2].groupby("labels").apply(
            lambda x: (x.tp.sum() * 2) / (x.tp.sum() * 2 + x.fp.sum() + x.fn.sum() + 1e-8)
            if x.gt_proofread.sum() != 0 else 1
        )

        # load segmentation and blank the cell outlines so cells are separated
        seg_img = io.imread(name_conv_dict[dset](fov))
        seg_img = np.squeeze(seg_img)
        seg_img[find_boundaries(seg_img, mode='inner')] = 0
        if fov in slices:
            row_slice, col_slice = slices[fov]
            seg_img = seg_img[row_slice, col_slice]

        accuracy_out = np.zeros(seg_img.shape, dtype=np.float32)
        f1_score_out = np.zeros(seg_img.shape, dtype=np.float32)
        # drop the background label explicitly instead of assuming that it is
        # always the first unique value
        cell_ids = np.unique(seg_img)
        cell_ids = cell_ids[cell_ids != 0]
        for cell_id in tqdm(cell_ids):
            m = seg_img == cell_id
            # cells without any scored row default to a perfect score
            accuracy_out[m] = share_correct[cell_id] if cell_id in share_correct.index else 1
            f1_score_out[m] = f1_score[cell_id] if cell_id in f1_score.index else 1

        # red channel is 1 inside cells; green/blue carry the F1 score
        red = np.ones(f1_score_out.shape)
        red[seg_img == 0] = 0
        f1_rgb = np.stack([red, f1_score_out, f1_score_out], -1)

        fig, ax = plt.subplots(figsize=(20,20))
        ax.imshow(f1_rgb, vmin=0, vmax=1)
        ax.axis("off")
        plt.savefig(os.path.join(out_dir, f"{dset}_{fov}_f1_score.svg"), bbox_inches="tight", pad_inches=0, dpi=300)
        plt.show()
        # release the figure; otherwise every FOV keeps a 20x20 inch canvas alive
        plt.close(fig)

In [None]:
# make overlay plots
# 3x3 channel mixing matrix. Despite the "cmyk" naming only three channels are
# involved (there is no separate key channel). It is stored under its own name
# so that the rgb_to_cmyk() function below does not shadow it.
rgb_to_cmyk_matrix = np.array([[0.0, 1.0, 1.0],
                               [1.0, 0.0, 1.0],
                               [1.0, 1.0, 0.0]])
cmyk_from_rgb = np.linalg.inv(rgb_to_cmyk_matrix)


def rgb_to_cmyk(rgb):
    """Multiply the last axis of ``rgb`` with ``cmyk_from_rgb``.

    Args:
        rgb (array-like): array whose last axis has length 3.

    Returns:
        np.ndarray: ``np.dot(rgb, cmyk_from_rgb)``; same shape as the input.
    """
    out = np.dot(rgb, cmyk_from_rgb)
    return out


def cmyk_to_rgb(cmyk):
    """Inverse of ``rgb_to_cmyk``: multiply the last axis with the mixing matrix.

    Args:
        cmyk (array-like): array whose last axis has length 3.

    Returns:
        np.ndarray: ``np.dot(cmyk, rgb_to_cmyk_matrix)``; same shape as the input.
    """
    return np.dot(cmyk, rgb_to_cmyk_matrix)

from matplotlib.colors import ListedColormap

# dataset name -> directory with the nimbus prediction images (one sub-folder per FOV)
pred_paths = {
    dataset: os.path.join(base_dir, dataset, "nimbus_predictions")
    for dataset in (
        "mibi_decidua",
        "codex_colon",
        "vectra_colon",
        "vectra_pancreas",
        "mibi_breast",
    )
}

# dataset name -> directory with the raw channel images (one sub-folder per FOV)
data_paths = {
    "mibi_decidua": os.path.join(base_dir, "mibi_decidua", "image_data"),
    "codex_colon": os.path.join(base_dir, "codex_colon", "raw_structured"),
    "vectra_colon": os.path.join(base_dir, "vectra_colon", "raw_structured"),
    "vectra_pancreas": os.path.join(base_dir, "vectra_pancreas", "raw_structured"),
    "mibi_breast": os.path.join(base_dir, "mibi_breast", "image_data", "samples"),
}

# dataset name -> the three marker channels shown in the overlay
channel_dict = {
    # alternative panel for mibi_decidua: ["CD45", "CK7", "CD68"]
    "mibi_decidua": ["CD45", "SMA", "CD31"],
    "codex_colon": ["CD4", "MUC1", "Vimentin"],
    "vectra_colon": ["CD3", "panCK+CK7+CAM5.2", "Foxp3"],
    "vectra_pancreas": ["PD-1", "panCK", "CD8"],
    "mibi_breast": ["CD3", "ECAD", "CD8"],
}

# dataset name -> channel(s) averaged into the grayscale nuclear background
nuclei_channels = {
    "mibi_decidua": ["H3"],
    "codex_colon": ["DRAQ5"],
    "vectra_colon": ["DAPI"],
    "vectra_pancreas": ["DAPI"],
    "mibi_breast": ["H3K27me3", "H3K9ac"],
}

# FOV name -> [row slice, column slice] crop window (same values as in the
# F1-map cell above)
slices = {
    "B010A_reg003_X01_Y01_Z01": [
        slice(0, 2048),
        slice(6000, 8048),
    ],
    "B011B_reg003_X01_Y01_Z01": [
        slice(5000, 7048),
        slice(-2049, -1),
    ],
    "B011B_reg001_X01_Y01_Z01": [
        slice(2000, 4048),
        slice(4000, 6048),
    ],
}

# four identical 256-step ramps from 0 to 100, one array per column
cyan, magenta, yellow, key = (np.linspace(0, 100, 256) for _ in range(4))
cmyk_colormap = np.column_stack([cyan, magenta, yellow, key]) / 256.0
cmyk_cmap = ListedColormap(cmyk_colormap)

# Two-panel overlay per FOV: raw marker channels on top of the nuclear signal
# (left) and the nimbus predictions on top of the cell mask (right).

# file extension of the raw channel images, per dataset
raw_suffix = {
    "mibi_decidua": ".tif",
    "codex_colon": ".ome.tif",
    "vectra_colon": ".ome.tif",
    "vectra_pancreas": ".ome.tif",
    "mibi_breast": ".tiff",
}
out_dir = os.path.join("figures", "figure_3")
os.makedirs(out_dir, exist_ok=True)

for dset in ["mibi_decidua", "codex_colon", "vectra_colon", "vectra_pancreas", "mibi_breast"]:
    # resolved once per dataset instead of inside the channel loop, so the
    # nuclei loading below no longer relies on a leaked loop variable
    suffix = raw_suffix[dset]
    naming_conv = name_conv_dict[dset]
    for fov in merged_df[merged_df.dataset == dset].fov.unique():
        # binary cell mask with the cell outlines removed
        inst_seg = np.squeeze(io.imread(naming_conv(fov))).astype(np.float32)
        boundaries = find_boundaries(inst_seg, mode='inner')
        inst_seg[boundaries] = 0
        inst_seg = (inst_seg > 0).astype(np.float32)

        # raw image and prediction for each of the marker channels
        img = []
        pred = []
        for channel in channel_dict[dset]:
            data_path = os.path.join(data_paths[dset], fov, channel + suffix)
            pred_path = os.path.join(pred_paths[dset], fov, f"{channel}.tiff")
            img.append(np.squeeze(io.imread(data_path)))
            pred.append(np.squeeze(io.imread(pred_path)))
        img = np.stack(img, axis=-1)
        pred = np.stack(pred, axis=-1)

        # nuclear background: scaled by the global 99% quantile of the full
        # image, then averaged over the nuclear channels
        nuclei_img = [
            np.squeeze(io.imread(os.path.join(data_paths[dset], fov, nuclei_chan + suffix)))
            for nuclei_chan in nuclei_channels[dset]
        ]
        nuclei_img = np.stack(nuclei_img, axis=-1)
        nuclei_img = nuclei_img / np.quantile(nuclei_img, 0.99)
        nuclei_img = nuclei_img.mean(axis=-1)

        if fov in slices:
            row_slice, col_slice = slices[fov]
            full_shape = boundaries.shape
            boundaries = boundaries[row_slice, col_slice]
            inst_seg = inst_seg[row_slice, col_slice]
            img = img[row_slice, col_slice]
            nuclei_img = nuclei_img[row_slice, col_slice]
            # crop the predictions as well when they cover the full FOV;
            # otherwise the boolean mask below does not match their shape.
            # Predictions that are already stored cropped are left untouched.
            if pred.shape[:2] == full_shape:
                pred = pred[row_slice, col_slice]

        pred[boundaries,:] = 0
        # per-channel scaling to the 99% quantile of the (cropped) image
        # NOTE(review): a channel whose 99% quantile is 0 divides by zero here
        img = img / np.quantile(img, 0.99, axis=(0,1))
        img = np.clip(img, 0, 1)
        pred = pred / 255.0
        # lift all cell interiors by 0.25 so that cells without signal remain visible
        pred += np.stack([inst_seg] * 3, axis=-1) * 0.25

        fig, ax = plt.subplots(1,2, figsize=(10,10))
        img = rgb_to_cmyk(img)
        pred = rgb_to_cmyk(pred)
        ax[0].imshow(nuclei_img, cmap="gray", vmin=0, vmax=1.0, interpolation="none")
        ax[0].imshow(img, vmin=0, vmax=1.0, interpolation="none", alpha=0.8)
        ax[0].axis("off")
        ax[1].imshow(pred, vmin=0, vmax=1, interpolation="none")
        ax[1].axis("off")
        channel_names = "".join(channel_dict[dset])
        print(f"{dset}_{fov}_{channel_names}_overlay.svg")
        plt.savefig(os.path.join(out_dir, f"{dset}_{fov}_{channel_names}_overlay.svg"), transparent=True, dpi=300)
        plt.show()
        # release the figure so that memory does not grow with every FOV
        plt.close(fig)