In [None]:
import matplotlib.pyplot as plt
from copy import deepcopy
import seaborn as sns
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from imageio import imread
from skimage.measure import regionprops_table
import os
sns.set_style("ticks")

In [None]:
# Per-cell table holding the proofread ground truth, Nimbus scores and integrated expression
consolidated_csv = os.path.join("data", "gt_pred_ie_consolidated.csv")
all_data_df = pd.read_csv(consolidated_csv)
# Root folder of the codex_colon data (the "masks" and "raw_structured" sub-folders are read below)
base_dir = os.path.join("data", "codex_colon")

def segmentation_naming_convention(fov_path):
    """Locate the labeled segmentation mask that belongs to a FOV folder.

    The basename of ``fov_path`` is split on underscores: the first token selects
    the sub-folder of ``<base_dir>/masks`` and the second token (the region) has to
    occur in the mask file name together with ``"_labeled"``.

    Args:
        fov_path (str): path whose basename looks like ``<fov>_<region>_...``
    Returns:
        str: path of the matching mask file; if several files match, the
            alphabetically first one is returned so the choice is reproducible
    Raises:
        FileNotFoundError: if no file in the mask folder matches
    """
    fname = os.path.basename(fov_path)
    fov, reg = fname.split("_")[:2]
    mask_dir = os.path.join(base_dir, "masks", fov)
    # os.listdir yields entries in arbitrary order, so sort before picking one
    candidates = sorted(
        img for img in os.listdir(mask_dir) if "_labeled" in img and reg in img
    )
    if not candidates:
        raise FileNotFoundError(
            f"no '_labeled' mask containing '{reg}' found in {mask_dir}"
        )
    return os.path.join(os.path.normpath(mask_dir), candidates[0])

# Fields of view and marker channels that are measured below
fovs = ['B010A_reg003_X01_Y01_Z01', 'B011B_reg001_X01_Y01_Z01', 'B011B_reg003_X01_Y01_Z01']
channels = [
    'aDefensin5', 'aSMA', 'BCL2', 'CD117', 'CD11c', 'CD138', 'CD15', 'CD16', 'CD161',
    'CD163', 'CD19', 'CD206', 'CD21', 'CD3', 'CD31', 'CD34', 'CD36', 'CD38', 'CD4',
    'CD45', 'CD56', 'CD57', 'CD66', 'CD68', 'CD7', 'CD8', 'CD90', 'CHGA', 'Cytokeratin',
    'HLADR', 'MUC1', 'MUC2', 'Podoplanin', 'SOX9', 'Synaptophysin', 'Vimentin',
]

# out[fov][channel] -> regionprops table with "label" and "mean_intensity" per segmented cell
out = {}
for fov in fovs:
    path = os.path.join(base_dir, "raw_structured", fov)
    instance_seg_path = segmentation_naming_convention(path)
    instance_seg = np.squeeze(imread(instance_seg_path)).astype(np.uint16)
    fov_props = {}
    for channel in tqdm(channels):
        # each channel is stored as <channel>.ome.tif inside the FOV folder
        c_img = np.squeeze(imread(os.path.join(path, f"{channel}.ome.tif")))
        fov_props[channel] = regionprops_table(
            instance_seg, intensity_image=c_img, properties=["label", "mean_intensity"]
        )
    out[fov] = fov_props

In [None]:

# Flatten the nested {fov: {channel: props}} dict into one long table
df_list = []
for fov, per_channel in out.items():
    for chan, props in per_channel.items():
        tmp_df = pd.DataFrame(props)
        tmp_df["channel"] = chan
        tmp_df["fov"] = fov
        df_list.append(tmp_df)
df_out = pd.concat(df_list)

# Scale every channel by its 99.9th percentile of mean intensity.
# Series.map + one vectorised division replaces a Python-level call per row.
normy = df_out.groupby("channel")["mean_intensity"].quantile(0.999).to_dict()
df_out["norm_intensity"] = df_out["mean_intensity"] / df_out["channel"].map(normy)

# Select the codex_colon rows once and reuse the selection for all three key columns
codex_colon_df = all_data_df[all_data_df["dataset"] == "codex_colon"]
fovs = codex_colon_df.fov.values
channels = codex_colon_df.channel.values
labels = codex_colon_df.labels.values

# Attach the measured intensities to every (fov, channel, label) triple of the consolidated table
tmp_dataframe = pd.DataFrame({"fov": fovs, "channel": channels, "label": labels})
tmp_dataframe = tmp_dataframe.join(df_out.set_index(["fov", "channel", "label"]), on=["fov", "channel", "label"])
tmp_dataframe

In [None]:
all_data_df = pd.read_csv(os.path.join("data", "gt_pred_ie_consolidated.csv"))

# Keep rows whose proofread ground truth is below 2. The explicit copy detaches the
# result from the raw table so the column assignments below do not trigger
# SettingWithCopyWarning.
all_data_df = all_data_df[all_data_df["gt_proofread"] < 2].copy()

# Scale integrated expression by its 99.9th percentile within each (dataset, channel)
# group and floor the result at 0
all_data_df["ie"] = (
    all_data_df.groupby(["dataset", "channel"])["ie"]
    .transform(lambda x: x / x.quantile(0.999))
    .clip(0)
)
all_data_df["nimbus"] = all_data_df["nimbus"].clip(0, 1)

# Human-readable ground-truth classes; any integer other than 0 or 1 raises KeyError
gt_names = {0: "negative", 1: "positive"}
all_data_df["gt"] = all_data_df["gt_proofread"].astype(int).map(lambda value: gt_names[value])

# Column names double as axis labels in the figures below
all_data_df = all_data_df.rename(
    columns={"gt": "Groundtruth", "nimbus": "Nimbus Confidence Scores", "ie": "Integrated Expression"}
)

#### Figure 5 a

In [None]:
sns.set(font_scale=1.2)

out_dir = "figures/figure_5"
os.makedirs(out_dir, exist_ok=True)
fig_name = "histogram_kde_pred_vs_ie_all_data.svg"
sns.set_style("ticks")

palette = sns.color_palette("bright")[1:]

# Plot a fixed-seed sub-sample; the size is capped at the table length because
# sample(..., replace=False) raises when asked for more rows than exist
n_samples = min(400000, len(all_data_df))
subset_df = all_data_df.sample(n_samples, random_state=1, replace=False)
df_subset_pos = subset_df[subset_df["Groundtruth"] == "positive"]
df_subset_neg = subset_df[subset_df["Groundtruth"] == "negative"]

# Options shared by all four density curves
kde_kwargs = dict(
    palette=palette,
    linewidth=1.5,
    hue="Groundtruth",
    hue_order=["negative", "positive"],
    fill=True,
    log_scale=(False, False),
    common_norm=True,
)

fig, ax = plt.subplots(2, 1, sharex=False, figsize=(10,8))

# Top panel: Nimbus scores. Negatives go on the left y-axis, positives on a twin
# right y-axis so each class gets its own density range.
g1 = sns.kdeplot(x='Nimbus Confidence Scores', data=df_subset_neg, legend=False, ax=ax[0], **kde_kwargs)
ax2 = ax[0].twinx()
g1 = sns.kdeplot(x='Nimbus Confidence Scores', data=df_subset_pos, legend=True, ax=ax2, **kde_kwargs)
ax[0].set(xlim=(-0.1, 1.1), ylabel='density (negative)', ylim=(0, 38))
ax2.set(ylabel='density (positive)', ylim=(0, 10))

# Bottom panel: integrated expression, same layout
g2 = sns.kdeplot(x='Integrated Expression', data=df_subset_neg, legend=False, ax=ax[1], **kde_kwargs)
ax2_2 = ax[1].twinx()
g2 = sns.kdeplot(x='Integrated Expression', data=df_subset_pos, legend=False, ax=ax2_2, **kde_kwargs)
ax[1].set(xlim=(-0.1, 1.1), ylabel='density (negative)', ylim=(0, 15))
ax2_2.set(ylabel='density (positive)', ylim=(0, 10))

fig.suptitle("Nimbus confidence scores vs. integrated expression on all datasets")
# NOTE(review): this changes the global seaborn/matplotlib settings after the figure
# was created, so it presumably only affects figures drawn later - confirm intent.
sns.set(rc={'axes.facecolor':'white', 'figure.facecolor':(0,0,0,0)})
# final figure size (overrides the figsize passed to plt.subplots)
fig.set_size_inches(10, 12)

plt.savefig(os.path.join(out_dir, fig_name), format='svg')
plt.show()

#### Figure 5 b

In [None]:
sns.set(font_scale=1.2)
sns.set_style("ticks")

# Marker / dataset pair shown in this panel
marker = "Cytokeratin"
dset = "codex_colon"
fig_name = f"histogram_{dset}_{marker}_nimbus_vs_ie.svg"

# "bright" palette without its first colour
palette = sns.color_palette("bright")[1:]

is_selected = (all_data_df["channel"] == marker) & (all_data_df["dataset"] == dset)
subset_df = all_data_df[is_selected]
df_subset_pos = subset_df[subset_df["Groundtruth"] == "positive"]
df_subset_neg = subset_df[subset_df["Groundtruth"] == "negative"]

# Options shared by all four density curves
kde_kwargs = dict(
    palette=palette,
    linewidth=1.5,
    hue="Groundtruth",
    hue_order=["negative", "positive"],
    fill=True,
    log_scale=(False, False),
    common_norm=True,
)

fig, ax = plt.subplots(2, 1, sharex=False, figsize=(10, 8))

# Top panel: Nimbus scores, negatives on the left y-axis and positives on a twin right y-axis
score_col = "Nimbus Confidence Scores"
g1 = sns.kdeplot(x=score_col, data=df_subset_neg, legend=False, ax=ax[0], **kde_kwargs)
ax2 = ax[0].twinx()
g1 = sns.kdeplot(x=score_col, data=df_subset_pos, legend=True, ax=ax2, **kde_kwargs)
ax[0].set(xlim=(-0.1, 1.1), ylabel="density (negative)", ylim=(0, 26))
ax2.set(ylabel="density (positive)", ylim=(0, 10))

# Bottom panel: integrated expression, same layout
ie_col = "Integrated Expression"
g2 = sns.kdeplot(x=ie_col, data=df_subset_neg, legend=False, ax=ax[1], **kde_kwargs)
ax2_2 = ax[1].twinx()
g2 = sns.kdeplot(x=ie_col, data=df_subset_pos, legend=False, ax=ax2_2, **kde_kwargs)
ax[1].set(xlim=(-0.1, 1.1), ylabel="density (negative)", ylim=(0, 50))
ax2_2.set(ylabel="density (positive)", ylim=(0, 10))

fig.suptitle(f"Nimbus confidence scores vs. integrated expression on {dset} {marker}")
sns.set(rc={"axes.facecolor": "white", "figure.facecolor": (0, 0, 0, 0)})
fig.set_size_inches(10, 12)

# out_dir is the figure folder set up by the Figure 5 a cell
plt.savefig(os.path.join(out_dir, fig_name), format="svg")
plt.show()

#### Figure 5 c

In [None]:
from imageio import imread
from matplotlib.colors import LinearSegmentedColormap
from skimage.segmentation import find_boundaries
import os
import matplotlib.pyplot as plt
import numpy as np
from copy import deepcopy
from tqdm.notebook import tqdm

# Root folder of the codex_colon data; the mask and image paths in this cell are built from it.
base_dir = os.path.join("data", "codex_colon")

def segmentation_naming_convention(fov_path):
    """Locate the labeled segmentation mask that belongs to a FOV folder.

    The basename of ``fov_path`` is split on underscores: the first token selects
    the sub-folder of ``<base_dir>/masks`` and the second token (the region) has to
    occur in the mask file name together with ``"_labeled"``.

    Args:
        fov_path (str): path whose basename looks like ``<fov>_<region>_...``
    Returns:
        str: path of the matching mask file; if several files match, the
            alphabetically first one is returned so the choice is reproducible
    Raises:
        FileNotFoundError: if no file in the mask folder matches
    """
    fname = os.path.basename(fov_path)
    fov, reg = fname.split("_")[:2]
    mask_dir = os.path.join(base_dir, "masks", fov)
    # os.listdir yields entries in arbitrary order, so sort before picking one
    candidates = sorted(
        img for img in os.listdir(mask_dir) if "_labeled" in img and reg in img
    )
    if not candidates:
        raise FileNotFoundError(
            f"no '_labeled' mask containing '{reg}' found in {mask_dir}"
        )
    return os.path.join(os.path.normpath(mask_dir), candidates[0])

def assign_values(instance_seg, df, key="ie"):
    """Paints every segmented cell with its value from the cell table
    Args:
        instance_seg (np.array): instance segmentation (one label per pixel)
        df (pd.DataFrame): cell table with a "labels" column and a ``key`` column
        key (str): column of ``df`` whose values are written into the image
    Returns:
        out (np.array): float32 image shaped like ``instance_seg``. Pixels whose
            label has no row in ``df`` are 0; if a label occurs in several rows,
            the last row wins.
    """
    out = np.zeros_like(instance_seg).astype(np.float32)
    labels = np.asarray(df["labels"], dtype=np.float64)
    values = np.asarray(df[key], dtype=np.float32)
    # NaN labels can never equal a pixel label; drop them before sorting
    valid = ~np.isnan(labels)
    labels, values = labels[valid], values[valid]
    if labels.size == 0 or out.size == 0:
        return out

    # np.unique reports the first hit per label; on the reversed table that is
    # the last row of df for that label
    table_labels, first_hit = np.unique(labels[::-1], return_index=True)
    table_values = values[::-1][first_hit]

    # Look up each distinct image label once instead of comparing the whole
    # image against every table row
    seg_labels, inverse = np.unique(instance_seg, return_inverse=True)
    pos = np.minimum(np.searchsorted(table_labels, seg_labels), table_labels.size - 1)
    found = table_labels[pos] == seg_labels
    seg_values = np.where(found, table_values[pos], 0).astype(np.float32)
    out[...] = seg_values[inverse.reshape(-1)].reshape(out.shape)
    return out

# Field of view, marker cell and outline colour featured in this panel
fov_name = "B011B_reg001_X01_Y01_Z01"
highlight_label = 24793      # segmentation label of the cell that gets a coloured outline
highlight_color = (0, 1, 0)  # RGB, green

subset_df = all_data_df[
    (all_data_df["channel"] == "Cytokeratin")
    & (all_data_df["dataset"] == "codex_colon")
    & (all_data_df["fov"] == fov_name)
]

path = os.path.join(base_dir, "raw_structured", fov_name)
instance_seg_path = segmentation_naming_convention(path)
instance_seg = np.squeeze(imread(instance_seg_path)).astype(np.uint16)

# (row, column) crop taken from each field of view
slices = {
    "B010A_reg003_X01_Y01_Z01": [slice(0,2048), slice(6000,8048)],
    "B011B_reg003_X01_Y01_Z01": [slice(5000,7048), slice(-2049,-1)],
    "B011B_reg001_X01_Y01_Z01": [slice(2000,4048), slice(4000,6048)],
}
sl = slices[fov_name]
instance_seg = instance_seg[sl[0], sl[1]]

# Per-cell scores painted back onto the cropped segmentation
nimbus = assign_values(instance_seg, subset_df, "Nimbus Confidence Scores")
ie = assign_values(instance_seg, subset_df, "Integrated Expression")

# Raw channels, scaled by their 99.9th percentile over the full image before cropping
nuclei = imread(os.path.join(path, "DRAQ5.ome.tif")).astype(np.float32)
marker = imread(os.path.join(path, "Cytokeratin.ome.tif")).astype(np.float32)
marker /= np.quantile(marker, 0.999)
nuclei /= np.quantile(nuclei, 0.999)
marker = marker[sl[0], sl[1]]
nuclei = nuclei[sl[0], sl[1]]

# Channel mixing matrix: a pixel (a, b, a) multiplied by it becomes (a + b, 2a, a + b)
rgb_to_cmyk = np.array([[0.0, 1.0, 1.0],
                         [1.0, 0.0, 1.0],
                         [1.0, 1.0, 0.0]])

boundaries = find_boundaries(instance_seg) > 0
# Zoom window inside the crop (rows, columns)
s = slice(480, 620, None), slice(1320, 1460, None)
highlight_boundaries = find_boundaries(instance_seg[s] == highlight_label) > 0

# Nuclear channel of the zoom window, rescaled so that its maximum is 0.5
nuclei2 = nuclei[s].copy()
nuclei2 /= nuclei2.max() * 2

# Panel 1: marker + nuclei composite with grey cell outlines
marker2 = np.clip(marker[s] + nuclei2, 0, 1)
marker3 = np.dot(np.stack([nuclei2, marker2, nuclei2], -1), rgb_to_cmyk)
marker3[boundaries[s]] = 0.5
marker3[highlight_boundaries, :] = highlight_color

# Panel 2: integrated expression per cell (multiplied by 4 for display)
ie2 = np.stack([ie[s] * 4] * 3, -1)
ie2[boundaries[s]] = 0.25
ie2[highlight_boundaries, :] = highlight_color

# Panel 3: Nimbus confidence score per cell
nimbus2 = np.stack([nimbus[s]] * 3, -1)
nimbus2[boundaries[s]] = 0.25
nimbus2[highlight_boundaries, :] = highlight_color

fig, ax = plt.subplots(1,3, figsize=(15,5))
ax[0].imshow(marker3, vmin=0, vmax=1, interpolation="none")
ax[1].imshow(ie2, vmin=0, vmax=1, interpolation="none")
ax[2].imshow(nimbus2, vmin=0, vmax=1, interpolation="none")
for a in ax:
    a.axis("off")
out_dir = "figures/figure_5"
# make sure the target folder exists even when this cell is run on its own
os.makedirs(out_dir, exist_ok=True)
plt.savefig(os.path.join(out_dir, "CK_fn_to_tp.svg"), format='svg')
plt.show()

In [None]:
from imageio import imread
from matplotlib.colors import LinearSegmentedColormap
from skimage.segmentation import find_boundaries
import os
import matplotlib.pyplot as plt
import numpy as np
from copy import deepcopy
from tqdm.notebook import tqdm

# Root folder of the codex_colon data; the mask and image paths in this cell are built from it.
base_dir = os.path.join("data", "codex_colon")

def segmentation_naming_convention(fov_path):
    """Locate the labeled segmentation mask that belongs to a FOV folder.

    The basename of ``fov_path`` is split on underscores: the first token selects
    the sub-folder of ``<base_dir>/masks`` and the second token (the region) has to
    occur in the mask file name together with ``"_labeled"``.

    Args:
        fov_path (str): path whose basename looks like ``<fov>_<region>_...``
    Returns:
        str: path of the matching mask file; if several files match, the
            alphabetically first one is returned so the choice is reproducible
    Raises:
        FileNotFoundError: if no file in the mask folder matches
    """
    fname = os.path.basename(fov_path)
    fov, reg = fname.split("_")[:2]
    mask_dir = os.path.join(base_dir, "masks", fov)
    # os.listdir yields entries in arbitrary order, so sort before picking one
    candidates = sorted(
        img for img in os.listdir(mask_dir) if "_labeled" in img and reg in img
    )
    if not candidates:
        raise FileNotFoundError(
            f"no '_labeled' mask containing '{reg}' found in {mask_dir}"
        )
    return os.path.join(os.path.normpath(mask_dir), candidates[0])

def assign_values(instance_seg, df, key="ie"):
    """Paints every segmented cell with its value from the cell table
    Args:
        instance_seg (np.array): instance segmentation (one label per pixel)
        df (pd.DataFrame): cell table with a "labels" column and a ``key`` column
        key (str): column of ``df`` whose values are written into the image
    Returns:
        out (np.array): float32 image shaped like ``instance_seg``. Pixels whose
            label has no row in ``df`` are 0; if a label occurs in several rows,
            the last row wins.
    """
    out = np.zeros_like(instance_seg).astype(np.float32)
    labels = np.asarray(df["labels"], dtype=np.float64)
    values = np.asarray(df[key], dtype=np.float32)
    # NaN labels can never equal a pixel label; drop them before sorting
    valid = ~np.isnan(labels)
    labels, values = labels[valid], values[valid]
    if labels.size == 0 or out.size == 0:
        return out

    # np.unique reports the first hit per label; on the reversed table that is
    # the last row of df for that label
    table_labels, first_hit = np.unique(labels[::-1], return_index=True)
    table_values = values[::-1][first_hit]

    # Look up each distinct image label once instead of comparing the whole
    # image against every table row
    seg_labels, inverse = np.unique(instance_seg, return_inverse=True)
    pos = np.minimum(np.searchsorted(table_labels, seg_labels), table_labels.size - 1)
    found = table_labels[pos] == seg_labels
    seg_values = np.where(found, table_values[pos], 0).astype(np.float32)
    out[...] = seg_values[inverse.reshape(-1)].reshape(out.shape)
    return out

# Field of view, marker cell and outline colour featured in this panel
fov_name = "B011B_reg003_X01_Y01_Z01"
highlight_label = 17388      # segmentation label of the cell that gets a coloured outline
highlight_color = (1, 0, 0)  # RGB, red

subset_df = all_data_df[
    (all_data_df["channel"] == "Cytokeratin")
    & (all_data_df["dataset"] == "codex_colon")
    & (all_data_df["fov"] == fov_name)
]

path = os.path.join(base_dir, "raw_structured", fov_name)
instance_seg_path = segmentation_naming_convention(path)
instance_seg = np.squeeze(imread(instance_seg_path)).astype(np.uint16)

# (row, column) crop taken from each field of view
slices = {
    "B010A_reg003_X01_Y01_Z01": [slice(0,2048), slice(6000,8048)],
    "B011B_reg003_X01_Y01_Z01": [slice(5000,7048), slice(-2049,-1)],
    "B011B_reg001_X01_Y01_Z01": [slice(2000,4048), slice(4000,6048)],
}
sl = slices[fov_name]
instance_seg = instance_seg[sl[0], sl[1]]

# Per-cell scores painted back onto the cropped segmentation
nimbus = assign_values(instance_seg, subset_df, "Nimbus Confidence Scores")
ie = assign_values(instance_seg, subset_df, "Integrated Expression")

# Raw channels, scaled by their 99.9th percentile over the full image before cropping
nuclei = imread(os.path.join(path, "DRAQ5.ome.tif")).astype(np.float32)
marker = imread(os.path.join(path, "Cytokeratin.ome.tif")).astype(np.float32)
marker /= np.quantile(marker, 0.999)
nuclei /= np.quantile(nuclei, 0.999)
marker = marker[sl[0], sl[1]]
nuclei = nuclei[sl[0], sl[1]]

# Channel mixing matrix: a pixel (a, b, a) multiplied by it becomes (a + b, 2a, a + b)
rgb_to_cmyk = np.array([[0.0, 1.0, 1.0],
                         [1.0, 0.0, 1.0],
                         [1.0, 1.0, 0.0]])

boundaries = find_boundaries(instance_seg) > 0
# Zoom window inside the crop (rows, columns)
s = slice(455, 595, None), slice(1540, 1680, None)
highlight_boundaries = find_boundaries(instance_seg[s] == highlight_label) > 0

# Nuclear channel of the zoom window, rescaled so that its maximum is 0.5
nuclei2 = nuclei[s].copy()
nuclei2 /= nuclei2.max() * 2

# Panel 1: marker + nuclei composite with grey cell outlines
marker2 = np.clip(marker[s] + nuclei2, 0, 1)
marker3 = np.dot(np.stack([nuclei2, marker2, nuclei2], -1), rgb_to_cmyk)
marker3[boundaries[s]] = 0.5
marker3[highlight_boundaries, :] = highlight_color

# Panel 2: integrated expression per cell (multiplied by 4 for display)
ie2 = np.stack([ie[s] * 4] * 3, -1)
ie2[boundaries[s]] = 0.25
ie2[highlight_boundaries, :] = highlight_color

# Panel 3: Nimbus confidence score per cell
nimbus2 = np.stack([nimbus[s]] * 3, -1)
nimbus2[boundaries[s]] = 0.25
nimbus2[highlight_boundaries, :] = highlight_color

fig, ax = plt.subplots(1,3, figsize=(15,5))
ax[0].imshow(marker3, vmin=0, vmax=1, interpolation="none")
ax[1].imshow(ie2, vmin=0, vmax=1, interpolation="none")
ax[2].imshow(nimbus2, vmin=0, vmax=1, interpolation="none")
for a in ax:
    a.axis("off")
out_dir = "figures/figure_5"
# make sure the target folder exists even when this cell is run on its own
os.makedirs(out_dir, exist_ok=True)
# This panel used to be written to "CK_fn_to_tp.svg", the file name of the
# B011B_reg001 panel, and overwrote it. The name is now derived from the FOV and
# cell label. NOTE(review): rename if the figure layout expects a specific file name.
fig_name = f"CK_{fov_name}_cell{highlight_label}.svg"
plt.savefig(os.path.join(out_dir, fig_name), format='svg')
plt.show()

#### Figure 5 d

In [None]:
# Load the TNBC heatmap table; the first CSV column becomes the row index.
plot_df_tnbc = pd.read_csv("data/spain_tnbc/tnbc_heatmap_nimbus.csv", index_col=0)

In [None]:
out_dir = "figures/figure_5"

# clustermap is called with clustering switched off on both axes; z_score=1 asks
# seaborn to standardise along columns
heatmap_kwargs = dict(
    cmap="vlag",
    figsize=(16, 12.5),
    row_cluster=False,
    col_cluster=False,
    z_score=1,
    linewidths=1,
    linecolor="black",
    clip_on=False,
)
ax = sns.clustermap(plot_df_tnbc, **heatmap_kwargs)

# enlarge the tick labels on both heatmap axes
for tick_axis in (ax.ax_heatmap.yaxis, ax.ax_heatmap.xaxis):
    tick_axis.set_tick_params(labelsize=28)
# NOTE(review): saving is disabled for this heatmap - confirm that is intentional.
# plt.savefig(os.path.join(out_dir, "tnbc_heatmap.svg"), format='svg')

#### Figure 5 e

In [None]:
# Load the DCIS heatmap table; the first CSV column becomes the row index.
dcis_plot_df = pd.read_csv("data/DCIS/dcis_heatmap_nimbus.csv", index_col=0)

In [None]:
# clustermap is called with clustering switched off on both axes; z_score=1 asks
# seaborn to standardise along columns
heatmap_kwargs = dict(
    cmap="vlag",
    figsize=(16, 11),
    row_cluster=False,
    col_cluster=False,
    z_score=1,
    linewidths=1,
    linecolor="black",
    clip_on=False,
)
ax = sns.clustermap(dcis_plot_df, **heatmap_kwargs)

# enlarge the tick labels on both heatmap axes
for tick_axis in (ax.ax_heatmap.yaxis, ax.ax_heatmap.xaxis):
    tick_axis.set_tick_params(labelsize=28)

# out_dir is the figure folder defined by an earlier cell
plt.savefig(os.path.join(out_dir, "dcis_heatmap.svg"), format='svg')