The goal of this notebook is to reassign segmentation labels based on the objects that contain them.
This means that the segmentation label ID of each nucleus will match that of the cell that contains it.

In [1]:
import argparse
import os
import pathlib
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage
import tifffile

# Locate the repository root: the current working directory itself, or its
# closest ancestor, that contains a ".git" directory.
cwd = pathlib.Path.cwd()

root_dir = next(
    (
        candidate
        for candidate in (cwd, *cwd.parents)
        if (candidate / ".git").is_dir()
    ),
    None,
)
if root_dir is None:
    # fail early with a clear message instead of a TypeError on `None / "utils"`
    raise FileNotFoundError(
        f"Could not find a git repository root at or above {cwd}"
    )

# make the project-local helper modules in <root>/utils importable
sys.path.append(str(root_dir / "utils"))
from arg_parsing_utils import check_for_missing_args, parse_args
from file_reading import read_zstack_image
from notebook_init_utils import bandicoot_check, init_notebook

# NOTE(review): init_notebook presumably re-derives the root directory and
# reports whether the code runs inside a notebook — confirm in notebook_init_utils
root_dir, in_notebook = init_notebook()

# NOTE(review): bandicoot_check presumably chooses between the mounted share and
# the repository root as the base directory for image data — confirm
image_base_dir = bandicoot_check(
    pathlib.Path(os.path.expanduser("~/mnt/bandicoot")).resolve(), root_dir
)

In [2]:
if in_notebook:
    # interactive session: use a fixed example dataset
    print("Running in a notebook")

    patient = "NF0037_T1-Z-1"
    well_fov = "F4-2"
    mask_subparent_name = "segmentation_masks"
else:
    # script mode: every setting is taken from the parsed arguments
    args = parse_args()
    well_fov = args["well_fov"]
    patient = args["patient"]
    mask_subparent_name = args["mask_subparent_name"]
    check_for_missing_args(
        well_fov=well_fov,
        patient=patient,
        mask_subparent_name=mask_subparent_name,
    )

# directory that holds the segmentation masks of the selected patient / well-FOV
mask_dir = pathlib.Path(
    f"{image_base_dir}/data/{patient}/{mask_subparent_name}/{well_fov}"
).resolve()

Running in a notebook


In [3]:
def remove_edge_cases(
    mask: np.ndarray,
    border: int = 10,
) -> np.ndarray:
    """
    Remove objects that touch the edge of the image

    Every label that occurs inside a band of ``border`` pixels along the
    y and x edges (in any z slice) is set to zero in the whole volume.
    This is useful to remove objects that are not fully contained within the image.

    Parameters
    ----------
    mask : np.ndarray
        The label mask to process, should be a 3D numpy array in (z, y, x) order.
        The array is modified in place.
    border : int, optional
        Width in pixels of the band along each y/x edge that is scanned for
        labels, by default 10. A border of 0 disables the removal.

    Returns
    -------
    np.ndarray
        The same array object as ``mask``, with edge-touching labels zeroed

    Raises
    ------
    ValueError
        If ``border`` is negative
    """
    if border < 0:
        raise ValueError(f"border must be non-negative, got {border}")
    if border == 0:
        # `mask[:, -0:, :]` would select the whole volume and wipe every label,
        # so an empty border has to be handled explicitly
        return mask

    edge_pixels = np.concatenate(
        [
            # all of z, last n rows (y), all columns (x) - bottom edge
            mask[:, -border:, :].ravel(),
            # all of z, first n rows (y), all columns (x) - top edge
            mask[:, :border, :].ravel(),
            # all of z, all rows (y), first n columns (x) - left edge
            mask[:, :, :border].ravel(),
            # all of z, all rows (y), last n columns (x) - right edge
            mask[:, :, -border:].ravel(),
            # the edges are stacked over the whole volume -> no need to visit each z slice
        ]
    )
    # unique non-background labels that touch an edge
    edge_labels = np.unique(edge_pixels[edge_pixels > 0])

    if edge_labels.size > 0:
        # zero all edge-touching labels in a single vectorised pass
        mask[np.isin(mask, edge_labels)] = 0

    # return the mask with edge cases removed
    return mask


def centroid_within_bbox_detection(
    centroid: tuple,
    bbox: tuple,
) -> bool:
    """
    Tell whether a centroid lies inside a bounding box (bounds inclusive)

    Parameters
    ----------
    centroid : tuple
        Centroid coordinates given as (z, y, x); the order matters
    bbox : tuple
        Bounding box given as (z_min, y_min, x_min, z_max, y_max, x_max);
        the order matters

    Returns
    -------
    bool
        True when every coordinate lies between its min and max bound
        (both included), False otherwise
    """
    z_min, y_min, x_min, z_max, y_max, x_max = bbox
    z, y, x = centroid
    # each axis is tested against its own closed interval
    inside = (
        z_min <= z <= z_max
        and y_min <= y <= y_max
        and x_min <= x <= x_max
    )
    return bool(inside)


def check_if_centroid_within_mask(
    centroid: tuple, mask: np.ndarray, label: int
) -> bool:
    """
    Tell whether a centroid falls on a given label of a mask

    Parameters
    ----------
    centroid : tuple
        Centroid coordinates given as (z, y, x); the order matters.
        Each coordinate is rounded to the nearest integer index.
    mask : np.ndarray
        The label mask that is sampled at the centroid
    label : int
        The label the sampled voxel is compared against

    Returns
    -------
    bool
        True when the voxel at the rounded centroid is non-background
        and equal to ``label``, False otherwise
    """
    z, y, x = centroid
    # nearest-voxel index of the centroid
    voxel_index = tuple(np.round(coord).astype(int) for coord in (z, y, x))
    label_at_centroid = mask[voxel_index]
    # background (0) never counts as a match, even for label == 0
    return bool(label_at_centroid > 0 and label_at_centroid == label)


def mask_label_reassignment(
    mask_df: pd.DataFrame,
    mask_input: np.ndarray,
) -> np.ndarray:
    """
    Reassign the labels of the mask based on the mask_df

    Every voxel whose original value equals a row's ``label`` is set to that
    row's ``new_label``. All rows are matched against the labels the mask had
    on entry, so a new label that coincides with another row's old label is
    not relabelled a second time.

    Parameters
    ----------
    mask_df : pd.DataFrame
        DataFrame with a ``label`` column (current label) and a ``new_label``
        column (label to assign)
    mask_input : np.ndarray
        The input mask to reassign the labels to; it is modified in place

    Returns
    -------
    np.ndarray
        The same array object as ``mask_input``, with reassigned labels
    """
    # only rows that actually change a label need any work
    changed = mask_df.loc[
        mask_df["label"] != mask_df["new_label"], ["label", "new_label"]
    ]
    if changed.empty:
        return mask_input

    # match against a snapshot of the original labels: writing into the array
    # that is also being searched would let 1 -> 2 followed by 2 -> 3 turn
    # the voxels that were originally 1 into 3
    original_labels = mask_input.copy()
    for old_label, new_label in zip(changed["label"], changed["new_label"]):
        mask_input[original_labels == old_label] = new_label
    return mask_input

In [4]:
# input masks (cell and nuclei) and the output location of the relabelled nuclei mask
cell_mask_path = mask_dir.joinpath("cell_masks_watershed.tiff")
nuclei_mask_path = mask_dir.joinpath("nuclei_masks_reconstructed_corrected.tiff")
nuclei_mask_output_path = mask_dir.joinpath("nuclei_masks_reassigned.tiff")

# load both label volumes (they are indexed as (z, y, x) further down)
cell_mask = read_zstack_image(cell_mask_path)
nuclei_mask = read_zstack_image(nuclei_mask_path)

In [5]:
# get the label, centroid and bbox of every object in the cell mask
cell_df = pd.DataFrame.from_dict(
    skimage.measure.regionprops_table(
        cell_mask,
        # take the label straight from regionprops instead of sampling the mask
        # at the centroid: the centroid of a non-convex object can fall on
        # background or on a neighbouring object, which would drop or mislabel it
        properties=["label", "centroid", "bbox"],
    )
)
cell_df["compartment"] = "cell"
# move "label" behind "compartment" to keep the previous column layout
cell_df["label"] = cell_df.pop("label")
# regionprops ignores the background (label 0), so no 0-label rows need removing

In [6]:
# get the label, centroid and bbox of every object in the nuclei mask
nuclei_df = pd.DataFrame.from_dict(
    skimage.measure.regionprops_table(
        nuclei_mask,
        # take the label straight from regionprops instead of sampling the mask
        # at the centroid: the centroid of a non-convex object can fall on
        # background or on a neighbouring object, which would drop or mislabel it
        properties=["label", "centroid", "bbox"],
    )
)
nuclei_df["compartment"] = "nuclei"
# move "label" behind "compartment" to keep the previous column layout
nuclei_df["label"] = nuclei_df.pop("label")
# regionprops ignores the background (label 0), so no 0-label rows need removing
# every nucleus starts out keeping its own label
nuclei_df["new_label"] = nuclei_df["label"].copy()

In [7]:
# preview the nuclei table; new_label is still a copy of label at this point
nuclei_df.head()

Unnamed: 0,centroid-0,centroid-1,centroid-2,bbox-0,bbox-1,bbox-2,bbox-3,bbox-4,bbox-5,compartment,label,new_label
0,36.408527,778.44416,924.675087,32,719,883,42,840,966,nuclei,1,1
1,37.077536,1363.62838,1199.333622,32,1323,1154,43,1413,1244,nuclei,2,2
2,39.570794,637.988044,957.724777,34,600,919,46,678,994,nuclei,3,3
3,38.386071,1015.394674,781.534522,35,946,738,43,1088,813,nuclei,5,5
4,38.658337,436.786566,756.43665,36,400,701,43,483,825,nuclei,6,6


In [8]:
# preview the cell table (same measurements as the nuclei table, without new_label)
cell_df.head()

Unnamed: 0,centroid-0,centroid-1,centroid-2,bbox-0,bbox-1,bbox-2,bbox-3,bbox-4,bbox-5,compartment,label
0,44.980775,778.329356,928.796885,29,700,870,63,855,1006,cell,1799
1,39.176509,1367.675748,1206.566183,31,1322,1145,56,1436,1279,cell,3855
2,43.991733,640.26129,966.165076,31,589,886,63,702,1030,cell,5911
3,47.02939,676.812473,1039.618613,19,610,970,63,740,1123,cell,7967
4,39.15857,1040.784084,784.519582,31,938,732,63,1153,829,cell,10023


In [9]:
# sanity check: number of rows (objects) in the nuclei and cell tables
print(f"Number of nuclei: {len(nuclei_df)}\nNumber of cells: {len(cell_df)}\n")

Number of nuclei: 31
Number of cells: 31



In [10]:
# If the centroid of a nucleus lies inside a cell, the nucleus takes over the
# label of that cell (stored in "new_label"); otherwise it keeps its own label.
# The cell label under a centroid does not depend on which cell row it is
# compared with, so it is looked up once per nucleus instead of once per
# nucleus/cell pair.
known_cell_labels = set(cell_df["label"].tolist())
for nucleus_index, nucleus_row in nuclei_df.iterrows():
    # nearest-voxel index of the nucleus centroid, in (z, y, x) order
    z, y, x = (
        int(np.round(nucleus_row[f"centroid-{axis}"])) for axis in range(3)
    )
    label_at_centroid = int(cell_mask[z, y, x])
    # 0 is background; only labels listed in cell_df count as a cell
    if label_at_centroid > 0 and label_at_centroid in known_cell_labels:
        nuclei_df.at[nucleus_index, "new_label"] = label_at_centroid

In [11]:
# pair every nucleus with the cell whose label it was assigned;
# columns present in both tables get a "_nuclei" / "_cell" suffix
nuclei_and_cell_df = nuclei_df.merge(
    cell_df,
    how="inner",
    left_on="new_label",
    right_on="label",
    suffixes=("_nuclei", "_cell"),
)
nuclei_and_cell_df.head()

Unnamed: 0,centroid-0_nuclei,centroid-1_nuclei,centroid-2_nuclei,bbox-0_nuclei,bbox-1_nuclei,bbox-2_nuclei,bbox-3_nuclei,bbox-4_nuclei,bbox-5_nuclei,compartment_nuclei,...,centroid-1_cell,centroid-2_cell,bbox-0_cell,bbox-1_cell,bbox-2_cell,bbox-3_cell,bbox-4_cell,bbox-5_cell,compartment_cell,label_cell
0,36.408527,778.44416,924.675087,32,719,883,42,840,966,nuclei,...,778.329356,928.796885,29,700,870,63,855,1006,cell,1799
1,37.077536,1363.62838,1199.333622,32,1323,1154,43,1413,1244,nuclei,...,1367.675748,1206.566183,31,1322,1145,56,1436,1279,cell,3855
2,39.570794,637.988044,957.724777,34,600,919,46,678,994,nuclei,...,640.26129,966.165076,31,589,886,63,702,1030,cell,5911
3,38.386071,1015.394674,781.534522,35,946,738,43,1088,813,nuclei,...,1040.784084,784.519582,31,938,732,63,1153,829,cell,10023
4,38.658337,436.786566,756.43665,36,400,701,43,483,825,nuclei,...,441.962151,773.065334,28,391,676,63,523,845,cell,12079


In [12]:
# compare the original nuclei label, the reassigned label and the matching cell label
# NOTE(review): only the last expression of a cell is rendered, so the result of
# the following line is not displayed — wrap it in display(...) if it should be shown
nuclei_and_cell_df[["label_nuclei", "new_label", "label_cell"]].head()
# preview the nuclei table after reassignment (new_label now holds the cell label)
nuclei_df.head()

Unnamed: 0,centroid-0,centroid-1,centroid-2,bbox-0,bbox-1,bbox-2,bbox-3,bbox-4,bbox-5,compartment,label,new_label
0,36.408527,778.44416,924.675087,32,719,883,42,840,966,nuclei,1,1799
1,37.077536,1363.62838,1199.333622,32,1323,1154,43,1413,1244,nuclei,2,3855
2,39.570794,637.988044,957.724777,34,600,919,46,678,994,nuclei,3,5911
3,38.386071,1015.394674,781.534522,35,946,738,43,1088,813,nuclei,5,10023
4,38.658337,436.786566,756.43665,36,400,701,43,483,825,nuclei,6,12079


In [13]:
# summary of the matching: the last count is the number of rows in the merged
# table, i.e. nuclei whose new_label matches a label in the cell table
print(
    f"Number of nuclei: {len(nuclei_df)}\n"
    f"Number of cells: {len(cell_df)}\n"
    f"Number of cells with nuclei: {len(nuclei_and_cell_df)}"
)

Number of nuclei: 31
Number of cells: 31
Number of cells with nuclei: 30


In [14]:
# zero out, in both masks, every object found within 10 pixels of the image border
cell_mask, nuclei_mask = (
    remove_edge_cases(mask=mask_volume, border=10)
    for mask_volume in (cell_mask, nuclei_mask)
)

In [15]:
# relabel the nuclei mask so that each nucleus carries its new_label from nuclei_df
nuclei_mask = mask_label_reassignment(nuclei_df, nuclei_mask)
# write the relabelled nuclei mask to disk
tifffile.imwrite(nuclei_mask_output_path, nuclei_mask)