In [2]:
# This is version 3.10 for smFISH spot detection. I have tried using pycharm + other python platform but in the end I feel like using Jupyter notebook is more suitable 
# Especailly for person who enter the Planarian Field might not understand coding/structure and other things too much. 
# This requires a enviornment and setting parameters. 
# https://stackoverflow.com/questions/58645807/change-interpreter-in-jupyter-notebook Please refer to this for setting up python interpreter. 
# This requires setting up a enviornment for 3D detection on stardist and bigfish. 
# Recommend to setup a enviornment for Stardist and then install bigfish/fishquant. 
# Please read instructions from Stardist to do so, and set python interpreter using the stackoverflow instructions. 
# Depend on your Image and settings this whole code need a while to run. I am making it as light as possible so please be patient. 
# I am also using tqdm in all of my customized code to show you process and the expected time in real time. 
# Please contact qingxuguan2020@u.northwestern.edu for any details/updates/help. 

In [3]:
# Here I will put most of the varaibles need for running python 
# File path: This will include: A control channel for checking intensities and other parameters
# If there is no control channel, please change controlImage = False
# counterstainChannelPath for smFISH channel, the signal you want to segement in this case 
# nucleiChannelPath for the nuclei channel locations 
# assuming you are running 3D detection. If not please develop a separate code for this. 
controlImage = False
counterstainControlPath ="/Users/eliasguan/Desktop/EG_0920_Test_wnt1_incision_amputation/Experiment_dataset/control/0h_incision_Image1/633/control_0h_Incision_Image1_633.tif"
counterstainChannelPath = "/Users/eliasguan/Desktop/EG_0920_Test_wnt1_incision_amputation/Experiment_dataset/Experiment/0h_Amputation/Image1/633/0h_Amputation_Image1_633.tif"
nucleiSegmentationPath = "/Users/eliasguan/Desktop/EG_0920_Test_wnt1_incision_amputation/Experiment_dataset/Experiment/0h_Amputation/Image1/565/results/labels"
# Set Parameters for detection. Here minimal distance is the minimal distance between spots. 
# Note this will be consistent for both control and the smFISH channel. 
# Unless specified separately, all these three number tuples are z,y,x in order. 
minimal_distance = (2,2,2)
# Set the Gaussian LoG filter Kernel size. Recommend to start with 1,1.5,1.5 and increase if you need more. 
# I don`t think you need this different from control and experimental image. 
kernel_size = (1,1.5,1.5)
# Set the voxel size. This is determined by the pixel size of your microscope. Please contact microscopt manufactuer and convert resolution to voxel size. 
# unit is nm, please change to nm and note this should be the same for control and your experimental image. 
# I specifically allow this code to run different voxel size for control and experimental image, but for a good experiment you should not do it like that. 
control_voxel_size = (361,75,75)
voxel_size = (361,75,75)
# Set the spot size as your expected spot size 
spot_size = (600, 300, 300)
decomposition_thresh = (0.7,1,5)
# Recommend start with 4 in planarian. You can have more. Recommend turn spotsRadiusDetection = True and run the radius test
# Usually the largest radius/the average radius is what you want. 
min_spots_for_clusters = 4
# Enter the radius for spot for detecting clusters. 
# If need refer to the spotsRadiusDetection to set a good reference
radius_for_spots = 250 
# Here you need to define the spot plotting size You can make it as large as possible. 
plot_spot_size = 4
# Here you add the nuclei projection size 

In [4]:
# Here we get some advanced settings: 
# Spot_radius detections
# automated False, turn to True if you need the code to detect a correct average spotRadius, in pixels for you. 
spotsRadiusDetection = True
# Lets see if you want to save the Spot infomation. I turn it to True by default, but in general you dont need to do that. 
saveSpotInformation = True 
# Lets set this for Plotting Outer Circle. 
# If you want to plot Inner Circle Please set this to False 
plotInnerCircle = False 
# If you need to plot exact spot location, turn this to True. In this case the plot_spot_size will be rendered off since there is no need for plotting spot size. 
# Note this function is still under development. Please, be aware and I dont recommend turn it on. 
plotExactSpot = False
# Adjust the outer layer size here if you need 
exactSpotSize = 2
# Open this if you want to plot each spot by number. I do not recommend turn this on since this will largely increase the image size and does not help with anything. 
# If you need this note you need to have at most 65536 spots. If you have more manually change the dtype in the empty image but I dont think anyone need this much of image. 
plotSpotLabel = False 
labelExpansionSize = 20
nuclei_projection_size = 10

In [5]:
# This part is for importing all the functions for smFISH detection. Please install them if you dont have these pacakges. 
import os
import sys
# import tk for getting the directory faster. dont need this in a command line/server version
import tkinter as tk
from tkinter import *
from tkinter import filedialog
import numpy as np
import bigfish.detection 
import bigfish.stack
import bigfish.plot
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import csv
import random
import math
import json
# if you dont need to plot in jupyter you don need these. Some magic interperters need to be removed for command line version. 
import matplotlib
matplotlib.rcParams["image.interpolation"] = 'none'
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Glob and tifffile are needed
from glob import glob
from tifffile import imread,imwrite
# csb deep is to take normalization 
from csbdeep.utils import Path, normalize
from csbdeep.io import save_tiff_imagej_compatible
# This is your stardist models and everything in stardist coming from. 
from stardist import random_label_cmap, _draw_polygons, export_imagej_rois
from stardist.models import StarDist2D
from skimage import segmentation
import bigfish.multistack as multistack
# Set random seed for you color map. You do not really need this to be 6 all the time, but its okay. 
np.random.seed(6)
lbl_cmap = random_label_cmap()

In [6]:
def load_npy_file(filename):
    try:
        data = np.load(filename)
        print(f"Loaded {filename} successfully.")
        return data
    except FileNotFoundError:
        print(f"{filename} not found.")
    except Exception as e:
        print(f"Error loading {filename}: {e}")
    return None
def make_stardist_Predictions_labels (dataset, normalized = False, normalize_low = 0, normalize_high = 0, nms_thresh = 0, prob_thresh = 0): 
    ''' input: csbdeep input: If this image is a csbdeep filtered image. If this is true, the image will not be normalized. 
               dataset: The data set you want to analysis on 
               normalize_low and normalized_high: The parameter you want to normalize, if either of them is 0 will use default value (1,99.8)
               if not then will use these values 
               nms_thresh and prob_thrsh : the parameter of overlapping and probablity threshold. If eitehr of them is 0 will use default value 
               for the model (depend on the model) if not then use these values
    '''
    labels_collection= []
    for i in range(len(dataset)): 
        if not normalized:
            if normalize_low ==0 or normalize_high == 0:
                img = normalize(dataset[i], 1,99.8, axis=(0,1))
            else:
                img = normalize(dataset[i], normalize_low, normalize_high, axis=(0,1))
        else:
            img = dataset[i]
        if nms_thresh == 0 or prob_thresh == 0 :
            labels, details = model.predict_instances(img)
        else: 
            labels, details = model.predict_instances(img,nms_thresh = nms_thresh, prob_thresh = prob_thresh)
        # write labels
        labels_collection.append(labels)
        export_imagej_rois('polygons/polygon_rois_'+str(i).zfill(3)+'.zip', details['coord'])
        imwrite("labels/Nucleus_Labels_"+str(i).zfill(3)+".tif", labels)
    labels_collection = np.array(labels_collection, dtype = np.uint16)
    return labels_collection
def random_select_images(dataset, percentage):
    """
    Randomly select a certain percentage of images from the dataset.

    Parameters:
        dataset (list): A list containing 2D arrays (images).
        percentage (float): Percentage of images to be selected.

    Returns:
        selected_indices (list): A list of indices corresponding to the selected images.
    """
    num_images = len(dataset)
    num_selected = int(np.ceil(percentage/100 * num_images))
    selected_indices = np.random.choice(num_images, num_selected, replace=False)
    return selected_indices

def random_select_region(image, region_size):
    """
    Randomly select a region within the image with a certain size.

    Parameters:
        image (2D array): The original image.
        region_size (tuple): The size of the region to be selected (height, width).

    Returns:
        region (2D array): The selected region.
    """
    image_height, image_width = image.shape
    region_height, region_width = region_size

    if region_height > image_height or region_width > image_width:
        raise ValueError("Region size exceeds original image size.")

    start_row = np.random.randint(0, image_height - region_height + 1)
    start_col = np.random.randint(0, image_width - region_width + 1)
    end_row = start_row + region_height
    end_col = start_col + region_width

    region_prop = [(start_row, end_row),(start_col,end_col)]
    return region_prop
def make_random_examples(dataset, normalize_low = 0, normalize_high =0, nms_thresh =0, prob_thresh = 0, normalized = False, percentage = 20, region_size = (500,500)):
    selected_indicies = sorted(random_select_images(dataset, percentage))
    for item in selected_indicies: 
        if normalized:
            img = dataset[item]
        else:
            if normalize_low ==0 or normalize_high == 0:
                img = normalize(dataset[item], 1,99.8, axis=(0,1))
            else:
                img = normalize(dataset[item], normalize_low, normalize_high, axis=(0,1))   
        if nms_thresh == 0 or prob_thresh == 0 :
            labels, details = model.predict_instances(img)
        else: 
            labels, details = model.predict_instances(img, nms_thresh = nms_thresh, prob_thresh = prob_thresh)
        region_props =  random_select_region(img, region_size)
        cropped_image = img[region_props[0][0]:region_props[0][1],region_props[1][0]:region_props[1][1]]
        cropped_label = labels[region_props[0][0]:region_props[0][1],region_props[1][0]:region_props[1][1]]
        figure = plt.figure(figsize=(13,10))
        coord, points, prob = details['coord'], details['points'], details['prob']
        # Plot image on the first one
        ax1 = figure.add_subplot(121); ax1.imshow(cropped_image, cmap='gray'); ax1.axis('off')
        # Plot image on the second one
        ax2 = figure.add_subplot(122); ax2.imshow(cropped_image, cmap='gray'); ax2.axis('off')
        # Plot labels on the third one. 
        ax2.imshow(cropped_label, cmap=lbl_cmap, alpha=0.3)
        figure.tight_layout()
        plt.savefig("random_examples/random_example_"+str(item).zfill(3)+".tif")
def create_folder_in_same_directory(file_path, folder_name):
    """
    Creates a folder with the specified name in the same directory as the given file.
    If the folder already exists, it returns the existing path.
    """
    # Get the directory of the given file
    directory = os.path.dirname(file_path)
    
    # Define the path for the specified folder
    folder_path = os.path.join(directory, folder_name)
    
    # Check if the folder exists
    if not os.path.exists(folder_path):
        # Create the folder if it doesn't exist
        os.makedirs(folder_path)
        print(f"Created '{folder_name}' folder at: {folder_path}")
    else:
        print(f"'{folder_name}' folder already exists at: {folder_path}")
    
    return folder_path
def generate_max_projection_array(array, projection_size):
    ranges = []
    total = array.shape[0]
    projected_image = []
    for i in range(0, total, projection_size):
        start = i
        end = min(i + projection_size - 1, total - 1)
        ranges.append((start, end))
    for item in ranges:
        start, end = item

        for i in range(start,end):
            nuclei_array.append(array[i])
        projection = bigfish.stack.maximum_projection(np.array(nuclei_array,dtype=np.uint8))
        projected_image.append(projection)
    return np.array(projected_image,dtype=np.uint8)

In [7]:
# Get into your counterstain spots
os.chdir(os.path.dirname(counterstainChannelPath))
os.chdir("results")
# Read in your counterstain spots file 
# File names
file_A = 'spots_post_decomposition_and_background_removed.npy'
file_B = 'spots_post_decomposition.npy'

# Try loading A, fallback to B if A fails
post_decomposition_array = load_npy_file(file_A)
if post_decomposition_array is None:
    post_decomposition_array = load_npy_file(file_B)

Loaded spots_post_decomposition_and_background_removed.npy successfully.


In [8]:
os.chdir(nucleiSegmentationPath)
nucleiSegmentationFilenames =sorted(glob("*.tif"))
nucleiImageArray_projected_labels = []
for item in nucleiSegmentationFilenames:
    nucleiSegmentation = imread(item)
    nucleiImageArray_projected_labels.append(nucleiSegmentation)

In [9]:
# Get into your counterstain spots
os.chdir(os.path.dirname(counterstainChannelPath))
os.chdir("results")
post_decomposition_array_projected = np.copy(post_decomposition_array)  # Create a copy of the original array
post_decomposition_array_projected[:, 0] = np.floor_divide(post_decomposition_array[:, 0], nuclei_projection_size)
create_folder_in_same_directory('.','expanded_labels')
assignment_results = []
# for i in range(len(nucleiImageArray_projected_labels)):
    # expanded_labels = segmentation.expand_labels(nucleiImageArray_projected_labels[i], distance=labelExpansionSize)
    # indices = np.where(post_decomposition_array_projected[:, 0] == i)[0]
    # rna_coord = post_decomposition_array_projected[indices][:, -2:]
    # imwrite('expanded_labels/20expandedlabel'+str(i).zfill(3)+'.tif', expanded_labels, photometric = 'minisblack')
    # cell_extraction_results = multistack.extract_cell(cell_label = expanded_labels, ndim =2, rna_coord = rna_coord)
    # assignment_results.append(cell_extraction_results)

'expanded_labels' folder already exists at: expanded_labels


In [9]:
expanded_labels = segmentation.expand_labels(nucleiImageArray_projected_labels[0], distance=labelExpansionSize)
indices = np.where(post_decomposition_array_projected[:, 0] == i)[0]
rna_coord = post_decomposition_array_projected[indices][:, -2:]
imwrite('expanded_labels/20expandedlabel'+str(i).zfill(3)+'.tif', expanded_labels, photometric = 'minisblack')
cell_extraction_results = multistack.extract_cell(cell_label = expanded_labels, ndim =2, rna_coord = rna_coord)
assignment_results.append(cell_extraction_results)

NameError: name 'i' is not defined

In [10]:
os.getcwd()

'/Users/eliasguan/Desktop/EG_0920_Test_wnt1_incision_amputation/Experiment_dataset/Experiment/0h_Amputation/Image1/633/results'

In [11]:
expanded_label = segmentation.expand_labels(nucleiImageArray_projected_labels[0], distance=labelExpansionSize)

In [12]:
indices = np.where(post_decomposition_array_projected[:, 0] == 0)[0]
rna_coord = post_decomposition_array_projected[indices][:, -2:]
expanded_labels = expanded_label.astype(dtype=np.uint16)
rna_coord = rna_coord.astype(dtype="int64")

In [None]:
imwrite('expanded_labels/20expandedlabel'+str(0).zfill(3)+'.tif', expanded_labels, photometric = 'minisblack')
cell_extraction_results = multistack.extract_cell(cell_label = expanded_label, ndim =2, rna_coord = rna_coord)

In [28]:
expanded_label.dtype

dtype('int32')

In [26]:
rna_coord.dtype

dtype('int64')

In [13]:
import warnings

import numpy as np
import pandas as pd
from scipy import ndimage as ndi

import bigfish.stack as stack

from skimage.measure import regionprops
from skimage.measure import find_contours
from skimage.draw import polygon_perimeter

In [14]:
def extract_cell(
        cell_label,
        ndim,
        nuc_label=None,
        rna_coord=None,
        others_coord=None,
        image=None,
        others_image=None,
        remove_cropped_cell=True,
        check_nuc_in_cell=True):
    """Extract cell-level results for an image.

    The function gathers different segmentation and detection results obtained
    at the image level and assigns each of them to the individual cells.

    Parameters
    ----------
    cell_label : np.ndarray, np.uint or np.int
        Image with labelled cells and shape (y, x).
    ndim : int
        Number of spatial dimensions to consider (2 or 3).
    nuc_label : np.ndarray, np.uint or np.int
        Image with labelled nuclei and shape (y, x). If None, individual
        nuclei are not assigned to each cell.
    rna_coord : np.ndarray
        Coordinates of the detected RNAs with zyx or yx coordinates in the
        first 3 or 2 columns. If None, RNAs are not assigned to individual
        cells.
    others_coord : Dict[np.ndarray]
        Dictionary of coordinates arrays. For each array of the dictionary,
        the different elements are assigned to individual cells. Arrays should
        be organized the same way than spots: zyx or yx coordinates in the
        first 3 or 2 columns, np.int64 dtype, one element per row. Can be used
        to assign different detected elements to the segmented cells along with
        the spots. If None, no others elements are assigned to the individual
        cells.
    image : np.ndarray, np.uint
        Image in 2-d. If None, image of the individual cells are not extracted.
    others_image : Dict[np.ndarray]
        Dictionary of images to crop. If None, no others image of the
        individual cells are extracted.
    remove_cropped_cell : bool
        Remove cells cropped by the FoV frame.
    check_nuc_in_cell : bool
        Check that each nucleus is entirely localized within a cell.

    Returns
    -------
    fov_results : List[Dict]
        List of dictionaries, one per cell segmented in the image. Each
        dictionary includes information about the cell (image, masks,
        coordinates arrays). Minimal information are:

        * `cell_id`: Unique id of the cell.
        * `bbox`: bounding box coordinates with the order (`min_y`, `min_x`,
          `max_y`, `max_x`).
        * `cell_coord`: boundary coordinates of the cell.
        * `cell_mask`: mask of the cell.

    """
    actual_keys = ["cell_id", "bbox", "cell_coord", "cell_mask", "nuc_coord",
                   "nuc_mask", "rna_coord", "image"]
    if rna_coord is not None and rna_coord.shape[1] < ndim:
        warnings.warn("'rna_coord' have less coordinates ({0}) than the "
                      "minimum number of spatial dimension we "
                      "consider ({1}).".format(rna_coord.shape[1], ndim),
                      UserWarning)

    # initialize FoV results
    fov_results = []

    # initialize a mask to detect cells at the FoV borders
    fov_borders = np.zeros(cell_label.shape, dtype=bool)
    if remove_cropped_cell:
        fov_borders[:, 0] = True
        fov_borders[0, :] = True
        fov_borders[:, cell_label.shape[1] - 1] = True
        fov_borders[cell_label.shape[0] - 1, :] = True

    # iterate over each segmented cell
    cells = regionprops(cell_label)
    for cell in cells:

        # initialize cell results
        cell_results = {}

        # get the bounding box of the cell
        label = cell.label
        cell_results["cell_id"] = label
        (min_y, min_x, max_y, max_x) = cell.bbox
        cell_results["bbox"] = cell.bbox

        # get binary masks of the cell
        cell_mask = cell_label.copy()
        cell_mask = (cell_mask == label)

        # check if cell is not cropped by the borders
        if remove_cropped_cell and _check_cropped_cell(cell_mask, fov_borders):
            continue

        # get boundaries coordinates for cell
        cell_coord = from_binary_to_coord(cell_mask)
        cell_coord = complete_coord_boundaries(cell_coord)
        cell_coord[:, 0] -= min_y
        cell_coord[:, 1] -= min_x
        cell_results["cell_coord"] = cell_coord

        # crop binary mask of the cell
        cell_mask_cropped = cell_mask[min_y: max_y, min_x: max_x]
        cell_results["cell_mask"] = cell_mask_cropped
        
        # get coordinates of the spots detected in the cell
        if rna_coord is not None:
            rna_in_cell, _ = identify_objects_in_region(
                cell_mask,
                rna_coord,
                ndim)
            rna_in_cell[:, ndim - 2] -= min_y
            rna_in_cell[:, ndim - 1] -= min_x
            cell_results["rna_coord"] = rna_in_cell

        # get coordinates of the other detected elements
        if others_coord is not None:
            for key in others_coord:
                array = others_coord[key]
                element_in_cell, _ = identify_objects_in_region(
                    cell_mask,
                    array,
                    ndim)
                element_in_cell[:, ndim - 2] -= min_y
                element_in_cell[:, ndim - 1] -= min_x
                cell_results[key] = element_in_cell

        # crop cell image
        if image is not None:
            image_cropped = image[min_y: max_y, min_x: max_x]
            cell_results["image"] = image_cropped

        fov_results.append(cell_results)

    return fov_results
def extract_cell(
        cell_label,
        ndim,
        nuc_label=None,
        rna_coord=None,
        others_coord=None,
        image=None,
        others_image=None,
        remove_cropped_cell=True,
        check_nuc_in_cell=True):
    """Extract cell-level results for an image.

    The function gathers different segmentation and detection results obtained
    at the image level and assigns each of them to the individual cells.

    Parameters
    ----------
    cell_label : np.ndarray, np.uint or np.int
        Image with labelled cells and shape (y, x).
    ndim : int
        Number of spatial dimensions to consider (2 or 3).
    nuc_label : np.ndarray, np.uint or np.int
        Image with labelled nuclei and shape (y, x). If None, individual
        nuclei are not assigned to each cell.
    rna_coord : np.ndarray
        Coordinates of the detected RNAs with zyx or yx coordinates in the
        first 3 or 2 columns. If None, RNAs are not assigned to individual
        cells.
    others_coord : Dict[np.ndarray]
        Dictionary of coordinates arrays. For each array of the dictionary,
        the different elements are assigned to individual cells. Arrays should
        be organized the same way than spots: zyx or yx coordinates in the
        first 3 or 2 columns, np.int64 dtype, one element per row. Can be used
        to assign different detected elements to the segmented cells along with
        the spots. If None, no others elements are assigned to the individual
        cells.
    image : np.ndarray, np.uint
        Image in 2-d. If None, image of the individual cells are not extracted.
    others_image : Dict[np.ndarray]
        Dictionary of images to crop. If None, no others image of the
        individual cells are extracted.
    remove_cropped_cell : bool
        Remove cells cropped by the FoV frame.
    check_nuc_in_cell : bool
        Check that each nucleus is entirely localized within a cell.

    Returns
    -------
    fov_results : List[Dict]
        List of dictionaries, one per cell segmented in the image. Each
        dictionary includes information about the cell (image, masks,
        coordinates arrays). Minimal information are:

        * `cell_id`: Unique id of the cell.
        * `bbox`: bounding box coordinates with the order (`min_y`, `min_x`,
          `max_y`, `max_x`).
        * `cell_coord`: boundary coordinates of the cell.
        * `cell_mask`: mask of the cell.

    """
    actual_keys = ["cell_id", "bbox", "cell_coord", "cell_mask", "nuc_coord",
                   "nuc_mask", "rna_coord", "image"]
    if rna_coord is not None and rna_coord.shape[1] < ndim:
        warnings.warn("'rna_coord' have less coordinates ({0}) than the "
                      "minimum number of spatial dimension we "
                      "consider ({1}).".format(rna_coord.shape[1], ndim),
                      UserWarning)

    # initialize FoV results
    fov_results = []

    # initialize a mask to detect cells at the FoV borders
    fov_borders = np.zeros(cell_label.shape, dtype=bool)
    if remove_cropped_cell:
        fov_borders[:, 0] = True
        fov_borders[0, :] = True
        fov_borders[:, cell_label.shape[1] - 1] = True
        fov_borders[cell_label.shape[0] - 1, :] = True

    # iterate over each segmented cell
    cells = regionprops(cell_label)
    for cell in tqdm(cells):

        # initialize cell results
        cell_results = {}

        # get the bounding box of the cell
        label = cell.label
        cell_results["cell_id"] = label
        (min_y, min_x, max_y, max_x) = cell.bbox
        cell_results["bbox"] = cell.bbox

        # get binary masks of the cell
        cell_mask = cell_label.copy()
        cell_mask = (cell_mask == label)

        # check if cell is not cropped by the borders
        if remove_cropped_cell and _check_cropped_cell(cell_mask, fov_borders):
            continue

        # get boundaries coordinates for cell
        cell_coord = from_binary_to_coord(cell_mask)
        cell_coord = complete_coord_boundaries(cell_coord)
        cell_coord[:, 0] -= min_y
        cell_coord[:, 1] -= min_x
        cell_results["cell_coord"] = cell_coord

        # crop binary mask of the cell
        cell_mask_cropped = cell_mask[min_y: max_y, min_x: max_x]
        cell_results["cell_mask"] = cell_mask_cropped
        
        # get coordinates of the spots detected in the cell
        if rna_coord is not None:
            rna_in_cell, _ = identify_objects_in_region(
                cell_mask,
                rna_coord,
                ndim)
            rna_in_cell[:, ndim - 2] -= min_y
            rna_in_cell[:, ndim - 1] -= min_x
            cell_results["rna_coord"] = rna_in_cell

        # get coordinates of the other detected elements
        if others_coord is not None:
            for key in others_coord:
                array = others_coord[key]
                element_in_cell, _ = identify_objects_in_region(
                    cell_mask,
                    array,
                    ndim)
                element_in_cell[:, ndim - 2] -= min_y
                element_in_cell[:, ndim - 1] -= min_x
                cell_results[key] = element_in_cell

        # crop cell image
        if image is not None:
            image_cropped = image[min_y: max_y, min_x: max_x]
            cell_results["image"] = image_cropped

        fov_results.append(cell_results)

    return fov_results




In [15]:
cell_label = expanded_labels
cells = regionprops(cell_label)
cell_label.dtype

dtype('uint16')

In [16]:
def _check_cropped_cell(cell_mask, border_frame):
    """
    Check if a cell is cropped by the border frame.

    Parameters
    ----------
    cell_mask : np.ndarray, bool
        Binary mask of the cell cytoplasm.

    border_frame : np.ndarray, bool
        Binary mask of the border frame.

    Returns
    -------
    _ : bool
        True if cell is cropped.

    """
    # check cell is not cropped by the borders
    crop = cell_mask & border_frame
    if np.any(crop):
        return True
    else:
        return False

In [17]:
fov_borders = np.zeros(cell_label.shape, dtype=bool)

In [18]:
def from_binary_to_coord(binary):
    """Extract coordinates from a 2-d binary matrix.

    As the resulting coordinates represent the external boundaries of the
    object, the coordinates values can be negative.

    Parameters
    ----------
    binary : np.ndarray, np.uint or np.int or bool
        Binary image with shape (y, x).

    Returns
    -------
    coord : np.ndarray, np.int
        Array of boundaries coordinates with shape (nb_points, 2).

    """
    # check parameters
    stack.check_array(
        binary,
        ndim=2,
        dtype=[np.uint8, np.uint16, np.int32, np.int64, bool])

    # store dtype
    if binary.dtype == np.int32:
        dtype = np.int32
    else:
        dtype = np.int64

    # we enlarge the binary mask with one pixel to be sure the external
    # boundaries of the object still fit within the frame
    binary_ = np.pad(binary, [(1, 1)], mode="constant")

    # get external boundaries coordinates
    coord = find_contours(binary_, level=0)[0].astype(dtype)

    # remove the pad
    coord -= 1

    return coord


def complete_coord_boundaries(coord):
    """Complete a 2-d coordinates array, by generating/interpolating missing
    points.

    Parameters
    ----------
    coord : np.ndarray, np.int
        Array of coordinates to complete, with shape (nb_points, 2).

    Returns
    -------
    coord_completed : np.ndarray, np.int
        Completed coordinates arrays, with shape (nb_points, 2).

    """
    # check parameters
    stack.check_array(coord, ndim=2, dtype=[np.int32, np.int64])

    # for each array in the list, complete its coordinates using the scikit
    # image method 'polygon_perimeter'
    coord_y, coord_x = polygon_perimeter(coord[:, 0], coord[:, 1])
    coord_y = coord_y[:, np.newaxis]
    coord_x = coord_x[:, np.newaxis]
    coord_completed = np.concatenate((coord_y, coord_x), axis=-1)
    coord_completed = coord_completed.astype(coord.dtype)

    return coord_completed
def identify_objects_in_region(mask, coord, ndim):
    """Identify cellular objects in specific region.

    Parameters
    ----------
    mask : np.ndarray, bool
        Binary mask of the targeted region with shape (y, x).
    coord : np.ndarray
        Array with two dimensions. One object per row, zyx or yx coordinates
        in the first 3 or 2 columns.
    ndim : int
        Number of spatial dimensions to consider (2 or 3).

    Returns
    -------
    coord_in : np.ndarray
        Coordinates of the objects detected inside the region.
    coord_out : np.ndarray
        Coordinates of the objects detected outside the region.

    """
    # check parameters
    stack.check_parameter(ndim=int)
    stack.check_array(
        mask,
        ndim=2,
        dtype=[np.uint8, np.uint16, np.int32, np.int64, bool])
    stack.check_array(
        coord,
        ndim=2,
        dtype=[np.float32, np.float64, np.int32, np.int64])

    # check number of dimensions
    if ndim not in [2, 3]:
        raise ValueError("The number of spatial dimension requested should be "
                         "2 or 3, not {0}.".format(ndim))
    if coord.shape[1] < ndim:
        raise ValueError("Coord array should have at least {0} features to "
                         "match the number of spatial dimensions requested. "
                         "Currently {1} is not enough."
                         .format(ndim, coord.shape[1]))

    # binarize nuclei mask if needed
    if mask.dtype != bool:
        mask = mask.astype(bool)

    # cast coordinates dtype if necessary
    if coord.dtype in [np.int32, np.int64]:
        coord_int = coord
    else:
        coord_int = np.round(coord).astype(np.int64)

    # remove objects inside the region
    mask_in = mask[coord_int[:, ndim - 2], coord_int[:, ndim - 1]]
    coord_in = coord[mask_in]
    coord_out = coord[~mask_in]

    return coord_in, coord_out


def remove_transcription_site(rna, clusters, nuc_mask, ndim):
    """Distinguish RNA molecules detected in a transcription site from the
    rest.

    A transcription site is defined as as a foci detected within the nucleus.

    Parameters
    ----------
    rna : np.ndarray
        Coordinates of the detected RNAs with shape (nb_spots, 4) or
        (nb_spots, 3). One coordinate per dimension (zyx or yx coordinates)
        plus the index of the cluster assigned to the RNA. If no cluster was
        assigned, value is -1.
    clusters : np.ndarray
        Array with shape (nb_clusters, 5) or (nb_clusters, 4). One coordinate
        per dimension for the clusters centroid (zyx or yx coordinates),
        the number of RNAs detected in the clusters and their index.
    nuc_mask : np.ndarray, bool
        Binary mask of the nuclei region with shape (y, x).
    ndim : int
        Number of spatial dimensions to consider (2 or 3).

    Returns
    -------
    rna_out_ts : np.ndarray
        Coordinates of the detected RNAs with shape (nb_spots, 4) or
        (nb_spots, 3). One coordinate per dimension (zyx or yx coordinates)
        plus the index of the foci assigned to the RNA. If no foci was
        assigned, value is -1. RNAs from transcription sites are removed.
    foci : np.ndarray
        Array with shape (nb_foci, 5) or (nb_foci, 4). One coordinate per
        dimension for the foci centroid (zyx or yx coordinates),
        the number of RNAs detected in the foci and its index.
    ts : np.ndarray
        Array with shape (nb_ts, 5) or (nb_ts, 4). One coordinate per
        dimension for the transcription site centroid (zyx or yx coordinates),
        the number of RNAs detected in the transcription site and its index.

    """
    # check parameters
    stack.check_array(
        rna,
        ndim=2,
        dtype=[np.float32, np.float64, np.int32, np.int64])

    # discriminate foci from transcription sites
    ts, foci = identify_objects_in_region(
        nuc_mask, clusters, ndim)

    # filter out rna from transcription sites
    rna_in_ts = ts[:, ndim + 1]
    mask_rna_in_ts = np.isin(rna[:, ndim], rna_in_ts)
    rna_out_ts = rna[~mask_rna_in_ts]

    return rna_out_ts, foci, ts


# ### Nuclei-cells matching

def match_nuc_cell(nuc_label, cell_label, single_nuc, cell_alone):
    """Match each nucleus instance with the most overlapping cell instance.

    Parameters
    ----------
    nuc_label : np.ndarray, np.int or np.uint
        Labelled image of nuclei with shape (z, y, x) or (y, x).
    cell_label : np.ndarray, np.int or np.uint
        Labelled image of cells with shape (z, y, x) or (y, x).
    single_nuc : bool
        Authorized only one nucleus in a cell.
    cell_alone : bool
        Authorized cell without nucleus.

    Returns
    -------
    new_nuc_label : np.ndarray, np.int or np.uint
        Labelled image of nuclei with shape (z, y, x) or (y, x).
    new_cell_label : np.ndarray, np.int or np.uint
        Labelled image of cells with shape (z, y, x) or (y, x).

    """
    # check parameters
    stack.check_array(
        nuc_label,
        ndim=[2, 3],
        dtype=[np.uint8, np.uint16, np.int32, np.int64])
    stack.check_array(
        cell_label,
        ndim=[2, 3],
        dtype=[np.uint8, np.uint16, np.int32, np.int64])

    # initialize new labelled images
    new_nuc_label = np.zeros_like(nuc_label)
    new_cell_label = np.zeros_like(cell_label)
    remaining_cell_label = cell_label.copy()

    # loop over nuclei
    i_instance = 1
    max_nuc_label = nuc_label.max()
    for i_nuc in range(1, max_nuc_label + 1):

        # get nuc mask
        nuc_mask = nuc_label == i_nuc

        # check if a nucleus is labelled with this value
        if nuc_mask.sum() == 0:
            continue

        # check if a cell is labelled with this value
        i_cell = _get_most_frequent_value(cell_label[nuc_mask])
        if i_cell == 0:
            continue

        # get cell mask
        cell_mask = cell_label == i_cell

        # ensure nucleus is totally included in cell
        cell_mask |= nuc_mask
        cell_label[cell_mask] = i_cell
        remaining_cell_label[cell_mask] = i_cell

        # assign cell and nucleus
        new_nuc_label[nuc_mask] = i_instance
        new_cell_label[cell_mask] = i_instance
        i_instance += 1

        # remove pixel already assigned
        remaining_cell_label[cell_mask] = 0

        # if one nucleus per cell only, we remove the cell as candidate
        if single_nuc:
            cell_label[cell_mask] = 0

    # if only cell with nucleus are authorized we stop here
    if not cell_alone:
        return new_nuc_label, new_cell_label

    # loop over remaining cells
    max_remaining_cell_label = remaining_cell_label.max()
    for i_cell in range(1, max_remaining_cell_label + 1):

        # get cell mask
        cell_mask = remaining_cell_label == i_cell

        # check if a cell is labelled with this value
        if cell_mask.sum() == 0:
            continue

        # add cell in the result
        new_cell_label[cell_mask] = i_instance
        i_instance += 1

    return new_nuc_label, new_cell_label


def _get_most_frequent_value(array):
    """Count the most frequent value in a array.

    Parameters
    ----------
    array : np.ndarray, np.uint or np.int
        Array-like object.

    Returns
    -------
    value : int
        Most frequent integer in the array.

    """
    value = np.argmax(np.bincount(array))

    return value


# ### Cell extraction ###

def extract_cell(
        cell_label,
        ndim,
        nuc_label=None,
        rna_coord=None,
        others_coord=None,
        image=None,
        others_image=None,
        remove_cropped_cell=True,
        check_nuc_in_cell=True):
    """Extract cell-level results for an image.

    The function gathers different segmentation and detection results obtained
    at the image level and assigns each of them to the individual cells.

    Parameters
    ----------
    cell_label : np.ndarray, np.uint or np.int
        Image with labelled cells and shape (y, x).
    ndim : int
        Number of spatial dimensions to consider (2 or 3).
    nuc_label : np.ndarray, np.uint or np.int
        Image with labelled nuclei and shape (y, x). If None, individual
        nuclei are not assigned to each cell.
    rna_coord : np.ndarray
        Coordinates of the detected RNAs with zyx or yx coordinates in the
        first 3 or 2 columns. If None, RNAs are not assigned to individual
        cells.
    others_coord : Dict[np.ndarray]
        Dictionary of coordinates arrays. For each array of the dictionary,
        the different elements are assigned to individual cells. Arrays should
        be organized the same way than spots: zyx or yx coordinates in the
        first 3 or 2 columns, np.int64 dtype, one element per row. Can be used
        to assign different detected elements to the segmented cells along with
        the spots. If None, no others elements are assigned to the individual
        cells.
    image : np.ndarray, np.uint
        Image in 2-d. If None, image of the individual cells are not extracted.
    others_image : Dict[np.ndarray]
        Dictionary of images to crop. If None, no others image of the
        individual cells are extracted.
    remove_cropped_cell : bool
        Remove cells cropped by the FoV frame.
    check_nuc_in_cell : bool
        Check that each nucleus is entirely localized within a cell.

    Returns
    -------
    fov_results : List[Dict]
        List of dictionaries, one per cell segmented in the image. Each
        dictionary includes information about the cell (image, masks,
        coordinates arrays). Minimal information are:

        * `cell_id`: Unique id of the cell.
        * `bbox`: bounding box coordinates with the order (`min_y`, `min_x`,
          `max_y`, `max_x`).
        * `cell_coord`: boundary coordinates of the cell.
        * `cell_mask`: mask of the cell.

    """
    # check parameters
    stack.check_parameter(
        ndim=int,
        others_coord=(dict, type(None)),
        others_image=(dict, type(None)),
        remove_cropped_cell=bool,
        check_nuc_in_cell=bool)
    stack.check_array(
        cell_label,
        ndim=2,
        dtype=[np.uint8, np.uint16, np.int32, np.int64])
    if nuc_label is not None:
        stack.check_array(
            nuc_label,
            ndim=2,
            dtype=[np.uint8, np.uint16, np.int32, np.int64])
    if rna_coord is not None:
        stack.check_array(
            rna_coord,
            ndim=2,
            dtype=[np.float32, np.float64, np.int32, np.int64])
    if image is not None:
        stack.check_array(image, ndim=2, dtype=[np.uint8, np.uint16])
    actual_keys = ["cell_id", "bbox", "cell_coord", "cell_mask", "nuc_coord",
                   "nuc_mask", "rna_coord", "image"]
    if others_coord is not None:
        for key in others_coord:
            if key in actual_keys:
                raise KeyError("Key {0} in 'others_coord' is already taken. "
                               "Please choose another one.".format(key))
            else:
                actual_keys.append(key)
            array = others_coord[key]
            stack.check_array(
                array,
                ndim=2,
                dtype=[np.float32, np.float64, np.int32, np.int64])
            if array.shape[1] < ndim:
                warnings.warn("Array in 'others_coord' have less coordinates "
                              "({0}) than the minimum number of spatial "
                              "dimension we consider ({1})."
                              .format(array.shape[1], ndim),
                              UserWarning)
    if others_image is not None and image is not None:
        for key in others_image:
            if key in actual_keys:
                raise KeyError("Key {0} in 'others_image' is already taken. "
                               "Please choose another one.".format(key))
            else:
                actual_keys.append(key)
            image_ = others_image[key]
            stack.check_array(
                image_,
                ndim=2,
                dtype=[np.uint8, np.uint16, np.int32, np.int64, bool])
            if image_.shape != image.shape:
                warnings.warn("Image in 'others_image' does not have the same "
                              "shape ({0}) than original image ({1})."
                              .format(image_.shape, image.shape),
                              UserWarning)
    if rna_coord is not None and rna_coord.shape[1] < ndim:
        warnings.warn("'rna_coord' have less coordinates ({0}) than the "
                      "minimum number of spatial dimension we "
                      "consider ({1}).".format(rna_coord.shape[1], ndim),
                      UserWarning)

    # initialize FoV results
    fov_results = []

    # initialize a mask to detect cells at the FoV borders
    fov_borders = np.zeros(cell_label.shape, dtype=bool)
    if remove_cropped_cell:
        fov_borders[:, 0] = True
        fov_borders[0, :] = True
        fov_borders[:, cell_label.shape[1] - 1] = True
        fov_borders[cell_label.shape[0] - 1, :] = True

    # iterate over each segmented cell
    cells = regionprops(cell_label)
    for cell in cells:

        # initialize cell results
        cell_results = {}

        # get the bounding box of the cell
        label = cell.label
        cell_results["cell_id"] = label
        (min_y, min_x, max_y, max_x) = cell.bbox
        cell_results["bbox"] = cell.bbox

        # get binary masks of the cell
        cell_mask = cell_label.copy()
        cell_mask = (cell_mask == label)

        # check if cell is not cropped by the borders
        if remove_cropped_cell and _check_cropped_cell(cell_mask, fov_borders):
            continue

        # get boundaries coordinates for cell
        cell_coord = from_binary_to_coord(cell_mask)
        cell_coord = complete_coord_boundaries(cell_coord)
        cell_coord[:, 0] -= min_y
        cell_coord[:, 1] -= min_x
        cell_results["cell_coord"] = cell_coord

        # crop binary mask of the cell
        cell_mask_cropped = cell_mask[min_y: max_y, min_x: max_x]
        cell_results["cell_mask"] = cell_mask_cropped

        # get binary mask of the nucleus
        if nuc_label is not None:
            nuc_mask = nuc_label.copy()
            nuc_mask = (nuc_mask == label)

            # check if nucleus is in the cell
            if (check_nuc_in_cell
                    and not _check_nucleus_in_cell(cell_mask, nuc_mask)):
                continue

            # get boundaries coordinates for nucleus
            nuc_coord = from_binary_to_coord(nuc_mask)
            nuc_coord = complete_coord_boundaries(nuc_coord)
            nuc_coord[:, 0] -= min_y
            nuc_coord[:, 1] -= min_x
            cell_results["nuc_coord"] = nuc_coord

            # crop binary mask of the nucleus
            nuc_mask_cropped = nuc_mask[min_y: max_y, min_x: max_x]
            cell_results["nuc_mask"] = nuc_mask_cropped

        # get coordinates of the spots detected in the cell
        if rna_coord is not None:
            rna_in_cell, _ = identify_objects_in_region(
                cell_mask,
                rna_coord,
                ndim)
            rna_in_cell[:, ndim - 2] -= min_y
            rna_in_cell[:, ndim - 1] -= min_x
            cell_results["rna_coord"] = rna_in_cell

        # get coordinates of the other detected elements
        if others_coord is not None:
            for key in others_coord:
                array = others_coord[key]
                element_in_cell, _ = identify_objects_in_region(
                    cell_mask,
                    array,
                    ndim)
                element_in_cell[:, ndim - 2] -= min_y
                element_in_cell[:, ndim - 1] -= min_x
                cell_results[key] = element_in_cell

        # crop cell image
        if image is not None:
            image_cropped = image[min_y: max_y, min_x: max_x]
            cell_results["image"] = image_cropped

        # get crops of the other images
        if others_image is not None:
            for key in others_image:
                image_ = others_image[key]
                image_cropped_ = image_[min_y: max_y, min_x: max_x]
                cell_results[key] = image_cropped_

        fov_results.append(cell_results)

    return fov_results

In [None]:
ndim = 2
fov_results = []
actual_keys = ["cell_id", "bbox", "cell_coord", "cell_mask", "nuc_coord",
                   "nuc_mask", "rna_coord", "image"]
for cell in tqdm(cells):
    
    # initialize cell results
    cell_results = {}

    # get the bounding box of the cell
    label = cell.label
    cell_results["cell_id"] = label
    (min_y, min_x, max_y, max_x) = cell.bbox
    cell_results["bbox"] = cell.bbox

    # get binary masks of the cell
    cell_mask = cell_label.copy()
    cell_mask = (cell_mask == label)

    # check if cell is not cropped by the borders
    if _check_cropped_cell(cell_mask, fov_borders):
        continue

    # get boundaries coordinates for cell
    cell_coord = from_binary_to_coord(cell_mask)
    cell_coord = complete_coord_boundaries(cell_coord)
    cell_coord[:, 0] -= min_y
    cell_coord[:, 1] -= min_x
    cell_results["cell_coord"] = cell_coord

    # crop binary mask of the cell
    cell_mask_cropped = cell_mask[min_y: max_y, min_x: max_x]
    cell_results["cell_mask"] = cell_mask_cropped
    
    # get coordinates of the spots detected in the cell
    if rna_coord is not None:
        rna_in_cell, _ = identify_objects_in_region(
            cell_mask,
            rna_coord,
            ndim)
        rna_in_cell[:, ndim - 2] -= min_y
        rna_in_cell[:, ndim - 1] -= min_x
        cell_results["rna_coord"] = rna_in_cell

    fov_results.append(cell_results)

 55%|█████████████████████████▋                     | 4028/7368 [13:32<11:12,  4.97it/s]

In [41]:
cell = cells[4033]

In [42]:
# initialize cell results
cell_results = {}

# get the bounding box of the cell
label = cell.label
cell_results["cell_id"] = label
(min_y, min_x, max_y, max_x) = cell.bbox
cell_results["bbox"] = cell.bbox

# get binary masks of the cell
cell_mask = cell_label.copy()
cell_mask = (cell_mask == label)

# check if cell is not cropped by the borders
if _check_cropped_cell(cell_mask, fov_borders):
   

    # get boundaries coordinates for cell
    cell_coord = from_binary_to_coord(cell_mask)
    cell_coord = complete_coord_boundaries(cell_coord)
    cell_coord[:, 0] -= min_y
    cell_coord[:, 1] -= min_x
    cell_results["cell_coord"] = cell_coord
    
    # crop binary mask of the cell
    cell_mask_cropped = cell_mask[min_y: max_y, min_x: max_x]
    cell_results["cell_mask"] = cell_mask_cropped
    
    # get coordinates of the spots detected in the cell
    if rna_coord is not None:
        rna_in_cell, _ = identify_objects_in_region(
            cell_mask,
            rna_coord,
            ndim)
        rna_in_cell[:, ndim - 2] -= min_y
        rna_in_cell[:, ndim - 1] -= min_x
        cell_results["rna_coord"] = rna_in_cell


In [43]:
cell_results

{'cell_id': 4034, 'bbox': (3005, 4600, 3123, 4705)}

In [36]:
np.size(cells)

7368

<skimage.measure._regionprops.RegionProperties at 0x347155b70>