<h3>Nuclear segmentation and labeling of 3D stacks of intestinal organoids using Cellpose. Measurement of the YAP signal in each nucleus and in a simulated cytoplasmic ring around it</h3>

In [1]:
from utils import extract_scaling_metadata
import os
from tqdm import tqdm
from pathlib import Path
import czifile
import numpy as np
import skimage
from skimage import measure, exposure
from skimage.measure import regionprops_table
import pandas as pd
import pyclesperanto_prototype as cle
import plotly.express as px
from cellpose import models

# Ask pyclesperanto for a compute device matching the name "RTX"; in the recorded
# output of this cell that resolved to an NVIDIA GeForce RTX 4090.
# NOTE(review): on a machine without an RTX card a different device is presumably
# selected - confirm before running elsewhere.
cle.select_device("RTX")

<NVIDIA GeForce RTX 4090 on Platform: NVIDIA CUDA (1 refs)>

In [2]:
directory_path = Path("./raw_data/")

# Filename keywords marking stainings that must be left out of the analysis
EXCLUDED_STAININGS = ("Control", "Isotype")


def is_analysis_image(stem, excluded=EXCLUDED_STAININGS):
    """Return True when the filename stem contains none of the excluded keywords.

    Each keyword is tested on its own. Writing `"Control" and "Isotype" not in stem`
    only tests "Isotype", because the non-empty string "Control" is always truthy.
    """
    return not any(keyword in stem for keyword in excluded)


# Collect the .czi files in the directory, skipping Control and Isotype stainings.
# Sorting makes the processing order reproducible across runs and machines.
images = [
    str(file_path)
    for file_path in sorted(directory_path.glob("*.czi"))
    if is_analysis_image(file_path.stem)
]

len(images)

117

In [3]:
# Initialize an empty list to store one dataframe per image
dataframes = []

# Initialize the Cellpose model
model = models.Cellpose(gpu=True, model_type='nuclei')

# ---------------------------------------------------------------------------
# Analysis parameters (set once here instead of being buried inside the loop)
# ---------------------------------------------------------------------------
# In-plane subsampling step to speed up processing. None keeps every pixel;
# 2 or 4 keeps every 2nd / 4th pixel in x and y (this discards data).
slicing_factor = None
# Channel order in the .czi files: UEA-1 (0), YAP (1), nuclei (2), BCAT (3)
YAP_CHANNEL = 1
NUCLEI_CHANNEL = 2
TOP_HAT_RADIUS = 5          # box radius of the top-hat background subtraction
CELLPOSE_DIAMETER = 10      # nucleus diameter passed to Cellpose
MIN_NUCLEUS_SIZE = 90       # labels below this size are treated as artifacts
MAX_NUCLEUS_SIZE = 500      # labels above this size are treated as merged nuclei
CYTOPLASM_RING_RADIUS = 2   # label dilation radius used to simulate the cytoplasm


def parse_sample_ids(filename):
    """Split a space-separated filename stem into its sample identifiers.

    Returns a tuple (experiment_id, mouse_id, treatment_id, replica_id) made of
    the first three tokens and the last token of the stem.

    Raises ValueError when the stem has fewer than three tokens.
    """
    tokens = filename.split(" ")
    if len(tokens) < 3:
        raise ValueError(f"Cannot parse sample ids from filename '{filename}'")
    return tokens[0], tokens[1], tokens[2], tokens[-1]


for image in tqdm(images):

    # Read path storing raw image and extract filename
    file_path = Path(image)
    filename = file_path.stem

    # Extract experiment, mouse, treatment and replica ids before the expensive
    # steps, so a badly named file fails fast
    experiment_id, mouse_id, treatment_id, replica_id = parse_sample_ids(filename)

    # Read the image file and remove singleton dimensions
    img = czifile.imread(image)
    img = img.squeeze()

    # Extract only the channels that are measured (YAP) or segmented (nuclei)
    yap_stack = img[YAP_CHANNEL, :, ::slicing_factor, ::slicing_factor]
    nuclei_stack = img[NUCLEI_CHANNEL, :, ::slicing_factor, ::slicing_factor]

    # Extract x,y,z scaling from .czi file metadata in order to make data isotropic
    scaling_x_um, scaling_y_um, scaling_z_um = extract_scaling_metadata(file_path)

    # Subsampling in x/y enlarges the in-plane voxel size by the same step, so the
    # in-plane scale factors must grow with it or the resampled stack is no longer
    # isotropic. With slicing_factor=None this is a no-op.
    # NOTE(review): assumes extract_scaling_metadata returns voxel sizes in um - confirm.
    xy_step = slicing_factor or 1
    scale_kwargs = dict(
        factor_x=scaling_x_um * xy_step,
        factor_y=scaling_y_um * xy_step,
        factor_z=scaling_z_um,
        auto_size=True,
    )

    # Resample nuclei stack and remove background prior to Cellpose prediction
    resampled_nuclei = cle.scale(nuclei_stack, **scale_kwargs)
    background_subtracted = cle.top_hat_box(
        resampled_nuclei,
        radius_x=TOP_HAT_RADIUS,
        radius_y=TOP_HAT_RADIUS,
        radius_z=TOP_HAT_RADIUS,
    )

    # Pull OCL array from GPU memory as a numpy array to be fed into Cellpose
    processed_nuclei = cle.pull(background_subtracted)
    del background_subtracted

    # Apply contrast stretching to improve Cellpose detection of overly bright nuclei
    p2, p98 = np.percentile(processed_nuclei, (2, 98))
    nuclei_rescaled = exposure.rescale_intensity(processed_nuclei, in_range=(p2, p98))

    # Run Cellpose; only the label image (first returned element) is used
    masks = model.eval(
        nuclei_rescaled, diameter=CELLPOSE_DIAMETER, channels=[0, 0], do_3D=True
    )[0]

    # Filter out artifacts (too small) and merged nuclei (too large) by label size.
    # destination=None lets pyclesperanto allocate a fresh output buffer for each
    # step, so no call reads from and writes to the same GPU buffer.
    filtered_nuclei_gpu = cle.exclude_small_labels(masks, None, MIN_NUCLEUS_SIZE)
    filtered_nuclei_gpu = cle.exclude_large_labels(filtered_nuclei_gpu, None, MAX_NUCLEUS_SIZE)

    # Pull filtered nuclei labels from GPU
    filtered_nuclei = cle.pull(filtered_nuclei_gpu).astype(np.uint16)

    # Dilate labels
    dilated_nuclei_gpu = cle.dilate_labels(filtered_nuclei, radius=CYTOPLASM_RING_RADIUS)
    dilated_nuclei = cle.pull(dilated_nuclei_gpu)

    # Simulate cytoplasm labels: keep the dilated labels only where there is no
    # nucleus. One boolean mask clears the voxels of every nucleus at once and
    # gives the same result as looping over the labels one by one.
    cytoplasm = dilated_nuclei.copy()
    cytoplasm[filtered_nuclei != 0] = 0

    # Resample YAP stack exactly like the nuclei stack and pull it from the GPU so
    # it can be used as intensity_image in regionprops
    resampled_yap_gpu = cle.scale(yap_stack, **scale_kwargs)
    resampled_yap = cle.pull(resampled_yap_gpu)

    # Extract regionprops from filtered nuclei and simulated cytoplasm labels
    measured_properties = ["label", "intensity_mean", "area"]
    props_nuclei = regionprops_table(
        label_image=filtered_nuclei, intensity_image=resampled_yap, properties=measured_properties
    )
    props_cytoplasm = regionprops_table(
        label_image=cytoplasm, intensity_image=resampled_yap, properties=measured_properties
    )

    # Build dataframes with compartment-specific column names (no in-place mutation)
    df_nuclei = pd.DataFrame(props_nuclei).rename(
        columns={'intensity_mean': 'intensity_mean_nuclei', 'area': 'area_nuclei'}
    )
    df_cytoplasm = pd.DataFrame(props_cytoplasm).rename(
        columns={'intensity_mean': 'intensity_mean_cyto', 'area': 'area_cyto'}
    )

    # Merge on label: only labels present in both tables are kept (inner join)
    merged_df = pd.merge(df_nuclei, df_cytoplasm, on='label')

    # Calculate nuclei/cytoplasm ratio of mean intensity of YAP signal
    merged_df['nuclei_cyto_ratio'] = merged_df['intensity_mean_nuclei'] / merged_df['intensity_mean_cyto']

    # Add extracted ID info as the leading columns of the dataframe
    merged_df.insert(0, 'experiment_id', experiment_id)
    merged_df.insert(1, 'mouse_id', mouse_id)
    merged_df.insert(2, 'treatment_id', treatment_id)
    merged_df.insert(3, 'replica_id', replica_id)

    dataframes.append(merged_df)



100%|██████████| 117/117 [53:08<00:00, 27.26s/it]


In [4]:
# Concatenate list of dataframes into a single final df.
# ignore_index=True gives every row a unique index; without it each image's
# 0..n-1 index is repeated, so index-based lookups and the CSV index column
# would be ambiguous.
final_df = pd.concat(dataframes, ignore_index=True)

# Create a 'results' folder in the root directory
results_folder = 'results'

try:
    os.makedirs(results_folder)
    print(f"'{results_folder}' folder created successfully.")
except FileExistsError:
    print(f"'{results_folder}' folder already exists.")

# Save the df containing per_label results into a CSV file. The path is built
# from results_folder so the folder that is created and the folder that is
# written to cannot drift apart.
final_df.to_csv(os.path.join(results_folder, 'per_label_results.csv'))

'results' folder created successfully.


In [11]:
# Average the per-nucleus measurements within each image, where an image is
# identified by its experiment, mouse, treatment and replica ids
id_columns = ['experiment_id', 'mouse_id', 'treatment_id', 'replica_id']
measurement_columns = [
    'intensity_mean_nuclei',
    'area_nuclei',
    'intensity_mean_cyto',
    'area_cyto',
    'nuclei_cyto_ratio',
]

per_image_groups = final_df.groupby(id_columns)
grouped_df = per_image_groups.agg(dict.fromkeys(measurement_columns, 'mean')).reset_index()

# Store the per-image averages next to the per-label results
grouped_df.to_csv('./results/average_nuclear_cyto_intensity.csv')

In [12]:
# Display the per-image averages (long tables are truncated in the rendered output)
grouped_df

Unnamed: 0,experiment_id,mouse_id,treatment_id,replica_id,intensity_mean_nuclei,area_nuclei,intensity_mean_cyto,area_cyto,nuclei_cyto_ratio
0,RY20240607,4541,BCM,1,23.084674,277.590613,18.244725,663.318123,1.265822
1,RY20240607,4541,BCM,2,19.472734,235.555950,18.681109,635.859680,1.073264
2,RY20240607,4541,BCM,3,16.287965,258.058480,15.316726,705.067251,1.118118
3,RY20240607,4541,BCM,4,17.615791,246.101266,16.769120,601.620253,1.242694
4,RY20240607,4541,BCM,5,21.025104,247.781818,18.165439,574.047934,1.175974
...,...,...,...,...,...,...,...,...,...
109,RY20240619,4544,PGE2_60min,2,32.546492,290.178771,24.159847,688.685289,1.393834
110,RY20240619,4544,PGE2_60min,3,31.838044,264.732346,27.387265,659.861048,1.200320
111,RY20240619,4544,PGE2_60min,4,26.644534,243.425234,25.208600,627.757009,1.058343
112,RY20240619,4544,PGE2_60min,5,34.421425,266.374833,28.166739,631.215529,1.260916


In [8]:
# Box plot of the per-image mean nuclear YAP intensity, one box per treatment.
# Every image is drawn as a point; hovering shows which sample it came from.
nuclear_box_options = dict(
    x='treatment_id',
    y='intensity_mean_nuclei',
    color='treatment_id',
    points="all",
    hover_data=['experiment_id', 'mouse_id', 'replica_id'],
    title='YAP Nuclear Intensity Average by Treatment ID',
)
fig = px.box(grouped_df, **nuclear_box_options)

# Render the figure
fig.show()

In [9]:
# Box plot of the per-image mean YAP intensity in the simulated cytoplasm,
# one box per treatment. Every image is drawn as a point; hovering shows which
# sample it came from.
cyto_box_options = dict(
    x='treatment_id',
    y='intensity_mean_cyto',
    color='treatment_id',
    points="all",
    hover_data=['experiment_id', 'mouse_id', 'replica_id'],
    title='YAP Cytoplasmic Intensity Average by Treatment ID',
)
fig = px.box(grouped_df, **cyto_box_options)

# Render the figure
fig.show()

In [13]:
# Box plot of the per-image mean nuclear/cytoplasmic YAP ratio, one box per
# treatment. Every image is drawn as a point; hovering shows which sample it
# came from.
ratio_box_options = dict(
    x='treatment_id',
    y='nuclei_cyto_ratio',
    color='treatment_id',
    points="all",
    hover_data=['experiment_id', 'mouse_id', 'replica_id'],
    title='YAP Nuclear/Cytoplasmic signal Ratio by Treatment ID',
)
fig = px.box(grouped_df, **ratio_box_options)

# Render the figure
fig.show()