<h3>Nuclear segmentation of 3D stacks of intestinal organoids, nuclei labeling using Voronoi-Otsu. Measurement of YAP signal in the nuclei</h3>

In [15]:
from utils import extract_scaling_metadata
import os
from tqdm import tqdm
from pathlib import Path
import czifile
import numpy as np
from skimage.measure import regionprops_table
import pandas as pd
import pyclesperanto_prototype as cle
import plotly.express as px

# Choose the device pyclesperanto runs on. The "RTX" string is hard-coded for
# the original workstation — NOTE(review): presumably matched against the
# device name; confirm what happens on machines without such a GPU.
cle.select_device("RTX")

In [2]:
directory_path = Path("./raw_data/")

# Substrings that mark stainings which must be left out of the analysis
EXCLUDED_STAININGS = ("Control", "Isotype")


def is_analysis_image(stem):
    """Return True when a filename stem matches none of the excluded stainings.

    Each keyword is tested on its own. The chained form
    `"Control" and "Isotype" not in stem` only ever tests "Isotype", because
    the non-empty literal "Control" is always truthy, so Control stainings
    would slip through.
    """
    return not any(keyword in stem for keyword in EXCLUDED_STAININGS)


# Collect the paths (as strings) of the .czi files in the directory,
# removing Control and Isotype stainings from the analysis
images = [
    str(file_path)
    for file_path in directory_path.glob("*.czi")
    if is_analysis_image(file_path.stem)
]

len(images)

117

In [3]:
# Initialize an empty list to store one dataframe per image
dataframes = []

# Image size reduction to improve processing times (slicing, lossy):
# use 2 or 4 to keep every 2nd/4th pixel in x and y, None for full resolution.
# The value does not change between images, so it is set once before the loop.
slicing_factor = None

# Pixels kept by the slicing are `xy_step` original pixels apart, so the
# physical x/y pixel size grows by that factor (1 when nothing is skipped).
xy_step = slicing_factor or 1

for image in tqdm(images):

    # Path of the raw image and its filename without extension
    file_path = Path(image)
    filename = file_path.stem

    # Read the image file and remove singleton dimensions
    img = czifile.imread(image).squeeze()

    # Extract experiment, mouse, treatment and replica ids from the
    # space-separated filename (first three fields and the last one)
    name_parts = filename.split(" ")
    experiment_id = name_parts[0]
    mouse_id = name_parts[1]
    treatment_id = name_parts[2]
    replica_id = name_parts[-1]

    # Channel order in the stack: UEA-1 (0), YAP (1), nuclei (2), BCAT (3).
    # Only YAP and nuclei are needed for this measurement.
    yap_stack = img[1, :, ::slicing_factor, ::slicing_factor]
    nuclei_stack = img[2, :, ::slicing_factor, ::slicing_factor]

    # Extract x,y,z scaling from .czi file metadata in order to make data isotropic
    # (assumes the helper returns the voxel size per axis in micrometers — TODO confirm)
    scaling_x_um, scaling_y_um, scaling_z_um = extract_scaling_metadata(file_path)

    # Shared resampling factors for both channels. x/y are corrected by the
    # slicing step so the resampled volume stays isotropic when the image has
    # been subsampled; z is never sliced and keeps its metadata value.
    isotropic_scaling = dict(
        factor_x=scaling_x_um * xy_step,
        factor_y=scaling_y_um * xy_step,
        factor_z=scaling_z_um,
        auto_size=True,
    )

    # Resample nuclei stack, subtract background (top-hat) and label nuclei
    # with Voronoi-Otsu labeling
    resampled_nuclei = cle.scale(nuclei_stack, **isotropic_scaling)
    background_subtracted = cle.top_hat_box(resampled_nuclei, radius_x=5, radius_y=5, radius_z=5)
    segmented = cle.voronoi_otsu_labeling(background_subtracted, spot_sigma=2, outline_sigma=2)
    del background_subtracted

    # Resample YAP the same way to perform the intensity measurement within the nuclei labels
    yap_gpu = cle.scale(yap_stack, **isotropic_scaling)

    # Pull OCL arrays from GPU memory and transform into numpy arrays to be fed
    # into regionprops extraction by skimage
    nuclei_labels_3d = cle.pull(segmented)
    resampled_yap = cle.pull(yap_gpu)

    # Extract per-label properties: mean YAP intensity and filled size of each nucleus
    props = regionprops_table(
        label_image=nuclei_labels_3d,
        intensity_image=resampled_yap,
        properties=["label", "intensity_mean", "area_filled"],
    )

    # Construct a dataframe for the extracted props
    df = pd.DataFrame(props)

    # Add extracted id info to the dataframe (same value on every row of this image)
    df['experiment_id'] = experiment_id
    df['mouse_id'] = mouse_id
    df['treatment_id'] = treatment_id
    df['replica_id'] = replica_id

    dataframes.append(df)



100%|██████████| 117/117 [02:38<00:00,  1.36s/it]


In [6]:
# Concatenate list of dataframes into a single final df.
# Every per-image dataframe starts its index at 0, so ignore_index=True is
# needed to give each nucleus a unique row index in final_df and in the CSV.
final_df = pd.concat(dataframes, ignore_index=True)

# Create a 'results' folder in the root directory
results_folder = 'results'

try:
    os.makedirs(results_folder)
    print(f"'{results_folder}' folder created successfully.")
except FileExistsError:
    print(f"'{results_folder}' folder already exists.")

# Save the df containing per_label results into a CSV file inside the
# results folder (path built from results_folder so both stay in sync)
final_df.to_csv(os.path.join(results_folder, 'per_label_results.csv'))

'results' folder created successfully.


In [9]:
# Average the per-nucleus measurements within each
# (experiment, mouse, treatment, replica) combination
id_columns = ['experiment_id', 'mouse_id', 'treatment_id', 'replica_id']
measurement_columns = ['intensity_mean', 'area_filled']

per_group_means = final_df.groupby(id_columns)[measurement_columns].mean()

# Turn the group keys back into regular columns
grouped_df = per_group_means.reset_index()

# Persist the averaged table next to the per-label results
grouped_df.to_csv('./results/average_nuclear_intensity.csv')

In [10]:
# Display the averaged table (one row per experiment/mouse/treatment/replica group)
grouped_df

Unnamed: 0,experiment_id,mouse_id,treatment_id,replica_id,intensity_mean,area_filled
0,RY20240607,4541,BCM,1,22.046294,1105.362069
1,RY20240607,4541,BCM,2,18.625787,1095.475436
2,RY20240607,4541,BCM,3,15.034235,1495.311927
3,RY20240607,4541,BCM,4,17.584209,1103.004975
4,RY20240607,4541,BCM,5,18.951698,1123.169435
...,...,...,...,...,...,...
109,RY20240619,4544,PGE2_60min,2,30.450087,936.651562
110,RY20240619,4544,PGE2_60min,3,30.682754,906.425878
111,RY20240619,4544,PGE2_60min,4,25.267632,967.930661
112,RY20240619,4544,PGE2_60min,5,32.678753,939.852286


In [14]:
# Box plot of the averaged nuclear YAP intensity, one box per treatment;
# each point is one row of grouped_df
hover_columns = ['experiment_id', 'mouse_id', 'replica_id']

fig = px.box(
    data_frame=grouped_df,
    x='treatment_id',
    y='intensity_mean',
    color='treatment_id',
    title='YAP Nuclear Intensity Average by Treatment ID',
    hover_data=hover_columns,
    points="all",  # overlay every individual data point next to its box
)

# Render the interactive figure in the notebook
fig.show()