In [1]:
import pyclesperanto_prototype as cle
import apoc
from pathlib import Path
import tifffile
import napari
import os
import numpy as np
import pandas as pd
import plotly.express as px
from utils_stardist import get_gpu_details, list_images, read_image, maximum_intensity_projection, simulate_cytoplasm_chunked_3d, simulate_cell_chunked_3d, simulate_cytoplasm, simulate_cell

# Print the GPU details reported by the utils_stardist helper (device name, device type and GPU model)
get_gpu_details()

# Ask pyclesperanto to run its GPU-accelerated operations on the device matching 'RTX'
# NOTE(review): change the string to match your own GPU name if you do not use an RTX card — confirm the fallback behavior in the pyclesperanto docs
cle.select_device('RTX')

Device name: /device:GPU:0
Device type: GPU
GPU model: device: 0, name: NVIDIA GeForce RTX 4090 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


<NVIDIA GeForce RTX 4090 Laptop GPU on Platform: NVIDIA CUDA (1 refs)>

In [2]:
# Copy the path where your images are stored; you can use absolute or relative paths to point at other disk locations
# At this point you should have generated the nuclei label predictions in advance
directory_path = Path("../raw_data/test_data")

# Define the channels for which you want to train the ObjectClassifier using the following structure:
# markers = [(channel_name, channel_nr, cellular_location), (..., ..., ...)]
# cellular locations can be "nucleus", "cytoplasm" or "cell" (cell being the sum volume of nucleus and cytoplasm)
# Remember that Python starts counting from 0, so your first channel will be 0
# e.g. markers = [("ki67", 0, "nucleus"), ("neun", 1, "cell"), ("calbindin", 2, "cytoplasm")]

markers = [("ki67", 0, "nucleus"), ("neun", 1, "cell"), ("calbindin", 2, "cytoplasm")]

# List the image files found in directory_path
# NOTE(review): the original comment mentions .czi and .nd2 files — confirm the supported extensions in utils_stardist.list_images
images = list_images(directory_path)

# Display the list so you can pick an image by its position (0 is the first one)
images

['..\\raw_data\\test_data\\HI 1  Contralateral Mouse 8  slide 6 Neun Red Calb Green KI67 Magenta 40x technical replica 1.czi',
 '..\\raw_data\\test_data\\HI 1  Ipsilateral Mouse 8  slide 6 Neun Red Calb Green KI67 Magenta 40x technical replica 1.czi']

In [3]:
# Explore each image to analyze (0 defines the first image in the directory)
image = images[0]

# Image size reduction (downsampling) to improve processing times (slicing discards pixels; it is not a lossless compression)
# In addition to xy, you can also downsample across your z-stack
# Try to use the same factors that you applied during your nuclei label prediction and analysis
slicing_factor_xy = None # Use 2 or 4 for downsampling in xy (None keeps the original resolution)
slicing_factor_z = None # Use 2 to select 1 out of every 2 z-slices (None keeps every z-slice)

# Define the index of the nuclei channel (remember that Python starts counting from zero)
nuclei_channel = 3

# Segmentation type ("2D" or "3D").
# 2D takes a z-stack as input, performs MIP (Maximum Intensity Projection) and predicts nuclei from the resulting projection (faster, useful for single layers of cells)
# 3D is more computationally expensive. Predicts 3D nuclear volumes, useful for multilayered structures
segmentation_type = "3D"

# Nuclear segmentation model type ("Stardist")
# Choose your Stardist fine-tuned model (model_name) from stardist_models folder
model_name = "MEC0.1"

# Type the ROI name you wish to load (by default it is "full_image")
# It is recommended to train the ObjectClassifier based on the full image
roi_name = "full_image"

# Choose the marker (by its channel_name in markers) for which you want to train the ObjectClassifier:
marker_name = "neun"

# Read image, apply slicing if needed and return the image as a np array together with its filename
img, filename = read_image(image, slicing_factor_xy, slicing_factor_z)
# Construct ROI and nuclei predictions paths from directory_path above
roi_path = directory_path / "ROIs"
# The nuclei predictions folder depends on the segmentation type and model chosen above
nuclei_preds_path =  directory_path / "nuclei_preds" / segmentation_type / model_name



Image analyzed: HI 1  Contralateral Mouse 8  slide 6 Neun Red Calb Green KI67 Magenta 40x technical replica 1
Original Array shape: (4, 14, 3803, 2891)
Compressed Array shape: (4, 14, 3803, 2891)


In [4]:
# Retrieve the second and third values (channel and location) of the tuple in markers matching marker_name
for item in markers:
    if item[0] == marker_name:
        marker_channel = item[1]
        location = item[2]
        break  # Stop searching once the marker is found
else:
    # Fail early: otherwise marker_channel/location would be undefined or left over from a previous run
    raise ValueError(
        f"Marker '{marker_name}' not found in markers. Available markers: {[item[0] for item in markers]}"
    )

# Select the marker stack that will be displayed and used to train the classifier
if segmentation_type == "3D":
    # Slice marker stack
    marker_img = img[marker_channel]

elif segmentation_type == "2D":
    # Generate the maximum intensity projection without overwriting img,
    # so re-running this cell never projects an already projected image
    marker_img = maximum_intensity_projection(img)[marker_channel]

else:
    raise ValueError(f"segmentation_type must be '2D' or '3D', got: {segmentation_type!r}")

# Close any previous Napari instances that are open, ignore WARNING messages
try:
    viewer.close()

except (NameError, RuntimeError):
    # NameError: no viewer was created yet; RuntimeError: the viewer was already closed
    pass

# Load Napari viewer and display the marker image (trailing ';' hides the layer repr in the cell output)
viewer = napari.Viewer(ndisplay=2)
viewer.add_image(marker_img);

In [5]:
# Load nuclei labels and transform them into cell or cytoplasm labels if necessary
labels_file = nuclei_preds_path / roi_name / f"{filename}.tiff"

try:
    # Read the nuclei predictions for the selected ROI
    labels = tifffile.imread(labels_file)

except FileNotFoundError as error:
    # Stop here: carrying on would fail later with a NameError or, worse,
    # silently reuse the labels loaded for a previously analyzed image
    raise FileNotFoundError(
        f"Nuclei labels for filename: {filename} ROI: {roi_name} not found. Please generate them using 002_BP_Predict_nuclei_labels.ipynb"
    ) from error

print(f"Pre-computed nuclei labels found for {filename}")

if location == "cytoplasm":
    if segmentation_type == "3D":
        print(f"Generating {segmentation_type} cytoplasm labels for: {marker_name}")
        # Simulate a cytoplasm by dilating the nuclei and subtracting the nuclei mask afterwards
        labels = simulate_cytoplasm_chunked_3d(labels, dilation_radius=2, erosion_radius=0, chunk_size=(1, 1024, 1024))

    elif segmentation_type == "2D":
        print(f"Generating {segmentation_type} cytoplasm labels for: {marker_name}")
        # Simulate a cytoplasm by dilating the nuclei and subtracting the nuclei mask afterwards
        labels = simulate_cytoplasm(labels, dilation_radius=2, erosion_radius=0)

elif location == "cell":
    if segmentation_type == "3D":
        print(f"Generating {segmentation_type} cell labels for: {marker_name}")
        # Simulate a cell volume by dilating the nuclei
        labels = simulate_cell_chunked_3d(labels, dilation_radius=2, erosion_radius=0, chunk_size=(1, 1024, 1024))

    elif segmentation_type == "2D":
        print(f"Generating {segmentation_type} cell labels for: {marker_name}")
        # Simulate a cell area by dilating the nuclei
        labels = simulate_cell(labels, dilation_radius=2, erosion_radius=0)

# For location == "nucleus" the nuclei labels are used as they are
# Overlay the labels on top of the marker image in the Napari viewer
viewer.add_labels(labels, opacity=0.3)

Pre-computed nuclei labels found for HI 1  Contralateral Mouse 8  slide 6 Neun Red Calb Green KI67 Magenta 40x technical replica 1
Generating 3D cell labels for: neun


<Labels layer 'labels' at 0x1e9dc8f4a60>

<h2>Data Annotation in Napari</h2>

Create a new Labels layer in Napari (it must be named "Labels", since the training cell below reads the annotations from the layer with that name) and draw on top of each label according to the class you want to assign to it. In this example we have cells negative for Neun (label 1), low Neun cells (label 2) and high Neun cells (label 3). Once you are done, proceed to run the next cells.

<video controls>
  <source src="../assets/apoc_oc_annotation.mp4" type="video/mp4">
</video>

In [154]:
# Create folder structure to store resulting Object Classifiers
apoc_path = Path("APOC_ObjectClassifiers") / directory_path.name
# exist_ok=True makes this cell safe to re-run when the folder is already there
os.makedirs(apoc_path, exist_ok=True)

# Define the features on which the classifier will be trained (see the APOC ObjectClassifier train help for the full list of features)
features = 'min_intensity,max_intensity,sum_intensity,mean_intensity,standard_deviation_intensity'

# One classifier file per segmentation type and marker channel
cl_filename = f"./{apoc_path}/ObjClass_{segmentation_type}_ch{marker_channel}.cl"

# Create an object classifier
apoc.erase_classifier(cl_filename) # Delete it if it already existed, so training starts from scratch
classifier = apoc.ObjectClassifier(cl_filename)

If you are not happy with the classifier, go back to Napari and add a few more annotations to the "Labels" layer. Then run the cells below to fetch your modifications, train the classifier again and display the results.

In [157]:
# Collect the user annotations from the Napari "Labels" layer and train/retrain the ObjectClassifier based on them
user_input = viewer.layers["Labels"].data

# Train or retrain your classifier (continue_training=True is passed so that re-running this cell retrains the existing classifier)
classifier.train(features, labels, user_input, marker_img, continue_training=True)

# Display the weights of each feature in the decision process
classifier.feature_importances()



{'min_intensity': 0.11734732285229599,
 'max_intensity': 0.08846625807667363,
 'sum_intensity': 0.23350138516650665,
 'mean_intensity': 0.38762326688261955,
 'standard_deviation_intensity': 0.1730617670219043}

In [6]:
# Rebuild the classifier path so this cell does not depend on the training setup cell having been run (same values as defined there)
apoc_path = Path("APOC_ObjectClassifiers") / directory_path.name
cl_filename = f"./{apoc_path}/ObjClass_{segmentation_type}_ch{marker_channel}.cl"

# Reload the classifier from disk to use the latest version
classifier = apoc.ObjectClassifier(cl_filename)

# Determine object classification from the labels and the marker image
result = classifier.predict(labels, marker_img)

# Show the result as a new labels layer in the Napari viewer
viewer.add_labels(result)

<Labels layer 'result' at 0x1e9d186a170>

In [7]:
# Calculate the average intensity of a marker in each of the classified populations
# Treat all 'labels' holding the same value as a mask

# Pull the classification result once instead of on every loop iteration
result_np = cle.pull(result)

# Extract the different populations ignoring the background (0)
# Filtering (instead of list.remove) does not fail when the result contains no background pixel
unique_masks = [mask_value for mask_value in np.unique(result_np).tolist() if mask_value != 0]

# Initialize an empty dictionary to store the average intensity values for each mask of labels
mean_intensities = {}

# Calculate the average intensity of a marker in each of the classified populations
for mask_value in unique_masks:
    mask = result_np == mask_value  # Create a boolean mask
    # mask_value comes from np.unique, so the mask always selects at least one pixel
    mean_intensities[mask_value] = marker_img[mask].mean()  # Store the average intensity

# Sort dictionary from min to max mean_intensity
sorted_mean_intensities = dict(sorted(mean_intensities.items(), key=lambda item: item[1]))

print(sorted_mean_intensities)

{1: 5.309164698861625, 2: 42.170361358465414, 3: 113.86806458350068}


In [None]:
# Alternative implementation based on skimage regionprops and pandas
from skimage import measure

# Measure the mean marker intensity of every classified population (one region per class value)
regionprops = measure.regionprops_table(
    cle.pull(result),
    marker_img,
    properties=('label', 'mean_intensity'),
)

# Build a DataFrame ordered from the dimmest to the brightest population
regionprops_df = (
    pd.DataFrame(regionprops)
    .sort_values(by='mean_intensity', ascending=True)
    .reset_index(drop=True)
)

# Keep the class labels in ascending mean_intensity order
sorted_mean_intensities = regionprops_df['label'].to_list()

In [26]:
# Assign subpopulation suffixes based on the number of subpopulations (masks > 0) present
n_subpopulations = len(sorted_mean_intensities)

# Named suffixes are available for one, two or three subpopulations
named_suffixes = {
    1: [''],
    2: ['low', 'high'],
    3: ['low', 'med', 'high'],
}

# Any other count falls back to numeric suffixes (1, 2, 3, ...).
# With zero subpopulations this yields an empty list instead of leaving subpopulations undefined.
subpopulations = named_suffixes.get(n_subpopulations, list(range(1, n_subpopulations + 1)))

print(subpopulations)


['low', 'med', 'high']


In [None]:
# Attach the subpopulation suffixes as a new column; rows are already ordered from min to max mean_intensity,
# so the suffixes line up position by position
regionprops_df = regionprops_df.assign(subpopulation=subpopulations)

regionprops_df

Unnamed: 0,label,mean_intensity,subpopulation
0,2,0.0,low
1,1,42.170361,med
2,3,113.868065,high


In [None]:
# Walk through every label together with its subpopulation definition and print both
for label, subpopulation in zip(regionprops_df['label'], regionprops_df['subpopulation']):
    print(label)
    print(subpopulation)

2
low
1
med
3
high
