In [None]:
import sys
from pathlib import Path
# Add src directory to path to import organoid_analysis package
sys.path.insert(0, str(Path('..') / 'src'))
import os
import pandas as pd
import numpy as np
from cellpose import models, core, io
import pyclesperanto_prototype as cle 
from skimage.measure import regionprops_table
from tifffile import imwrite, imread
from organoid_analysis.utils import list_images, read_image, extract_scaling_metadata, segment_organoids_from_cp_labels, extract_organoid_stats_and_merge

# Enable Cellpose logging so progress messages are printed while the model runs
io.logger_setup()

# Abort early if Cellpose cannot use a GPU: the model below is created with gpu=True
if not core.use_gpu():
    raise ImportError("No GPU access, change your runtime")

# Load the pre-trained Cellpose "cyto3" model on the GPU
model = models.CellposeModel(gpu=True, model_type="cyto3")

In [None]:
# Markers to analyze, one tuple per measurement: ("channel_name", position, location)
#   channel_name -> name used to build the output column names
#   position     -> channel index of the marker in the image (0-indexed)
#   location     -> cellular compartment to measure in ("cell" or "membrane")
markers = [
    ("Occludin_RFP", 0, "membrane"),
    ("Claudin_FITC", 1, "membrane"),
    ("Occludin_RFP", 0, "cell"),
]

In [None]:
# Folder where the raw images are stored; absolute or relative paths both work,
# so the data may live on another disk or network location
directory_path = Path(r"\\forskning.it.ntnu.no\ntnu\mh\ikom\cmic_konfokal\lusie.f.kuraas\PhD\Nikon Spinning Disc\20260114_T7_2microns")
#directory_path = Path("./raw_data")

# Optional xy downsampling by slicing to speed up processing (lossy):
# use 2 or 4 to downsample, or None to keep the full resolution
slicing_factor_xy = None

# Collect the .czi and .nd2 files found in the directory
images = list_images(directory_path)

# Display the list of detected images as the cell output
images

In [None]:
# The experiment identifier is the name of the data folder
experiment_id = directory_path.name

# Per-experiment output folder: results/<experiment_id>, relative to the current working directory
results_folder = Path("results") / experiment_id

# Cellpose predictions are stored next to the raw data, inside the data folder
cellpose_folder = directory_path / "cellpose_labels"

folders_to_create = [results_folder, cellpose_folder]

# Create each folder (including missing parents) and report whether it was already there
for path in folders_to_create:
    try:
        path.mkdir(parents=True)
    except FileExistsError:
        print(f"'{path}' folder already exists.")
    else:
        print(f"'{path}' folder created successfully.")

In [None]:
# Loop through all .nd2 files in the directory (each file contains multiple xy positions).
# For every well: segment cells in 3D with Cellpose (predictions are cached as .tif in cellpose_folder),
# measure intensity features for every (channel_name, position, location) entry in `markers`,
# add organoid-level stats and save one .csv per well in results_folder.

# Properties requested from regionprops_table for every marker/location combination
regionprops_properties = [
    "label",
    "area",
    "intensity_mean",
    "intensity_min",
    "intensity_max",
    "intensity_std",
]

# Validate the marker definitions before any expensive GPU work: an unknown location would otherwise
# either crash late or silently reuse the measurements of the previous marker under a wrong column name
supported_locations = ("cell", "membrane")
if not markers:
    raise ValueError("No markers defined: add at least one (channel_name, position, location) tuple to 'markers'.")
for marker_name, ch_nr, location in markers:
    if location not in supported_locations:
        raise ValueError(
            f"Unsupported location '{location}' for marker '{marker_name}': expected one of {supported_locations}."
        )

# Downsampling in xy enlarges the effective xy pixel size by the slicing factor (None means full resolution)
xy_downsampling = slicing_factor_xy if slicing_factor_xy else 1

for image in images:

    # Read image, apply slicing if needed and return filename and img as a np array
    img, filename = read_image(image, slicing_factor_xy)

    # Extract well_id from filename (text before the first underscore)
    well_id = filename.split("_")[0]

    # Results are stored on a per well_id basis
    csv_name = f"{well_id}_per_cell_results.csv"
    csv_path = results_folder / csv_name

    # Skip to the next image if the results already exist
    if csv_path.is_file():
        print(f"Skipping {well_id} well analysis: Results already found at: {csv_path}")
        continue

    # Extract x,y,z scaling from the file metadata in order to feed the Z / XY pixel size ratio into Cellpose
    pixel_size_x, pixel_size_y, voxel_size_z = extract_scaling_metadata(image)

    # Anisotropy ratio of the array that is actually segmented: the metadata describes the full
    # resolution image, so the xy pixel size has to be scaled by the downsampling factor
    # NOTE(review): assumes read_image downsamples by plain striding in xy - confirm against its implementation
    z_to_xy_ratio = voxel_size_z / (pixel_size_x * xy_downsampling)

    # Empty list to hold per_position final Dataframes
    per_pos_dfs = []

    # Loop through all the positions inside the file (first axis of img)
    for position in range(img.shape[0]):

        print(f"Analyzing multiposition index {position}")

        # Path used to store/check the Cellpose prediction for the current well and position
        cellpose_filename = f"{well_id}_{position}"
        cellpose_prediction_path = cellpose_folder / f"{cellpose_filename}.tif"

        # Open one of the multipositions in the img file
        # Input shape for next steps has to be C, Z, Y, X, so the first two axes are swapped
        single_img = img[position].transpose(1, 0, 2, 3)

        if cellpose_prediction_path.exists():
            # Reuse the prediction generated in a previous run
            cytoplasm_labels = imread(cellpose_prediction_path)

        else:  # cytoplasm labels prediction file not present

            # Keep only 3 meaningful input fluorescence channels to mimic Cellpose GUI normalization
            # 0 membrane marker, 2 nuclei, 3 Cellmask (Numpy 0-index)
            # it becomes 0 membrane marker, 1 nuclei, 2 Cellmask (Numpy 0-index)
            cellpose_input = single_img[[0, 2, 3], :, :, :]

            # Segment cells with Cellpose using Cellmask and DAPI channels as inputs
            # (corrected for anisotropy and cell diameter)
            cytoplasm_labels, _, _ = model.eval(
                cellpose_input,                # numpy array
                channels=[3, 2],               # channels=[cyto_chan, nuclear_chan] # Cellmask (ch3, index 2), nuclei (ch2, index 1)
                diameter=20,                   # in pixels (XY), checked with GUI
                do_3D=True,
                anisotropy=z_to_xy_ratio,      # Z pixel size / XY pixel size
                normalize=True,
                flow_threshold=0.4,
                cellprob_threshold=0.0,
                min_size=15,
            )
            del cellpose_input

            # Save cytoplasm labels as .tif to avoid extra Cellpose computations in next rounds
            imwrite(cellpose_prediction_path, cytoplasm_labels)

        # Dictionary containing all image descriptors, added as leading columns later on
        descriptor_dict = {
            "filename": filename,
            "well_id": well_id,
            "multiposition_id": position,
        }

        props_list = []  # Holds one feature dataframe per marker
        membrane_labels = None  # Computed lazily and only once per position to avoid repeated GPU ops
        locations_with_area = set()  # Locations whose area column is already part of props_list

        # Loop through markers and extract features from the requested compartment
        for marker_name, ch_nr, location in markers:
            print(f"Analyzing channel: {marker_name} in {location} ...")

            if location == "cell":
                label_image = cytoplasm_labels
            else:  # "membrane", locations have been validated above
                if membrane_labels is None:
                    # Generate membrane by keeping a single pixel edge (might modify later)
                    membrane_labels = cle.pull(cle.reduce_labels_to_label_edges(cytoplasm_labels))
                label_image = membrane_labels

            props = regionprops_table(
                label_image=label_image,
                intensity_image=single_img[ch_nr],
                properties=regionprops_properties,
            )

            # Rename the generic regionprops columns so they identify compartment and marker
            prefix = f"{location}_{marker_name}"
            area_column = f"{location}_area"
            marker_df = pd.DataFrame(props).rename(
                columns={
                    "area": area_column,
                    "intensity_mean": f"{prefix}_mean_int",  # concentration proxy
                    "intensity_min": f"{prefix}_min_int",
                    "intensity_max": f"{prefix}_max_int",
                    "intensity_std": f"{prefix}_std_int",
                }
            )

            # Max / mean ratio (puncta vs diffuse signal); a zero mean yields NaN instead of inf
            marker_df[f"{prefix}_max_mean_ratio"] = (
                marker_df[f"{prefix}_max_int"] / marker_df[f"{prefix}_mean_int"].replace(0, np.nan)
            )
            # Total marker content per cell
            marker_df[f"{prefix}_sum_int"] = marker_df[f"{prefix}_mean_int"] * marker_df[area_column]

            # The area only depends on the location, not on the marker. Keeping it in every dataframe
            # would make the merge below emit duplicated "<location>_area_x" / "<location>_area_y" columns
            if location in locations_with_area:
                marker_df = marker_df.drop(columns=area_column)
            else:
                locations_with_area.add(location)

            props_list.append(marker_df)

        # Merge all per-marker dataframes on the label of each cell
        props_df = props_list[0]
        for df in props_list[1:]:
            props_df = props_df.merge(df, on="label")

        # Insert the descriptors as the first columns, keeping the order of descriptor_dict
        for insertion_position, (key, value) in enumerate(descriptor_dict.items()):
            props_df.insert(insertion_position, key, value)

        # Obtain rough single organoid outlines by fusing Cellpose labels and dilation, merging, erosion morphological operations
        mip_labels, organoid_labels = segment_organoids_from_cp_labels(cytoplasm_labels)

        # Obtain final stats (including per cell and per organoid)
        final_df = extract_organoid_stats_and_merge(mip_labels, organoid_labels, props_df)

        # Append each position results and concatenate later to store on a per well_id(img) basis
        per_pos_dfs.append(final_df)

    # pd.concat raises on an empty list, so skip files that contain no positions
    if not per_pos_dfs:
        print(f"No positions found in {filename}, no results saved for well {well_id}")
        continue

    # Concatenate all Dataframes in the per_pos_dfs list
    df_well_id = pd.concat(per_pos_dfs, ignore_index=True)

    # Save to CSV
    df_well_id.to_csv(csv_path, index=False)