In [None]:
from pathlib import Path
import glob
import os
from tqdm import tqdm
import numpy as np
import pyclesperanto_prototype as cle
import pandas as pd
from skimage.filters import gaussian, threshold_otsu
from skimage import measure
from scipy.ndimage import binary_fill_holes
import plotly.express as px
from utils_stardist import get_gpu_details, list_images, read_image, maximum_intensity_projection

# Report the GPU setup of this environment before any processing starts
# (helper from utils_stardist; presumably prints the detected device details - confirm)
get_gpu_details()

In [None]:
# ---- Input data --------------------------------------------------------------
# Folder that holds the raw images. Absolute and relative paths both work, so this
# can point at another disk location.
directory_path = Path("../raw_data/nihanseb_organoid")

# ---- Channels (indices are zero-based: the first channel is 0) ----------------
# Channel holding the nuclei staining; the organoid mask is derived from it
nuclei_channel = 2

# Marker channels to quantify, given as (channel_name, channel_nr) tuples,
# e.g. markers = [("ARSA", 0), ("MBP", 1)]
markers = [("ARSA", 0), ("MBP", 1)]

# ---- Downsampling (plain slicing to speed up processing, not lossless compression) ----
# In xy: use 2 or 4 to downsample, None to keep the full resolution
slicing_factor_xy = 4
# Across the z-stack: use 2 to keep 1 out of every 2 z-slices, None to keep them all
slicing_factor_z = None

# ---- Organoid mask options ---------------------------------------------------
# True fills the holes inside the resulting organoid mask, False keeps them
fill_holes = True

# Number of pixels (e.g. 1, 2, 3) to add around each label; 0 leaves the labels unchanged
dilation_radius = 0

# ---- Image discovery ---------------------------------------------------------
# Collect the image files found in directory_path (the helper is expected to pick up
# the .czi and .nd2 files - confirm against utils_stardist.list_images)
images = list_images(directory_path)

# Display the collected images as the cell output
images

In [None]:
# The experiment name is the last component of the data directory path
experiment_id = directory_path.name

# Results are written to 'results/<experiment_id>', relative to the working directory
results_folder = Path("results") / experiment_id

# Create the folder (and any missing parents) and report whether it was already there
try:
    results_folder.mkdir(parents=True)
except FileExistsError:
    print(f"'{results_folder}' folder already exists.")
else:
    print(f"'{results_folder}' folder created successfully.")

for image in tqdm(images):

    # Load the image (sliced according to the configured factors) together with its filename
    img, filename = read_image(image, slicing_factor_xy, slicing_factor_z)

    # Maximum intensity projection: every measurement below is taken on this projection
    img_mip = maximum_intensity_projection(img)

    # Mean along axis 1 of the stack (presumably the z axis - confirm against read_image)
    # NOTE(review): img_mean is not used anywhere else in this cell
    img_mean = np.mean(img, axis=1)

    # --- Organoid mask, derived from the nuclei channel of the projection ---
    nuclei_img_mip = img_mip[nuclei_channel]

    # A strong Gaussian blur "fuses" neighbouring nuclei into one continuous region
    blurred_nuclei = gaussian(nuclei_img_mip, sigma=10)

    # Otsu threshold on the blurred image separates foreground (nuclei) from background
    otsu_threshold = threshold_otsu(blurred_nuclei)
    organoid_mask = blurred_nuclei > otsu_threshold

    # Optionally close the holes that are completely surrounded by True pixels
    if fill_holes:
        organoid_mask = binary_fill_holes(organoid_mask)

    # Label connected components, dilate every label by dilation_radius and pull the
    # result back from pyclesperanto. No size filtering is applied in this cell; the
    # resulting rows are only sorted by area further down.
    organoid_labels = cle.pull(
        cle.dilate_labels(measure.label(organoid_mask), radius=dilation_radius)
    )

    # Image-level descriptors that get prepended to every result row
    descriptor_dict = {
        "filename": filename,
        "fill_holes": fill_holes,
        "dilation_radius": dilation_radius,
        "slicing_factor_xy": slicing_factor_xy,
    }

    # One table per marker channel: label, area and the mean intensity inside each label,
    # with the intensity column renamed after the marker
    props_list = [
        pd.DataFrame(
            measure.regionprops_table(
                label_image=organoid_labels,
                intensity_image=img_mip[ch_nr],
                properties=["label", "area", "intensity_mean"],
            )
        ).rename(columns={"intensity_mean": f"{channel_name}_avg_int"})
        for channel_name, ch_nr in tqdm(markers)
    ]

    # Join the per-channel tables into one: start from the first table and merge the
    # remaining ones on the columns they all share
    props_df = props_list[0]
    for df in props_list[1:]:
        props_df = props_df.merge(df, on=["label", "area"])

    # Prepend the descriptors as leading columns, in the key order of descriptor_dict
    for position, (key, value) in enumerate(descriptor_dict.items()):
        props_df.insert(position, key, value)

    # Largest labels first
    props_df = props_df.sort_values(by="area", ascending=False)

    # Write the per-label results of this image to its own CSV file
    props_df.to_csv(results_folder / f"{filename}_per_label_avg_int.csv")

# Collect only the per-image result tables ('<filename>_per_label_avg_int.csv').
# A bare "*.csv" pattern would also match the summary file written below, so every
# re-run of this cell would fold the previous summary back into the new one and
# duplicate its rows. Sorting makes the row order of the summary reproducible.
csv_files = sorted(Path(results_folder).glob("*_per_label_avg_int.csv"))

if csv_files:
    # Read and concatenate all per-image CSV files
    # NOTE: the per-image files are saved together with their index, so the combined
    # table carries that index along as an 'Unnamed: 0' column
    all_dataframes = [pd.read_csv(file) for file in csv_files]
    combined_df = pd.concat(all_dataframes, ignore_index=True)

    # Save the concatenated DataFrame to a new CSV file
    output_path = os.path.join(results_folder, "BP_per_filename_summary.csv")
    combined_df.to_csv(output_path, index=False)

    print(f"All CSV files concatenated and saved to {output_path}")
else:
    # pd.concat raises a ValueError on an empty list; report the situation instead
    # and leave an empty table behind so the name is still defined
    combined_df = pd.DataFrame()
    print(f"No per-label CSV files found in '{results_folder}'; nothing to concatenate.")