In [11]:
from pathlib import Path
import glob
import os
from tqdm import tqdm
import numpy as np
import tifffile
import pandas as pd
from skimage import measure
from skimage.transform import resize
from scipy.ndimage import binary_fill_holes
from utils import get_gpu_details, list_images, read_image

# Print the compute device visible to this kernel (name, type and GPU model) via the project helper in utils
get_gpu_details()

Device name: /device:GPU:0
Device type: GPU
GPU model: device: 0, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:01:00.0, compute capability: 8.9


In [12]:
# Folder that holds the raw images; absolute or relative paths are both fine, so it can point at another disk
directory_path = Path("./raw_data/nihanseb_organoid")

# Image size reduction (downsampling) to improve processing times (pixels are skipped by slicing, this is not lossless compression)
# Downsampling can be applied in xy and, independently, across the z-stack
slicing_factor_xy = 2 # Use 2 or 4 for downsampling in xy (None keeps the full resolution)
slicing_factor_z = None # Use 2 to keep 1 out of every 2 z-slices (None keeps every slice)

# Index of the nuclei channel (zero-based: the first channel is 0)
# NOTE(review): not referenced by the analysis cell shown in this notebook
nuclei_channel = 2

# Fill holes inside the organoid mask? Set to False if you want to keep the holes
fill_holes = True

# "3D": measure intensity within the whole volume of the ROI
# "2D": measure intensity on a mean or max intensity projection of each marker channel
analysis_type = "2D" #"2D" or "3D"

# Projection used when analysis_type is "2D" (the analysis cell resets this to None for "3D")
# A mean projection is roughly equivalent to averaging within the 3D volume; the results can still
# differ because in 2D the intensity thresholds are applied after projecting
projection_type = "mean" # "mean" or "max"

# Stardist model name if nuclei label predictions are present
# NOTE(review): only used by a commented-out path in the analysis cell
model_name = None

# Collect the image files found in directory_path (.czi and .nd2 according to the original note)
images = list_images(directory_path)

# Display the list so the files that will be analyzed can be checked
images

['raw_data\\nihanseb_organoid\\MLD 1.8 block4 ARSA MBP batch 1 40x.nd2',
 'raw_data\\nihanseb_organoid\\MLD 2.2 block7 MBP MAP2 slide 7 batch 2 40x.nd2']

In [13]:
# Define the channels you want to analyze using the following structure:
# markers = [(channel_name, channel_nr, min_max_range), (..., ...)]
# Remember in Python one starts counting from 0, so your first channel will be 0
# min_max_range is the (min, max) pixel intensity window applied by the analysis cell:
#   - pixels at or below min are set to 0 before the average is computed
#   - pixels above max are removed from the ROI mask, so they do not contribute at all
# e.g. markers = [("ARSA", 0, (0, 65536)), ("MBP", 1, (0, 65536))]
markers = [("ARSA", 0, (110, 65536)), ("MBP", 1, (110, 65536))]

In [None]:
# The experiment is named after the last component of the data directory
experiment_id = directory_path.name

# Results for this experiment are written to results/<experiment_id>/avg_int
results_folder = Path("results", experiment_id, "avg_int")

# User-defined ROIs are looked up in a "ROIs" subfolder of the data directory
roi_path = directory_path.joinpath("ROIs")
# nuclei_preds_path =  directory_path / "nuclei_preds" / analysis_type / model_name

# Each subfolder of roi_path counts as one ROI; if the ROIs folder itself is
# missing, fall back to a single placeholder ROI name
try:
    roi_names = [entry.name for entry in filter(Path.is_dir, roi_path.iterdir())]
except FileNotFoundError:
    print("No manually defined ROI found, generating ROI automatically...")
    roi_names = ["auto_generated_ROI"]

# Create the results folder (including parents) and report whether it was new
try:
    os.makedirs(results_folder)
except FileExistsError:
    print(f"'{results_folder}' folder already exists.")
else:
    print(f"'{results_folder}' folder created successfully.")

# A projection only applies to 2D analysis, so it is cleared for volumetric runs
projection_type = None if analysis_type == "3D" else projection_type

# Measure the average marker intensity inside every ROI of every image.
#
# For each image: read it, optionally project it along z (2D analysis), and
# for each ROI load the mask, match it to the image shape, optionally fill its
# holes, then for each marker threshold the channel and extract label / area /
# mean intensity. One .csv per image is written to results_folder; it holds
# one block of rows per ROI (identified by the "roi" column).

# Fail early on unsupported settings; otherwise a branch below would be skipped
# silently and the loop would crash or reuse arrays left over from a previous run
if analysis_type not in ("2D", "3D"):
    raise ValueError(f"analysis_type must be '2D' or '3D', got {analysis_type!r}")
if analysis_type == "2D" and projection_type not in ("max", "mean"):
    raise ValueError(
        f"projection_type must be 'max' or 'mean' for 2D analysis, got {projection_type!r}"
    )
if not markers:
    raise ValueError("markers is empty, define at least one (channel_name, channel_nr, min_max_range)")

for image in tqdm(images):

    # Read image, apply slicing if needed and return filename and img as a np array
    img, filename = read_image(image, slicing_factor_xy, slicing_factor_z)

    # The code below indexes img as (channel, z, y, x): axis 1 is collapsed for
    # 2D analysis, the full volume is used for 3D analysis
    if analysis_type == "2D":
        if projection_type == "max":
            intensity_stack = np.max(img, axis=1)
        else:
            intensity_stack = np.mean(img, axis=1)
    else:
        intensity_stack = img

    # One result table per ROI; they are written together once the image is done
    # so that several ROIs do not overwrite each other's .csv
    roi_tables = []

    for roi_name in roi_names:

        print(f"\nAnalyzing ROI: {roi_name}")

        # Read the user defined ROI for this image
        roi_file = roi_path / roi_name / f"{filename}.tiff"
        try:
            organoid_mask = tifffile.imread(roi_file)
        except FileNotFoundError:
            # Automatic ROI generation is not implemented yet. Skip this ROI
            # instead of continuing with an undefined mask (NameError) or,
            # worse, with the mask that belongs to the previous image.
            print(f"No ROI file found at '{roi_file}', skipping {roi_name} for this image.")
            continue

        # Resample the organoid ROI if input img and ROI shape differ
        target_shape = img.shape[-2:]
        if organoid_mask.shape[-2:] != target_shape:
            roi_slicing_factor = organoid_mask.shape[-1] / img.shape[-1]

            if roi_slicing_factor > 1:
                print("Slicing ROI to match input image shape")
                step = round(roi_slicing_factor)
                organoid_mask = organoid_mask[::step, ::step]

            elif roi_slicing_factor < 1:
                print("Upsampling ROI to match input image shape")
                # order=0 (nearest neighbour) keeps the mask values unchanged
                organoid_mask = resize(
                    organoid_mask, target_shape, order=0, preserve_range=True, anti_aliasing=False
                )

            # Integer slicing can still leave the mask off by a pixel (or only
            # one axis may differ); force an exact match so the masking below
            # does not fail on mismatched shapes
            if organoid_mask.shape[-2:] != target_shape:
                print("Resampling ROI to match input image shape exactly")
                organoid_mask = resize(
                    organoid_mask, target_shape, order=0, preserve_range=True, anti_aliasing=False
                )

        if fill_holes:
            # Close empty holes surrounded by True pixels. This must happen on
            # the 2D mask: once the mask is repeated along z, a hole becomes a
            # tunnel that reaches the first and last slice (the volume border)
            # and binary_fill_holes would leave it open.
            organoid_mask = binary_fill_holes(organoid_mask)

        # Transform organoid mask into a label type without the need to perform connected components
        organoid_mask = organoid_mask.astype(np.uint8)

        # If analysis type == "3D" extend ROI over the entire volume
        if analysis_type == "3D":
            # Extract the number of z-slices to extend the mask
            slice_nr = img.shape[1]
            # Repeat the 2D mask on every z-slice
            organoid_mask = np.tile(organoid_mask, (slice_nr, 1, 1))

        # Holds one dataframe per marker channel for this ROI
        props_list = []

        # Image descriptors, added as leading columns of the result table
        descriptor_dict = {
                    "filename": filename,
                    "roi": roi_name,
                    "fill_holes": fill_holes,
                    "slicing_factor_xy": slicing_factor_xy,
                    "analysis_type": analysis_type,
                    "projection_type": projection_type,
                    }

        for channel_name, ch_nr, min_max_range in markers:

            print(f"Extracting avg_int for {channel_name} inside {analysis_type}_{roi_name}")

            channel_img = intensity_stack[ch_nr]

            # Pixels at or below the min of the range count as 0. A new array is
            # used so the image itself is not modified between channels and ROIs.
            thresholded = np.where(channel_img > min_max_range[0], channel_img, 0)

            # Pixels above the max of the range are removed from the ROI
            # (results in a per-channel filtered organoid mask)
            filtered_organoid_mask = np.where(
                thresholded <= min_max_range[1], organoid_mask, 0
            ).astype(np.uint8)

            # Extract intensity information from the marker channel
            props = measure.regionprops_table(
                label_image=filtered_organoid_mask,
                intensity_image=thresholded,
                properties=["label", "area", "intensity_mean"],
            )

            # Name the intensity column after the marker it belongs to
            props_df = pd.DataFrame(props).rename(
                columns={"intensity_mean": f"{channel_name}_avg_int"}
            )
            props_list.append(props_df)

        # Combine the per-channel tables. The filtered area can differ between
        # channels when the max threshold removes pixels; an outer merge keeps
        # those rows (NaN for the other channels) where an inner merge would
        # silently return an empty table.
        props_df = props_list[0]
        for df in props_list[1:]:
            props_df = props_df.merge(df, on=["label", "area"], how="outer")

        # Add the descriptors as the first columns, in the order of descriptor_dict
        for position, (key, value) in enumerate(descriptor_dict.items()):
            props_df.insert(position, key, value)

        # Sort by area in descending order
        roi_tables.append(props_df.sort_values(by='area', ascending=False))

    if not roi_tables:
        print(f"No ROI could be analyzed for {filename}, no .csv written.")
        continue

    # Define the .csv path
    csv_path = results_folder / f'{filename}_per_label_avg_int.csv'

    # Save the results of all ROIs of this image to .csv
    pd.concat(roi_tables).to_csv(csv_path)

# Combine the per-image result tables of this experiment into one summary .csv
summary_name = "BP_per_filename_summary.csv"
output_path = os.path.join(results_folder, summary_name)

# Get all CSV files in the folder except the summary itself: it also matches
# "*.csv", so on a re-run the old summary would be concatenated with its own
# sources and its rows would be duplicated. Sorting makes the row order
# independent of the order in which the file system lists the files.
csv_files = sorted(
    file
    for file in glob.glob(os.path.join(results_folder, "*.csv"))
    if os.path.basename(file) != summary_name
)

if csv_files:
    # Read and concatenate all CSV files
    all_dataframes = [pd.read_csv(file) for file in csv_files]
    combined_df = pd.concat(all_dataframes, ignore_index=True)

    # Save the concatenated DataFrame to a new CSV file
    combined_df.to_csv(output_path, index=False)

    print(f"All CSV files concatenated and saved to {output_path}")
else:
    # pd.concat raises on an empty list, so report the situation instead
    print(f"No per-image CSV files found in '{results_folder}', summary not written.")

'results\nihanseb_organoid\avg_int' folder already exists.


  0%|          | 0/2 [00:00<?, ?it/s]



Image analyzed: MLD 1.8 block4 ARSA MBP batch 1 40x
Original Array shape: (3, 24, 10797, 10797)
Compressed Array shape: (3, 24, 5399, 5399)

Analyzing ROI: organoid_sf4
Upsampling ROI to match input image shape
Extracting avg_int for ARSA inside 2D_organoid_sf4
Extracting avg_int for MBP inside 2D_organoid_sf4


 50%|█████     | 1/2 [00:17<00:17, 17.87s/it]



Image analyzed: MLD 2.2 block7 MBP MAP2 slide 7 batch 2 40x
Original Array shape: (3, 24, 10191, 12603)
Compressed Array shape: (3, 24, 5096, 6302)

Analyzing ROI: organoid_sf4
Upsampling ROI to match input image shape
Extracting avg_int for ARSA inside 2D_organoid_sf4
Extracting avg_int for MBP inside 2D_organoid_sf4


100%|██████████| 2/2 [00:49<00:00, 24.95s/it]

All CSV files concatenated and saved to results\nihanseb_organoid\avg_int\BP_per_filename_summary.csv



