In [1]:
from pathlib import Path
import os
from tqdm import tqdm
import czifile
import tifffile
import napari
import pyclesperanto_prototype as cle
import numpy as np
import pandas as pd
from utils import check_filenames, segment_nuclei_2d, segment_marker_positive_nuclei

In [2]:
# Define the intensity threshold above which a cell is considered positive for a marker
neun_channel_threshold = 30
reelin_channel_threshold = 40
gad67_channel_threshold = 40

# Sets the amount of erosion that is applied to areas where the marker+ signal colocalizes with nuclear signal
# The higher the value, the stricter the conditions to consider a nuclei as marker+
neun_erosion_factor = 3
reelin_erosion_factor = 3
gad67_erosion_factor = 4

In [3]:
# Copy the path where your images are stored, ideally inside the raw_data directory
directory_path = Path("./raw_data/Reelin")
roi_directory_path = Path("./raw_data/Reelin/ROI")

# Define the subdirectories containing your data
subdirectories = ["Contra", "Ipsi", "Sham"]

# Create empty lists to store all image filepaths and ROIs within the dataset directory
images = []
rois = []

# Create an empty list to store all stats extracted from each image
stats = []

# Scan subdirectories and add paths to images fitting certain conditions
for subdir in subdirectories:
    # Construct the subdirectory path
    image_path = directory_path / subdir
    # Iterate through the .czi files in the subdirectories
    for file_path in image_path.glob("*.czi"):
        # Remove unwanted images
        if "AWT" not in str(file_path) and "BWT" not in str(file_path):
            images.append(str(file_path))

# Scan ROI directory and add paths to the list
for file_path in roi_directory_path.glob("*.tif"):
    # Remove unwanted images
        if "AWT" not in str(file_path) and "BWT" not in str(file_path):
            rois.append(str(file_path))

# Check if there is any missing ROI or image file in their corresponding directories
check_filenames(images, rois)

# Extract filenames without extensions and sort the lists so they appear in the same order
images_sorted = sorted(images, key=lambda x: Path(x).stem)
rois_sorted = sorted(rois, key=lambda x: Path(x).stem)

No files missing in images list.
No files missing in rois list.


In [4]:
for image_path, roi_path in tqdm(zip(images_sorted, rois_sorted)):

    # Read path storing raw image and extract filename
    file_path = Path(image_path)
    filename = file_path.stem

    # Get rid of double spaces in the filename
    filename = filename.replace("  ", " ")

    # Extract experimental conditions from the filename
    descriptors = filename.split(" ")
    condition = descriptors[0]

    try:
        # Convert strings to int
        condition_nr = int(descriptors[1])
        brain_location = descriptors[2]
        mouse_id = int(descriptors[4])
        slide = int(descriptors[5][-1])
        tech_replica = int(descriptors[-1])

    except ValueError:
        # In the case of erroneous filenaming add info as strings
        condition_nr = descriptors[1]
        brain_location = descriptors[2]
        mouse_id = descriptors[4]
        slide = descriptors[5][-1]
        tech_replica = descriptors[-1]

    # Read image and ROI files into Numpy arrays
    img = czifile.imread(image_path)
    roi = tifffile.imread(roi_path)

    # Remove singleton dimensions and perform MIP on input image
    img = img.squeeze()
    img_mip = np.max(img, axis=1)

    # Perform MIP for the region of interest
    roi_mip = np.max(roi, axis=0)

    # We will create a mask where label_mip is greater than or equal to 1
    mask = roi_mip >= 1

    try:

        # Apply the mask to img_mip
        masked_img = np.where(mask, img_mip, 0)

    except ValueError:
        # Catching which ROIs have different shape compared to img_mip
        print(f"File {filename}. ROI shape: {roi_mip.shape} Mask shape: {mask.shape} Input image shape: {img_mip.shape}")
        pass

    # Extract each of the channels separately
    neun_mip = masked_img[0, :, :]
    reelin_mip = masked_img[1, :, :]
    gad67_mip = masked_img[2, :, :]
    nuclei_mip = masked_img[3, :, :]

    # Segment nuclei inside the ROI
    nuclei_labels = segment_nuclei_2d(nuclei_mip)

    # Dilate or erode nuclei to check for cytoplasmic or nuclear marker colocalization
    cyto_nuclei_labels = cle.dilate_labels(nuclei_labels, radius=2)
    cyto_nuclei_labels = cle.pull(cyto_nuclei_labels)
    eroded_nuclei_labels = cle.erode_labels(nuclei_labels, radius=2)
    eroded_nuclei_labels = cle.pull(eroded_nuclei_labels)

    # Select marker positive nuclei
    neun_tuple = segment_marker_positive_nuclei (nuclei_labels, neun_mip, neun_channel_threshold, neun_erosion_factor)
    reelin_tuple = segment_marker_positive_nuclei (cyto_nuclei_labels, reelin_mip, reelin_channel_threshold, reelin_erosion_factor)
    gad67_tuple = segment_marker_positive_nuclei (cyto_nuclei_labels, gad67_mip, gad67_channel_threshold, gad67_erosion_factor)

    # Select Cajal cells by removing Reelin+ cells that are also positive for Neun

    # Convert neun labels into a mask
    neun_mask = neun_tuple[1] >= 1

    # Check the shape of the arrays to ensure they match
    assert reelin_tuple[1].shape == neun_mask.shape, "Label image and mask must have the same shape."

    # Create a copy of the reelin label image to prevent modifying the original
    cajal_cells = reelin_tuple[1].copy()

    # Remove labels where the mask is True (or 1) by setting them to background values (0)
    cajal_cells[neun_mask] = 0  

    # Previous operation leaves residual cytoplasmic region of Neun+ cells (perform an erosion and dilation cycle)
    cajal_cells = cle.erode_labels(cajal_cells, radius=2)
    cajal_cells = cle.dilate_labels(cajal_cells, radius=2)
    cajal_cells = cle.pull(cajal_cells)

    # Count the number of positive cells for each marker (or cell population)
    total_nuclei_count = len(np.unique(nuclei_labels)) - 1
    neun_nuclei_count = len(np.unique(neun_tuple[1])) - 1
    reelin_nuclei_count = len(np.unique(reelin_tuple[1])) - 1
    gad67_nuclei_count = len(np.unique(gad67_tuple[1])) - 1
    cajal_nuclei_count = len(np.unique(cajal_cells)) - 1

    # Create a dictionary containing all extracted info per masked image
    stats_dict = {
                "filename": filename,
                "condition": condition,
                "condition_nr": condition_nr,
                "brain_location": brain_location,
                "mouse_id": mouse_id,
                "slide_nr": slide,
                "tech_replica": tech_replica,
                "total_nuclei": total_nuclei_count,
                "neun+_nuclei": neun_nuclei_count,
                "reelin+_nuclei": reelin_nuclei_count,
                "gad67+_nuclei": gad67_nuclei_count,
                "cajal_nuclei": cajal_nuclei_count,
                "%_neun+_cells": (neun_nuclei_count * 100) / total_nuclei_count,
                "%_reelin+_cells": (reelin_nuclei_count * 100) / total_nuclei_count,
                "%_gad67+_cells": (gad67_nuclei_count * 100) / total_nuclei_count,
                "%_cajal_cells": (cajal_nuclei_count * 100) / total_nuclei_count
                }

    # Append the current data point to the stats_list
    stats.append(stats_dict)

# Define output folder for results
results_folder = "./results/"

# Create the necessary folder structure if it does not exist
try:
    os.mkdir(str(results_folder))
    print(f"Output folder created: {results_folder}")
except FileExistsError:
    print(f"Output folder already exists: {results_folder}")

# Transform into a dataframe to store it as .csv later
df = pd.DataFrame(stats)

# Overwrite the .csv with new data points each round
df.to_csv("./results/Reelin_summary_2D.csv", index=True)

df

20it [13:48, 41.43s/it]

Output folder created: ./results/





Unnamed: 0,filename,condition,condition_nr,brain_location,mouse_id,slide_nr,tech_replica,total_nuclei,neun+_nuclei,reelin+_nuclei,gad67+_nuclei,cajal_nuclei,%_neun+_cells,%_reelin+_cells,%_gad67+_cells,%_cajal_cells
0,HI 1 Contralateral Mouse 8 Slide09 GAD67green ...,HI,1,Contralateral,8,9,1,4675,1353,98,32,80,28.941176,2.096257,0.684492,1.71123
1,HI 1 Contralateral Mouse 8 Slide09 GAD67green ...,HI,1,Contralateral,8,9,2,4911,1837,106,40,79,37.405824,2.15842,0.814498,1.608634
2,HI 1 Ipsilateral Mouse 8 Slide16 GAD67green Ne...,HI,1,Ipsilateral,8,6,1,3591,1796,109,115,68,50.013924,3.035366,3.202451,1.893623
3,HI 1 Ipsilateral Mouse 8 Slide16 GAD67green Ne...,HI,1,Ipsilateral,8,6,2,3737,2726,263,162,26,72.946214,7.037731,4.335028,0.695745
4,HI 2 Contralateral Mouse 10 Slide08 GAD67green...,HI,2,Contralateral,10,8,1,3897,1526,113,35,75,39.158327,2.899666,0.898127,1.924557
5,HI 2 Contralateral Mouse 10 Slide08 GAD67green...,HI,2,Contralateral,10,8,2,4027,1707,114,91,78,42.388875,2.830891,2.259747,1.936926
6,HI 2 Ipsilateral Mouse 10 Slide08 GAD67green N...,HI,2,Ipsilateral,10,8,1,4058,566,74,18,57,13.947758,1.823558,0.443568,1.404633
7,HI 2 Ipsilateral Mouse 10 Slide08 GAD67green N...,HI,2,Ipsilateral,10,8,2,3089,693,83,24,66,22.434445,2.686954,0.77695,2.136614
8,HI 3 Contralateral Mouse 11 Slide09 GAD67green...,HI,3,Contralateral,11,9,1,4308,2689,118,48,55,62.418756,2.73909,1.114206,1.276695
9,HI 3 Contralateral Mouse 11 Slide09 GAD67green...,HI,3,Contralateral,11,9,2,4577,2899,138,36,59,63.338431,3.015075,0.786541,1.289054
