In [4]:
from utils import extract_image_info, split_channels, segment_nuclei, extract_intensities, classify_cells_percentage
from tqdm import tqdm
import pandas as pd
import os
from pathlib import Path
from cellpose import models
import plotly.express as px

In [5]:
# -------- DEFINE YOUR ANALYSIS PARAMETERS BELOW --------- #

# Default Cellpose model used to segment nuclei; gpu=True asks Cellpose to use a GPU if one is available
model = models.Cellpose(gpu=True, model_type="nuclei")

# Diameter (in pixels) of the nuclei present in your images; helps Cellpose adjust its nuclei mask predictions
cellpose_nuclei_diameter = 70

# Sigma of the Gaussian blur applied to the nuclei image to even out high-intensity foci within the nucleus.
# Higher values blur more: this helps segment sparse nuclei (CA and CTX regions), but as a drawback
# it merges nuclei that are very close together (DG region).
gaussian_sigma = 5

# This notebook classifies cells based on percentages:
#
# A nucleus is considered positive (+) for a marker when its mean intensity is within the top
# (top_percentage)% AND is (ratio) times above the mean intensity of that marker across all nuclei.
# The ratio avoids the situation where all cells in an image have a similar average signal intensity
# but none are truly positive (i.e. background). Given the nature of these samples, we know there is
# no situation where most of the cells are positive.

h2a_top_percentage = 50
cfos_top_percentage = 60
h2a_ratio = 1.2
cfos_ratio = 2.2

# -------- DEFINE YOUR DATA FOLDER BELOW --------- #

# Recursively collect every .lsm image under the data folder (at any depth).
# rglob("*.lsm") matches the same files as the previous "**/**/**/*.lsm" pattern, because each "**"
# already matches zero or more directory levels. The paths are sorted so the processing order
# (and therefore the row order of the results) does not depend on filesystem listing order.
data_path = Path("./data")
images = sorted(data_path.rglob("*.lsm"))

In [9]:
# ------- ANALYSIS PIPELINE --------- #

# Fail early with a clear message: with no input images the loop below would not run
# and pd.concat would raise an opaque "No objects to concatenate" error.
if not images:
    raise ValueError("No images to analyse: `images` is empty, check the data folder.")

# Define output folder for results
results_folder = "./results/"

# Create the output folder before the long-running loop so filesystem problems surface immediately
try:
    os.mkdir(str(results_folder))
    print(f"Output folder created: {results_folder}")
except FileExistsError:
    print(f"Output folder already exists: {results_folder}")

# Collect one dataframe per analysed image; they are concatenated once after the loop
dataframes = []

for image in tqdm(images):

    # Extract filename, region, mouse and IHC round
    filename, region, mouse_id, ihc_round = extract_image_info(image)

    # Split channels
    nuclei_img, h2a_img, cfos_img = split_channels(image)

    # Segment nuclei
    nuclei_masks = segment_nuclei(nuclei_img, gaussian_sigma, model, cellpose_nuclei_diameter)

    # Morphological and intensity measurements
    merged_df = extract_intensities(nuclei_masks, h2a_img, cfos_img, filename, region, mouse_id, ihc_round)

    # Select H2A and CFOS positive cells based on top percentages and ratios.
    # The label outputs are not used in this cell; they are kept as named variables because
    # they remain in the notebook namespace (last image only) and other cells may inspect them.
    (
        merged_df,
        h2a_nuclei_labels,
        cfos_nuclei_labels,
        double_pos_nuclei_labels,
        h2a_pos_labels,
        cfos_pos_labels,
        double_pos_labels,
    ) = classify_cells_percentage(
        merged_df,
        nuclei_masks,
        h2a_top_percentage,
        cfos_top_percentage,
        h2a_ratio,
        cfos_ratio,
    )

    dataframes.append(merged_df)

# Concatenate all DataFrames in the list into a single DataFrame
final_df = pd.concat(dataframes, ignore_index=True)

# Save a copy of final_df as a .csv file inside results_folder; the file name encodes the
# analysis parameters so runs with different settings do not overwrite each other.
csv_name = (
    f"results_cellpdia{cellpose_nuclei_diameter}_sigma{gaussian_sigma}"
    f"_h2atop{h2a_top_percentage}_h2ar{h2a_ratio}"
    f"_cfostop{cfos_top_percentage}_cfosr{cfos_ratio}.csv"
)
final_df.to_csv(Path(results_folder) / csv_name)


100%|██████████| 264/264 [20:01<00:00,  4.55s/it]

Output folder created: ./results/





In [10]:
# Preview the first five rows of the combined results table
final_df.head(n=5)

Unnamed: 0,filename,region,mouse_id,ihc_round,label,cfos_intensity_mean,cfos_intensity_max,h2a_intensity_mean,h2a_intensity_max,area_filled,perimeter,equivalent_diameter,h2a_pos_cells,cfos_pos_cells,double_pos_cells
0,Image1,CA1,AD1867,IHC_1,1,13.342907,44.0,13.032239,58.0,1706.0,161.740115,46.606294,False,False,False
1,Image1,CA1,AD1867,IHC_1,2,18.582311,38.0,22.26275,83.0,1549.0,157.497475,44.409999,True,False,False
2,Image1,CA1,AD1867,IHC_1,3,19.051891,60.0,9.917071,62.0,2062.0,177.4386,51.238852,False,False,False
3,Image1,CA1,AD1867,IHC_1,4,18.442348,49.0,11.41789,59.0,1431.0,140.710678,42.68496,False,False,False
4,Image1,CA1,AD1867,IHC_1,5,16.42567,42.0,13.602257,107.0,3545.0,245.279221,67.183586,False,False,False


In [11]:
# Histogram of the 'h2a_intensity_mean' column of final_df (50 bins).
# update_layout returns the figure it was called on, so construction and layout are chained.
fig = px.histogram(
    final_df,
    x='h2a_intensity_mean',
    nbins=50,
    title='Distribution of H2A Intensity Mean',
).update_layout(
    xaxis_title='H2A Intensity Mean',
    yaxis_title='Count',
    bargap=0.2,
)

# Render the figure
fig.show()

In [12]:
# Histogram of the 'cfos_intensity_mean' column of final_df (50 bins).
# update_layout returns the figure it was called on, so construction and layout are chained.
fig = px.histogram(
    final_df,
    x='cfos_intensity_mean',
    nbins=50,
    title='Distribution of CFOS Intensity Mean',
).update_layout(
    xaxis_title='CFOS Intensity Mean',
    yaxis_title='Count',
    bargap=0.2,
)

# Render the figure
fig.show()