In [None]:
import numpy as np
from cellpose import models, core, io
from spotiflow.model import Spotiflow
from pathlib import Path
import os
import apoc
from tqdm import tqdm
import pyclesperanto_prototype as cle 
import pandas as pd
from utils import list_images, read_image, brightfield_correction, detect_infection_load, extract_intensity_information, puncta_detection

io.logger_setup()  # enable Cellpose logging so progress is printed

# Abort early if Cellpose reports that no GPU is available to this notebook
if not core.use_gpu():
    raise ImportError("No GPU access, change your runtime")

# Activate pyclesperanto (cle) GPU acceleration, requesting a device matching "RTX"
cle.select_device("RTX")

# Load pre-trained Cellpose-SAM and Spotiflow models
model = models.CellposeModel(gpu=True)
spotiflow_model = Spotiflow.from_pretrained("general")

In [None]:
# Load the pretrained APOC object segmenter used for Mycobacterium (Mtb) infection detection
mtb_cl_filename = "./pretrained_classifiers/no_nuclei_signal/siMtb screen I_LØ/Mtb_segmenter.cl"
mtb_segmenter = apoc.ObjectSegmenter(opencl_filename=mtb_cl_filename)

# Root directory of the screen (one subdirectory per plate).
# Raw string literal: "\L" and "\s" are not valid escape sequences, so the
# backslashes must not be interpreted by Python.
main_directory_path = Path(r"X:\Lisa\siMtb screen I_LØ")

# Collect the plate subdirectories to analyze: directories whose name contains
# neither "Nuc" nor "Results".
# NOTE: despite its name, `folders_with_nuc` therefore holds the folders WITHOUT
# the "Nuc" string; the name is kept because the analysis loop refers to it.
# Sorted so plates are always processed in the same order.
folders_with_nuc = sorted(
    entry.name
    for entry in main_directory_path.iterdir()
    if entry.is_dir() and "Nuc" not in entry.name and "Results" not in entry.name
)

print(folders_with_nuc)

In [None]:
# Channels to analyze, declared as (channel_name, channel_nr) tuples:
#     markers = [(channel_name, channel_nr), (..., ...)]
# Channel numbers are 0-based (Python counts from 0), so the first channel is 0,
# e.g. markers = [("ki67", 0), ("neun", 1)]

# Markers used for the signal intensity analysis
markers = [
    ("LC3B", 0),
    ("GAL3", 1),
    ("Chmp4B", 2),
    ("Mtb", 3),
]

# Markers used for the spot (puncta) detection analysis
puncta_markers = [
    ("LC3B", 0),
    ("GAL3", 1),
    ("Chmp4B", 2),
]

In [None]:
# Image size reduction (downsampling) to improve processing times (slicing, not lossless compression).
# Identical for every plate, so it is defined once instead of on each loop iteration.
slicing_factor_xy = None  # Use 2 or 4 for downsampling in xy (None for lossless)

for folder in tqdm(folders_with_nuc):

    print(f"Analyzing Plate: {folder}")

    # Folder holding the images of the current plate
    directory_path = main_directory_path / folder

    # List the .czi and .nd2 files in the directory
    images = list_images(directory_path)

    # Skip plates without images: otherwise pd.concat would fail on an empty list
    # and plate_nr would be undefined (or left over from the previous plate, which
    # would overwrite that plate's result files).
    if len(images) == 0:
        print(f"No images found in {directory_path}, skipping plate")
        continue

    # Subtract uneven illumination and remove background from BF by obtaining the median of all BF channels
    bf_correction = brightfield_correction(directory_path, images, slicing_factor_xy)

    # Empty list to populate with per well features (one dataframe per image)
    per_well_props = []

    # Empty list to populate with per infection stats.
    # It is only handed to detect_infection_load below, which is presumably
    # responsible for appending to it in place — verify against utils.
    infection_stats = []

    # Loop through well images in each plate folder (A1 --> H12)
    for image in tqdm(images):

        # Read image, apply slicing if needed and return filename and img as a np array
        img, filename = read_image(image, slicing_factor_xy)

        # Extract plate number and well_id from the filename
        plate_nr = filename.split("_")[0]
        well_id = filename.split("Wells-")[1].split("__")[0]

        # Build the 2-channel Cellpose input: sum of channels 0 and 1, plus the
        # background-corrected channel 4.
        # NOTE(review): if img has an unsigned integer dtype this subtraction can
        # wrap around for pixels below the background — confirm dtypes.
        summed_signal = img[[0, 1]].sum(axis=0)
        corrected_bf = img[4] - bf_correction
        cellpose_input = np.stack((summed_signal, corrected_bf), axis=0)

        # Predict cytoplasm labels using CellposeSAM
        # TODO: need to check the arguments
        cytoplasm_labels, flows, styles = model.eval(cellpose_input, niter=1000)

        # ----- Mtb infection detection ------

        infected_labels = detect_infection_load(img, mtb_segmenter, cytoplasm_labels, plate_nr, well_id, infection_stats)

        # ----- per_label intensity information ------

        props_df = extract_intensity_information(img, cytoplasm_labels, markers, plate_nr, well_id, image)

        # ----- puncta filtering and counting ------

        props_df = puncta_detection(img, puncta_markers, spotiflow_model, cytoplasm_labels, props_df)

        # ----- Dataframe update and save logic ------

        # Rename label(id) to CellProfiler format ObjectNumber
        props_df = props_df.rename(columns={"label": "ObjectNumber"})

        # Flag each label as infected when it is listed in infected_labels,
        # inserting the new column right after "ObjectNumber"
        col_idx = props_df.columns.get_loc("ObjectNumber")
        props_df.insert(col_idx + 1, "Mtb_infected", props_df["ObjectNumber"].isin(infected_labels))

        # Append each props_df to per_well_props
        per_well_props.append(props_df)

    # ----- Data save after loop completion ------
    # Output files are named after plate_nr as parsed from the last image of the plate.

    # Transform infection stats list into a dataframe to store it as .csv
    df = pd.DataFrame(infection_stats)
    df.to_csv(f"./results_infection_{plate_nr}.csv")

    # Concatenate all per_well_props into final_df
    final_df = pd.concat(per_well_props, ignore_index=True)
    final_df.to_csv(f"./results_per_label_{plate_nr}.csv")