load libraries

In [None]:
import czifile
import numpy as np
from skimage.filters import gaussian, threshold_otsu
from skimage.measure import label, regionprops
import matplotlib.pyplot as plt
import cv2
from IPython.display import Image, display
from skimage.color import label2rgb, rgb2hsv
from skimage.filters import gaussian, sobel
from skimage.measure import label, regionprops
from skimage.morphology import binary_erosion, binary_dilation, disk, local_maxima
from scipy.ndimage import binary_fill_holes
from skimage.feature import blob_log
from skimage.color import rgb2gray
from skimage.io import imshow
from skimage.draw import circle_perimeter
from matplotlib.colors import LogNorm
import scipy.ndimage as ndi
import pandas as pd
import os
from IPython.display import clear_output

define functions

In [None]:

def preprocess_image(image, sigma=2):
    """
    Subtracts a Gaussian-blurred copy of the image from the image itself
    (a simple background subtraction).

    Args:
        image: The image to process; it is converted to a float array first.
        sigma: Standard deviation handed to ``gaussian`` (default: 2).

    Returns:
        A float array holding ``image - blurred``, in the input's own
        intensity units.
    """
    # Work in floating point and keep the blur in the input's intensity range.
    # By default ``gaussian`` rescales integer images to [0, 1], which would make
    # the subtraction below mix two different scales.
    image = np.asarray(image, dtype=float)
    blurred_image = gaussian(image, sigma=sigma, preserve_range=True)
    return image - blurred_image

def plot_frames_initially(image_squeezed, basename):
    """
    Plots the first two frames side by side after background subtraction.

    Frame 0 is titled 'RFP' and frame 1 is titled 'GFP'. The figure is saved
    to 'cells.png' and then shown. ``basename`` is accepted but not used here.
    """
    panel_titles = ('RFP', 'GFP')

    # Background-subtract both frames before any figure is created
    processed_frames = [preprocess_image(image_squeezed[idx, :, :]) for idx in range(2)]

    # One row, two columns
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    for axis, frame, title in zip(axes, processed_frames, panel_titles):
        axis.imshow(frame)
        axis.set_title(title)
        axis.axis('off')

    # This path can be edited to wherever you would like the frames image saved
    plt.savefig('cells.png')
    plt.show()

def visualize_mask(image_squeezed, mask, basename, i):
    '''Plots channels 0 and 1 with the pixels selected by ``mask`` set to 0, to check the masking visually.

    The figure is saved as 'visualize_mask_{i}.png' and then shown.
    ``basename`` is accepted but not used here.
    '''
    # NOTE(review): the mask is applied exactly as passed in (the selected pixels
    # are the ones blanked); confirm whether callers are meant to pass a mask that
    # is already inverted.
    panel_titles = ('Channel 1 Masked', 'Channel 2 Masked')

    # Work on copies so the caller's image is left untouched
    blanked_channels = [image_squeezed[idx, :, :].copy() for idx in (0, 1)]
    for channel in blanked_channels:
        channel[mask] = 0

    plt.figure(figsize=(10, 5))
    for position, (channel, title) in enumerate(zip(blanked_channels, panel_titles), start=1):
        plt.subplot(1, 2, position)
        plt.imshow(channel, cmap='gray')
        plt.title(title)
        plt.axis('off')

    plt.savefig(fr'visualize_mask_{i}.png')
    plt.show()

def display_cells(labeled_image, first_frame):
    '''Shows the segmented regions overlaid on ``first_frame`` and writes the area of each
    region of at least 500 pixels at its centroid.'''

    min_size_threshold = 500

    # Colour overlay of the labels on top of the original frame
    overlay = label2rgb(labeled_image, image=first_frame, bg_label=0)

    # Only regions that reach the size cutoff get a text annotation
    large_regions = [props for props in regionprops(labeled_image)
                     if props.area >= min_size_threshold]

    for props in large_regions:
        row, col = props.centroid
        plt.text(col, row, f'Size: {props.area}', color='white', fontsize=8, ha='center', va='center')

    plt.imshow(overlay, cmap='gray')
    plt.axis('off')
    plt.title('Original Image with Structure Sizes')
    plt.show()


def fill_holes_with_original_values(mask, original_matrix):
    """Copies the values of ``original_matrix`` that lie under the hole-filled mask
       into an otherwise all-zero array.

    Args:
        mask: A numpy array used as a binary mask; nonzero entries count as foreground.
        original_matrix: The numpy array the values are taken from.

    Returns:
        A new array with the dtype and shape of ``original_matrix``. Positions inside
        the mask (including any enclosed holes) hold the original values; every other
        position is 0. Neither input is modified.
    """
    # Foreground plus any holes it encloses
    filled_region = binary_fill_holes(mask.astype(bool))

    # Start from zeros and copy over only the covered positions
    result = np.zeros_like(original_matrix)
    result[filled_region] = original_matrix[filled_region]
    return result

def mask_near_border(mask, distance=5):
    """Checks if a binary mask comes within a specified distance of the image borders.

    Args:
        mask: A 2-D numpy array (or array-like) representing the binary mask (0s and 1s).
        distance: The width, in pixels, of the border band to check (default: 5 pixels).
            A value of 0 or less means there is no band to check.

    Returns:
        True if the mask has any nonzero value within ``distance`` pixels of any
        border, False otherwise.
    """
    # A zero-width band must be handled explicitly: mask[-0:, :] is the whole
    # array, so without this guard any foreground pixel would count as "near".
    if distance <= 0:
        return False

    mask = np.asarray(mask)
    border_bands = (
        mask[:distance, :],    # top
        mask[-distance:, :],   # bottom
        mask[:, :distance],    # left
        mask[:, -distance:],   # right
    )
    return any(bool(np.any(band)) for band in border_bands)

def _mean_or_nan(values):
    """Return the mean of ``values``, or NaN (without a warning) when it is empty."""
    return np.mean(values) if values.size else np.nan


def _show_panel(fig, ax, data, title, cmap):
    """Draw ``data`` on ``ax`` with a title and a colorbar, and hide the axis ticks."""
    drawn = ax.imshow(data, cmap=cmap)
    ax.set_title(title)
    fig.colorbar(drawn, ax=ax, shrink=0.8)
    ax.axis('off')


def process_file(file_path, basename):
    '''Main function: interactively segments and measures the cells in one .czi file.

    Channel 0 is treated as red and channel 1 as green. Cells are segmented from the
    blurred green channel; for every cell that is large enough and not near the image
    border, bright "inclusions" are segmented inside it, the measurements are printed,
    a 2x2 summary figure is shown, and the user is asked whether to keep the cell.
    Type y to approve a cell and have it be added to the dataframe; answering exactly
    n appends a line to 'discard.txt'.

    Args:
        file_path: Path of the .czi file to read.
        basename: Name used in the "Filename" column and in messages.

    Returns:
        A Pandas dataframe with one row per approved cell, or None if any exception
        was raised while processing the file (the error is printed).
    '''
    # NOTE(review): 0.02 presumably relies on gaussian() rescaling integer images
    # to [0, 1] - confirm against the acquisition bit depth.
    initial_threshold = 0.02
    cell_size_threshold = 1000      # minimum area of a labelled region kept as a cell
    max_inclusion_fraction = 0.6    # larger candidate regions are not counted as inclusions

    try:
        image_squeezed = np.squeeze(czifile.imread(file_path))
        first_frame = image_squeezed[0, :, :]
        second_frame = image_squeezed[1, :, :]

        # Define cells from the blurred green channel
        green_channel = gaussian(second_frame, sigma=2)
        cells_thresholded = green_channel > initial_threshold
        cells_edges = sobel(cells_thresholded)  # Using Sobel edge detection to highlight edges
        cells_edges = binary_dilation(cells_edges, disk(2))  # Increase border thickness
        cells_edges = binary_erosion(cells_edges, disk(1))  # Ensure borders are well-defined
        labeled_image = label(cells_edges)

        red_wholecell_mfi = []
        green_cytoplasm_mfi = []
        green_wholecell_mfi = []
        num_inclusions = []
        inclusion_sizes = []
        cell_sizes = []
        inclusion_total_sizes = []
        green_inclusions_mfi = []
        red_inclusions_mfi = []

        # Counts the cells that pass the size and border filters (used in discard.txt)
        i = 0

        for region in regionprops(labeled_image):
            cell_size = region.area
            if cell_size < cell_size_threshold:
                continue

            cell_outline = labeled_image == region.label
            if mask_near_border(cell_outline):
                continue
            i = i + 1

            # Green intensities inside the hole-filled outline, 0 everywhere else
            green_in_cell = fill_holes_with_original_values(cell_outline, second_frame)
            cell_pixels = green_in_cell > 0
            red_cell = np.where(cell_pixels, first_frame, 0)
            green_cell = np.where(cell_pixels, second_frame, 0)

            # Normalize to 0-1; a cell without any intensity spread cannot be
            # normalized or thresholded, so it is skipped instead of dividing by zero.
            lowest = green_cell.min()
            intensity_span = green_cell.max() - lowest
            if intensity_span == 0:
                print(f"Skipping cell {i} in {basename}: no green signal to normalize")
                continue
            normalized_green = (green_cell - lowest) / intensity_span

            # Adaptive threshold to most accurately segment the inclusions
            threshold = 0.4
            otsu_level = threshold_otsu(normalized_green)
            if otsu_level > threshold:
                if np.mean(normalized_green) > 0.5:
                    threshold = otsu_level + 0.2
                else:
                    threshold = otsu_level - 0.1

            mask_inclusion = normalized_green > threshold
            inclusion_edges = binary_dilation(mask_inclusion, disk(1))  # Increase border thickness
            labeled_inclusions = label(inclusion_edges)

            # Union of all accepted inclusions for this cell. It is rebuilt for every
            # cell and accumulated across inclusions so that the inclusion intensities
            # cover every inclusion, and it stays empty when none is accepted.
            inclusion_only_mask = np.zeros(second_frame.shape, dtype=bool)
            inclusion_size = []
            for inclusion in regionprops(labeled_inclusions):
                if inclusion.area / cell_size > max_inclusion_fraction:
                    continue
                inclusion_size.append(inclusion.area)
                inclusion_only_mask |= binary_fill_holes(labeled_inclusions == inclusion.label)
            inclusion_counter = len(inclusion_size)

            # Green signal of the cell with every accepted inclusion blanked out
            green_cytoplasm = np.where(inclusion_only_mask, 0, green_cell)

            # Mean fluorescence intensities; zero-valued pixels are excluded
            mean_intensity_channel1 = _mean_or_nan(red_cell[red_cell != 0])
            mean_intensity_channel2 = _mean_or_nan(green_cytoplasm[green_cytoplasm != 0])
            mean_intensity_channel2_whole = _mean_or_nan(green_cell[green_cell != 0])
            mean_intensity_channel1_inclusions = _mean_or_nan(red_cell[inclusion_only_mask])
            mean_intensity_channel2_inclusions = _mean_or_nan(green_cell[inclusion_only_mask])

            print(mean_intensity_channel1, mean_intensity_channel2, mean_intensity_channel2_whole,
                  inclusion_counter, sum(inclusion_size), cell_size)

            # Visualize the results of the segmentation, and approve the cell if margins look clear.
            # Each panel is drawn once; the images are the ones that were visible before
            # (previously a second imshow call was layered on top of the first).
            fig, axs = plt.subplots(2, 2, figsize=(6, 5))  # Adjust figure size as needed
            _show_panel(fig, axs[0, 0], green_in_cell, 'Mask for Cell', 'nipy_spectral')
            _show_panel(fig, axs[0, 1], green_cell, 'Segmented Cell', 'viridis')
            _show_panel(fig, axs[1, 0], mask_inclusion, 'Labeled Inclusions', 'nipy_spectral')
            _show_panel(fig, axs[1, 1], green_cytoplasm, 'Green Cytoplasm', 'viridis')

            # Adjust layout to prevent overlap
            plt.tight_layout()
            plt.show()

            add_df = input("Do you want to add this dataframe? (y/n): ").lower()
            # Closing is enough; calling plt.clf() afterwards would create a new empty figure
            plt.close('all')
            clear_output(wait=True)  # this will clear the display of all output

            # If you approve the cell, type y
            if 'y' in add_df:
                red_wholecell_mfi.append(mean_intensity_channel1)
                green_cytoplasm_mfi.append(mean_intensity_channel2)
                green_wholecell_mfi.append(mean_intensity_channel2_whole)
                num_inclusions.append(inclusion_counter)
                inclusion_sizes.append(inclusion_size)
                cell_sizes.append(cell_size)
                inclusion_total_sizes.append(sum(inclusion_size))
                green_inclusions_mfi.append(mean_intensity_channel2_inclusions)
                red_inclusions_mfi.append(mean_intensity_channel1_inclusions)
            elif add_df == 'n':
                with open('discard.txt', 'a') as f:
                    f.write(f"Discarded: {basename} (Cell {i})\n")

        df = pd.DataFrame({
            "Filename": [basename] * len(num_inclusions),
            "Cell": list(range(1, len(inclusion_sizes) + 1)),
            "red_wholecell_mfi": red_wholecell_mfi,
            "green_wholecell_mfi": green_wholecell_mfi,
            "green_cytoplasm_mfi": green_cytoplasm_mfi,
            "red_inclusions_mfi": red_inclusions_mfi,
            "green_inclusions_mfi": green_inclusions_mfi,
            "num_inclusions": num_inclusions,
            'cell_size': cell_sizes,
            'total_inclusion_size': inclusion_total_sizes,
            "inclusion_sizes": inclusion_sizes,
            })
        return df
    except Exception as e:
        # Per-file boundary: report the failure and let the caller move on to the next file
        print(f"Failed on {basename}: {e}")
        return None


main

In [None]:
# Process every .czi file in a user-supplied folder and save all approved cells
# to a single Excel workbook.
all_data = []
discarded_images = []  # base names of files for which process_file returned None

folder_path = input('input the path to the folder that you would like to process here:')
# os.listdir returns names in arbitrary order; sort so runs are reproducible
for well_image in sorted(os.listdir(folder_path)):
    if not well_image.lower().endswith(".czi"):  # Filter for CZI files
        continue
    well_image_path = os.path.join(folder_path, well_image)
    well_image_base_name = os.path.splitext(well_image)[0]
    df = process_file(well_image_path, well_image_base_name)
    if df is not None:
        all_data.append(df)
    else:
        discarded_images.append(well_image_base_name)

if discarded_images:
    print(f"Could not process: {', '.join(discarded_images)}")

combined_csv_path = "results.xlsx"
if all_data:
    # Concatenate all DataFrames into one and save it to a single workbook
    combined_df = pd.concat(all_data, ignore_index=True)
    combined_df.to_excel(combined_csv_path, index=False)
else:
    # pd.concat raises on an empty list, so report instead of crashing
    print("No .czi files were processed successfully; nothing was saved.")