**Table of contents**<a id='toc0_'></a>    
- [Import packages](#toc1_)    
- [Set up working directory](#toc2_)    
- [ISV loss analysis](#toc3_)    
  - [Define parameters](#toc3_1_)    
  - [Define functions](#toc3_2_)    
  - [Perform ISV loss analysis](#toc3_3_)    
    - [Run samples in working directory](#toc3_3_1_)    
    - [Generate ISV segmentation masks](#toc3_3_2_)    
    - [Calculate probability of ISV loss and save results](#toc3_3_3_)    

<!-- vscode-jupyter-toc-config
	numbering=false
	anchor=true
	flat=false
	minLevel=1
	maxLevel=6
	/vscode-jupyter-toc-config -->
<!-- THIS CELL WILL BE REPLACED ON TOC UPDATE. DO NOT WRITE YOUR TEXT IN THIS CELL -->

# <a id='toc1_'></a>[Import packages](#toc0_)

In [None]:
import sys
import os
import numpy as np
import vessel_metrics as vm
import czifile
from aicspylibczi import CziFile
import matplotlib.pyplot as plt
import cv2
import imageio
import matplotlib.pyplot as plt
from matplotlib import rcParams
import matplotlib.colors
from matplotlib.pyplot import rc_context
import math
import seaborn as sns
from scipy.stats import median_abs_deviation 
import glob
import time
import pandas as pd
import openpyxl
import seaborn as sns
import plotly.graph_objects as go
from skimage.measure import label, regionprops
from skimage import color
from skimage.color import label2rgb
from matplotlib.cm import get_cmap
from skimage.morphology import remove_small_objects
from aicsimageio import AICSImage, readers
from scipy.spatial import distance
from skimage.morphology import skeletonize
import traceback
import gc
from scipy.spatial.distance import cdist

# <a id='toc2_'></a>[Set up working directory](#toc0_)

In [None]:
"""
Define the path to the working directory and the output folder where the 3D volumes will be saved.
Define the name of the files to be processed.
"""
data_path = 'path_to_working_directory'
output_path = 'path_to_output_directory'
# Build the search pattern with os.path.join so it works whether or not data_path
# ends with a path separator; plain string concatenation would otherwise search for
# siblings named "<data_path>*.*" instead of the files inside the directory.
# The result is sorted so samples are processed in a reproducible order.
file_name = sorted(glob.glob(os.path.join(data_path, '*.*'))) # List of file names to process

In [None]:
"""
Ensure the output directory exists and that files are read correctly.
"""
# Create the output folder up front: cv2.imwrite does not create missing directories,
# so saving into a non-existent folder would otherwise fail later on.
os.makedirs(output_path, exist_ok=True)
total_files = len(file_name) # Number of files found in the working directory
print(f"Working folder contains {total_files} files")

# <a id='toc3_'></a>[ISV loss analysis](#toc0_)

## <a id='toc3_1_'></a>[Define parameters](#toc0_)

In [None]:
## Please read the README before running the following code ##

## Parameters that the user should define before running VISTA-Z ROI analysis ##

    # clahe = (int, int) # CLAHE parameters (clipLimit, tileGridSize)
    # ROI_mode = 'square'  or 'polygon'
    # ROI_num = int # Number of ROIs to be analysed per sample
    # channel = int [0,1,2] # Channel number to be analysed per sample (only if multi-channel images) [0]: mCherry, [1]: EGFP, [2]: DAPI, etc.
    # name_start = str # Starting string of the file names to be processed
    # pdf_dpi = int # DPI for saving PDF figures
    # min_vessel_size = int # Minimum vessel size (in pixels) to be thresholded for analysis
    # Vessel segmentation parameters:
        # seg_method = str # Vessel segmentation method: 'meijering', 'frangi', 'sato' or 'jerman'
        # sigma1 = range(int, int, int) # Sigma range for vessel enhancement filter
        # hole_size = int # Maximum hole size to be filled in the vessel mask
        # ditzle_size = int # Maximum size of small objects to be removed from the vessel mask
        # thresh = int # Threshold value to binarize the vessel enhanced image
        # tolerance = float # Tolerance for skeleton pruning (if using 'lee' method); also the default matching tolerance (normalised x-distance) used by detect_lost_isvs
    # Normalisation of vessel metrics to the ROI area:
        # scale = float # Scale factor to normalise vessel metrics (microns per pixel)
        # z = int # Z-step size (microns) between slices in the 3D image stack
    # output_name = str # Name of the output files to be saved (excel file)

In [None]:
# --- Image pre-processing and ROI selection ---
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(4, 4))  # CLAHE operator (clipLimit, tileGridSize)
ROI_mode = "square"      # ROI selection mode: 'square' or 'polygon'
ROI_num = 1              # Number of ROIs analysed per sample
# channel = 0 # Use for multiple channel images
name_start = "flk1EGFP"  # Starting string of the file names to be processed
pdf_dpi = 300            # DPI used when saving PDF figures

# --- Vessel segmentation ---
# Note: users are recommended to set min_vessel_size after an initial analysis,
# based on median(vessel_length) - std(vessel_length).
min_vessel_size = 500    # Minimum vessel size (pixels) kept for analysis
seg_method = "meijering" # Vessel enhancement filter: 'meijering', 'frangi', 'sato' or 'jerman'
sigma1 = range(3, 8)     # Sigma range for the vessel enhancement filter (3, 4, 5, 6, 7)
hole_size = 200          # Maximum hole size filled in the vessel mask
ditzle_size = 50         # Maximum size of small objects removed from the vessel mask
thresh = 10              # Threshold used to binarise the vessel-enhanced image
tolerance = 0.02         # Default matching tolerance of detect_lost_isvs (normalised x-distance)

# --- Probability of ISV loss ---
ref_name = "WTsib"       # Name of the reference group
target_name = "mut"      # Name of the target group
scale = 1.2044           # Scale factor (microns per pixel)
z = 5                    # Z-step size (microns) between slices

# --- Output file name ---
output_name = "ISV_Results"

# --- Empty containers that collect the per-ROI data ---
all_mipROIs, all_grayROIs, all_claheROIs, all_otsuROIs = [], [], [], []
output_ROI_paths, output_ROImask_paths = [], []
all_coords, seg_mask, ISV_labels, centroids, embryo_data = [], [], [], [], []

## <a id='toc3_2_'></a>[Define functions](#toc0_)

In [None]:
def _full_image_roi(image):
    """Return the fallback selection: the whole image, an all-True mask and no coordinates."""
    return image, np.ones_like(image, dtype=bool), None


def _mask_bounds(mask):
    """Return (xmin, ymin, xmax, ymax) of the True pixels in a 2D mask, or None if the mask is empty."""
    ys, xs = np.where(mask)
    if ys.size == 0:
        return None
    return xs.min(), ys.min(), xs.max(), ys.max()


def select_roi(image, mode=ROI_mode):
    """
    Interactively select a region of interest (ROI) from a 2D image.
    The ROI can be selected as a rotated rectangle ("square") or a polygon.
    For square selection, the user can rotate and move the box before confirming the ROI.

    Every exit path returns the same 3-tuple, so callers can always unpack
    ``roi_img, mask, coords``.

    Args:
        image (ndarray): 2D image from which to select the ROI.
        mode (str): Mode of ROI selection, either 'square' or 'polygon'.

    Returns:
        roi_img (ndarray): The selected ROI image (the full image if nothing was selected or the selection was cancelled).
        mask (ndarray): Boolean mask of the ROI. In 'square' mode it is cropped to the bounding box;
            in 'polygon' mode it has the shape of the full image. All True when the full image is returned.
        coords (tuple or None): Bounding box (xmin, ymin, xmax, ymax) of the ROI, or None when the full image is returned.

    Raises:
        ValueError: If mode is neither 'square' nor 'polygon'.
    """
    # Convert image to uint8 for display; an all-zero image is shown as black instead of dividing by zero
    peak = image.max()
    if peak > 0:
        image_disp = (image / peak * 255).astype(np.uint8)
    else:
        image_disp = np.zeros_like(image, dtype=np.uint8)

    if mode == 'square': # ROI selection as square
        # These key bindings only apply to the rectangle adjustment loop below
        print("Press Enter to confirm, 'r' to rotate clockwise, 'e' to rotate counter-clockwise,")
        print("'a', 'd', 'w', 's' to move left, right, up, down respectively, and 'c' to cancel.")
        x, y, w, h = cv2.selectROI("Select ROI", image_disp, showCrosshair=True, fromCenter=False) # Create ROI
        if w == 0 or h == 0: # Return full image if no ROI selected
            print("No ROI selected, returning full image")
            cv2.destroyAllWindows() # Close ROI window
            return _full_image_roi(image)

        rect = ((x + w / 2, y + h / 2), (w, h), 0) # (center, size, angle) with an initial angle of 0
        rotations = {ord('r'): 1, ord('e'): -1} # Angle step in degrees per key press
        shifts = {ord('a'): (-10, 0), ord('d'): (10, 0), ord('w'): (0, -10), ord('s'): (0, 10)} # (dx, dy) in pixels
        while True: # Rotate and adjust rectangle
            display_img = cv2.cvtColor(image_disp, cv2.COLOR_GRAY2BGR)
            box = cv2.boxPoints(rect).astype(int) # Get rectangle box points
            cv2.drawContours(display_img, [box], 0, (0, 255, 0), 2) # Draw rectangle contour
            cv2.imshow("Select ROI", display_img) # Display the image with the rectangle
            key = cv2.waitKey(0) & 0xFF # Wait for key press
            if key == 13: # Enter
                break
            if key == ord('c'): # Cancel selection
                cv2.destroyAllWindows() # Close ROI window
                print("Cancelled ROI selection.")
                return _full_image_roi(image) # Return full image if cancelled
            if key in rotations:
                rect = (rect[0], rect[1], (rect[2] + rotations[key]) % 360)
            elif key in shifts:
                dx, dy = shifts[key]
                cx, cy = rect[0]
                rect = ((cx + dx, cy + dy), rect[1], rect[2])
        cv2.destroyAllWindows() # Close ROI window

        mask_uint8 = np.zeros_like(image, dtype=np.uint8) # Create mask for rotated ROI with zeros
        box = cv2.boxPoints(rect).astype(int) # Get rectangle box points
        cv2.fillPoly(mask_uint8, [box], 1) # Fill the rectangle area in the mask
        mask = mask_uint8.astype(bool) # Convert mask to boolean
        bounds = _mask_bounds(mask)
        if bounds is None: # The box was moved completely outside the image
            print("ROI lies outside the image, returning full image")
            return _full_image_roi(image)
        xmin, ymin, xmax, ymax = bounds
        # Half-open slices: the last row/column of the bounding box is excluded, which matches
        # how load_and_process_czi_singleROI crops with these coordinates.
        roi_img = image[ymin:ymax, xmin:xmax] # Crop ROI from image
        mask = mask[ymin:ymax, xmin:xmax] # Crop mask to ROI
        coords = (xmin, ymin, xmax, ymax) # Define ROI coordinates
    elif mode == 'polygon': # ROI selection as polygon
        points = [] # List to store polygon points
        def click_event(event, x, y, flags, param): # Mouse click to record points
            if event == cv2.EVENT_LBUTTONDOWN: # Left mouse button click
                points.append((x, y)) # Add point to list
                cv2.circle(display_img, (x, y), 3, (255, 0, 0), -1) # Draw point on image
                cv2.imshow("Select ROI", display_img) # Update display with new point
        display_img = cv2.cvtColor(image_disp, cv2.COLOR_GRAY2BGR)
        cv2.namedWindow("Select ROI", cv2.WINDOW_NORMAL | cv2.WINDOW_GUI_EXPANDED)
        cv2.setWindowProperty("Select ROI", cv2.WND_PROP_TOPMOST, 1)
        cv2.imshow("Select ROI", display_img)
        cv2.setMouseCallback("Select ROI", click_event)
        print("Click points to define polygon. Press 'Enter' when done.")
        while True: # Wait for user to finish polygon selection
            key = cv2.waitKey(1) & 0xFF # Wait for key press
            if key == 13: # Enter
                break
            elif key == 27: # ESC to cancel
                points = []
                break
        cv2.destroyAllWindows() # Close ROI window
        if len(points) < 3: # Return full image if no polygon selected
            print("Polygon not selected, returning full image")
            return _full_image_roi(image)
        mask_uint8 = np.zeros_like(image, dtype=np.uint8) # Create mask for polygon ROI with zeros
        cv2.fillPoly(mask_uint8, [np.array(points, dtype=np.int32)], 1) # Fill polygon area in the mask
        mask = mask_uint8.astype(bool) # Convert mask to boolean (kept at full image size in this mode)
        bounds = _mask_bounds(mask)
        if bounds is None: # Defensive: nothing was filled inside the image
            print("Polygon not selected, returning full image")
            return _full_image_roi(image)
        xmin, ymin, xmax, ymax = bounds
        roi_img = image[ymin:ymax, xmin:xmax] # Crop ROI from image
        coords = (xmin, ymin, xmax, ymax) # Get ROI coordinates
    else:
        raise ValueError("mode must be 'square' or 'polygon'") # Raise error for invalid mode
    return roi_img, mask, coords # Return ROI image, mask, and coordinates

def load_and_process_czi_singleROI(file_path, name, output_path, roi = ROI_num):
    """
    Load a single-channel fluorescent image (.czi file), pre-process it and crop user-selected ROIs.

    The image is normalised to uint8, contrast-enhanced with CLAHE and thresholded with Otsu's method.
    The user then selects ``roi`` regions interactively; each region that has coordinates is cropped
    from the processed images, displayed and saved.

    Args:
        file_path (str): Full path to the .czi file.
        name (str): File name used as the sample ID.
        output_path (str): Directory where output images will be saved.
        roi (int): Number of ROIs to be selected and processed per sample.

    Returns:
        tuple:
            image (ndarray): Raw image data after squeezing dimensions.
            roi_masks (list[ndarray]): List of binary masks for each selected ROI.
            roi_coords (list[tuple]): List of coordinates for each selected ROI.
            mip_image (ndarray): Normalized maximum-intensity projection (MIP) image in uint8.
            gray_image (ndarray): Greyscale MIP image.
            clahe_image (ndarray): CLAHE-enhanced greyscale image.
            otsu_mask (ndarray): Binary mask from Otsu thresholding.
            output_mip_path (str): Path for saving the MIP image.
            output_mask_path (str): Path for saving the Otsu mask image.
            mip_imageROI (list[ndarray]): List of MIP images cropped to each selected ROI.
            roi_grayROI (list[ndarray]): List of greyscale MIP images cropped to each selected ROI.
            roi_claheROI (list[ndarray]): List of CLAHE-enhanced images cropped to each selected ROI.
            roi_otsuROI (list[ndarray]): List of Otsu masks cropped to each selected ROI.
            output_mipROI_paths (list[str]): List of paths for saving each ROI MIP image.
            output_maskROI_paths (list[str]): List of paths for saving each ROI Otsu mask image.
        If processing fails, a tuple of 15 ``None`` values is returned so that callers
        can always unpack the result and test ``image is None``.
    """
    n_outputs = 15 # Length of the success tuple; the failure tuple has the same length
    sampleID = name # Name of the sample (set before the try block so the error message can always use it)
    # Defining lists to store ROI data
    roi_masks = []
    roi_coords = []
    mip_imageROI = []
    roi_grayROI = []
    roi_claheROI = []
    roi_otsuROI = []
    output_mipROI_paths = []
    output_maskROI_paths = []
    try:
        with czifile.CziFile(file_path) as czi:
            data = czi.asarray() # Read confocal image data from .czi file
        image = np.squeeze(data) # Squeeze data to remove singleton dimensions
        mip_image = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8) # Normalise image to uint8
        mip_image_rgb = cv2.cvtColor(mip_image, cv2.COLOR_GRAY2RGB) # Convert grayscale MIP to RGB
        gray_image = cv2.cvtColor(mip_image_rgb, cv2.COLOR_RGB2GRAY) # Convert RGB MIP to grayscale
        clahe_image = clahe.apply(gray_image) # Apply CLAHE for contrast enhancement
        _, otsu_mask = cv2.threshold(clahe_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) # Otsu's thresholding
        output_mip_path = os.path.join(output_path, f"{sampleID}.tiff") # Define output MIP path
        output_mask_path = os.path.join(output_path, f"{sampleID}_mask.tiff") # Define output mask path
        for _ in range(roi): # Loop to selected number of ROIs
            selection = select_roi(image, mode=ROI_mode) # Select ROI
            # Only the mask and the coordinates are needed. Indexing (rather than unpacking)
            # keeps this working regardless of how many trailing values select_roi returns.
            roi_masks.append(selection[1]) # Store ROI mask
            roi_coords.append(selection[2]) # Store ROI coordinates
        for i, coords in enumerate(roi_coords): # Process each ROI
            if coords is None: # No ROI was drawn (or the selection was cancelled)
                print("No coordinates selected") # This ROI is skipped: nothing is cropped or saved for it
                continue
            x1, y1, x2, y2 = coords # Get ROI coordinates
            roi_mip = mip_image[y1:y2, x1:x2] # Crop MIP to ROI
            roi_gray = gray_image[y1:y2, x1:x2] # Crop grayscale MIP to ROI
            roi_clahe = clahe_image[y1:y2, x1:x2] # Crop CLAHE image to ROI
            roi_otsu = otsu_mask[y1:y2, x1:x2] # Crop Otsu mask to ROI
            mip_imageROI.append(roi_mip) # Store ROI MIP
            roi_grayROI.append(roi_gray) # Store ROI grayscale MIP
            roi_claheROI.append(roi_clahe) # Store ROI CLAHE image
            roi_otsuROI.append(roi_otsu) # Store ROI Otsu mask
            output_mipROI_path = os.path.join(output_path, f"{sampleID}_ROI{i}.tiff") # Define output ROI MIP path
            output_maskROI_path = os.path.join(output_path, f"{sampleID}_ROI{i}_mask.tiff") # Define output ROI mask path
            output_mipROI_paths.append(output_mipROI_path) # Store output ROI MIP path
            output_maskROI_paths.append(output_maskROI_path) # Store output ROI mask path
            display_and_save_mip_otsu(roi_gray, roi_clahe, roi_otsu, output_mipROI_path, output_maskROI_path) # Display and save ROI images
        return (image, roi_masks, roi_coords, mip_image, gray_image, clahe_image, otsu_mask,
                output_mip_path, output_mask_path, mip_imageROI, roi_grayROI, roi_claheROI,
                roi_otsuROI, output_mipROI_paths, output_maskROI_paths)
    except Exception as e:
        # Best-effort batch processing: report the failure and hand back placeholders
        print(f"Error processing {sampleID}: {e}") # Print error message if processing fails
        return (None,) * n_outputs
    
def save_plot_as_pdf(fig, output_path, dpi=pdf_dpi):
    """
    Write a Matplotlib figure to disk as a PDF and report the outcome on stdout.

    Args:
        fig (matplotlib.figure.Figure): Figure to be written.
        output_path (str): Destination path of the PDF file.
        dpi (int): Resolution passed to ``savefig``.

    Returns:
        None. Failures are printed rather than raised.
    """
    save_options = {"format": "pdf", "dpi": dpi, "bbox_inches": "tight"} # Tight bounding box trims empty margins
    try:
        fig.savefig(output_path, **save_options) # Save figure as PDF
        print(f"Plot saved as PDF: {output_path}")
    except Exception as error: # Report the problem and carry on
        print(f"Error saving plot as PDF: {error}")
        
def display_and_save_mip_otsu(gray_image, clahe_image, otsu_mask, output_mip_path, output_mask_path):
    """
    Display MIP, CLAHE, and Otsu mask panels and save the MIP and the Otsu mask as images.

    Args:
        gray_image (ndarray): Greyscale MIP image.
        clahe_image (ndarray): CLAHE-enhanced greyscale image.
        otsu_mask (ndarray): Binary mask from Otsu thresholding.
        output_mip_path (str): Path for saving the MIP image.
        output_mask_path (str): Path for saving the Otsu mask image.

    Returns:
        None
    """
    panels = (
        (gray_image, "Maximum Intensity Projection (MIP)"),
        (clahe_image, "Clahe processed image"),
        (otsu_mask, "Otsu’s Threshold Mask"),
    )
    fig, axes = plt.subplots(1, 3, figsize=(8, 5)) # Create figure with 3 panels
    for ax, (panel, title) in zip(axes, panels):
        ax.imshow(panel, cmap="gray")
        ax.set_title(title)
        ax.axis("off")
    plt.show() # Show the figure
    plt.close(fig) # Release the figure so repeated calls do not accumulate open figures

    # cv2.imwrite signals a failed write (e.g. missing output folder) by returning False
    # instead of raising, so the return values are checked before reporting success.
    saved = (
        (output_mip_path, cv2.imwrite(output_mip_path, gray_image)), # Save MIP image
        (output_mask_path, cv2.imwrite(output_mask_path, otsu_mask)), # Save Otsu mask image
    )
    failed = [path for path, ok in saved if not ok]
    if failed:
        print(f"Warning: could not save image(s): {', '.join(failed)}")
    else:
        print(f"Images saved: {output_mip_path} and {output_mask_path}")

def visualize_segmented_regions(gray_image, seg_im, cmap_name='hsv', min_vessel_size = min_vessel_size):
    """
    Visualise labelled vessel segments overlaid on the greyscale image.

    Segments are labelled, segments smaller than ``min_vessel_size`` are removed, and each
    remaining segment is drawn in its own colour with its label number printed at its centroid.

    Args:
        gray_image (ndarray): Greyscale MIP image for background.
        seg_im (ndarray): Binary vessel segmentation mask.
        cmap_name (str): Matplotlib colormap name for segment colouring.
        min_vessel_size (int): Minimum segment size to keep (pixels).

    Returns:
        labeled_segments (ndarray): Labelled segmentation image with a unique integer label per
            remaining segment. Label numbers are those assigned before the size filter, so they
            may contain gaps.
        Returns None if no segments are detected or on failure.
    """
    try:
        labeled_segments = label(seg_im) # Label each vessel segment
        if np.max(labeled_segments) == 0: # Check if any segments are detected before doing more work
            print("No segments detected.")
            return None
        labeled_segments = remove_small_objects(labeled_segments, min_size=min_vessel_size) # Remove small vessel segments based on minimum vessel size
        kept_labels = np.unique(labeled_segments)
        kept_labels = kept_labels[kept_labels != 0] # Labels that survived the size filter (0 is background)
        if kept_labels.size == 0:
            print(f"All segments are smaller than min_vessel_size ({min_vessel_size} px).")

        # Normalise grayscale image for better visualisation; a constant image is shown as
        # black instead of dividing by zero.
        lowest, highest = gray_image.min(), gray_image.max()
        if highest > lowest:
            gray_image_norm = (gray_image - lowest) / (highest - lowest)
        else:
            gray_image_norm = np.zeros(np.shape(gray_image), dtype=float)

        cmap = matplotlib.colormaps.get_cmap(cmap_name) # Generate colours from the chosen colourmap
        # One colour per remaining segment, so the colours stay well separated after filtering
        colors = cmap(np.linspace(0, 1, max(kept_labels.size, 1)))[:, :3] # Get RGB values for each label
        overlay = label2rgb(labeled_segments, image=gray_image_norm, bg_label=0, alpha=0.6, colors=colors) # Overlay coloured segments on grayscale image
        # Create a figure
        fig, ax = plt.subplots(figsize=(8, 6)) # Set figure size
        ax.imshow(overlay, cmap='gray') # Display overlay image
        ax.set_title("Vessel Segmentation Visualization")
        for region in regionprops(labeled_segments): # Loop through each labeled region
            row, col = region.centroid[0], region.centroid[1] # Centroid is (row, col); text is placed at (x=col, y=row)
            ax.text(col, row, str(region.label), color='white', fontsize=8, ha='center', va='center', fontweight='bold')
        ax.axis('off') # Remove axes
        plt.show() # Show the figure
        return labeled_segments # Return labeled segments for further processing
    except Exception as e:
        print(f"Error visualizing segmented regions: {e}") # Print error message if visualization fails
        return None
    
def remove_selected_segments(labeled_segments, remove_list):
    """
    Manual curation: drop the user-selected labels from a labelled segmentation image.

    Args:
        labeled_segments (ndarray): Labelled segmentation image (0 is background).
        remove_list (list[int] or None): Labels to discard, as shown in the segment visualisation.
            With None nothing is discarded.

    Returns:
        ndarray: Boolean mask that is True on every remaining (non-background) segment pixel.
        Returns None on failure.
    """
    try:
        to_drop = np.isin(labeled_segments, remove_list) # True where a pixel belongs to a discarded label
        surviving = np.where(to_drop, 0, labeled_segments) # Discarded labels become background
        return surviving > 0 # Binarise the curated label image
    except Exception as error:
        print(f"Error removing selected segments: {error}") # Report the failure to the user
        return None

def _show_segmentation_panels(gray_image, clahe_image, seg_panel, seg_title, otsu_mask):
    """Show the 4-panel overview (MIP, CLAHE, segmentation, Otsu mask) and return the figure."""
    panels = (
        (gray_image, "Maximum Intensity Projection (MIP)"),
        (clahe_image, "Clahe processed image"),
        (seg_panel, seg_title),
        (otsu_mask, "Otsu’s Threshold Mask"),
    )
    fig, axes = plt.subplots(1, 4, figsize=(15, 5)) # Create figure with 4 panels
    for ax, (panel, title) in zip(axes, panels):
        ax.imshow(panel, cmap="gray")
        ax.set_title(title)
        ax.axis("off")
    plt.show() # Show the figure
    return fig


def _parse_segment_labels(raw):
    """Turn a comma-separated string of segment numbers into a list of ints, skipping blank or invalid entries."""
    labels = []
    for token in raw.split(","):
        token = token.strip()
        if not token: # Tolerate trailing commas and doubled separators
            continue
        try:
            labels.append(int(token))
        except ValueError:
            print(f"Ignoring invalid segment number: {token!r}")
    return labels


def segment_and_analyze_vessels(file_name, image, gray_image, clahe_image, otsu_mask, output_path, 
                                im_filter=seg_method, sigma1=sigma1, hole_size=hole_size, 
                                ditzle_size=ditzle_size, thresh=thresh):
    """
    Segment vessels, optionally apply manual curation and visualise the results.

    The function shows the raw segmentation, shows the labelled segments, asks the user which
    segment numbers to remove, relabels the curated mask and shows the final result. The
    overview figures are saved as PDFs in ``output_path``.

    Args:
        file_name (str): File name used to derive the sample ID (text before '.tiff').
        image (ndarray): Raw image data used for segmentation.
        gray_image (ndarray): Greyscale MIP image.
        clahe_image (ndarray): CLAHE-enhanced greyscale image.
        otsu_mask (ndarray): Binary mask from Otsu thresholding.
        output_path (str): Directory for saving figures.
        im_filter (str): Vessel enhancement filter name (meijering, frangi, sato or jerman).
        sigma1 (range): Sigma range for vessel enhancement.
        hole_size (int): Max hole size to fill in the mask.
        ditzle_size (int): Max size of small objects to remove.
        thresh (int): Threshold for binarisation of the enhanced image.

    Returns:
        tuple:
            vessel_seg (ndarray): Final labelled vessel segmentation mask (curated if manual curation is applied).
            remove_list (list[int] or None): Labels removed by the user, or None if nothing was removed.
            num_labels (int): Number of labelled segments in the final segmentation mask.
        Returns None on failure or when no segments are available.
    """
    try:
        sampleID = file_name.split('.tiff')[0] # Sample ID shared by all output file names
        # Segment vessels using specified filtering method and parameters
        vessel_seg = vm.segment_image(image,
                                      im_filter=im_filter, # Vessel segmentation method
                                      sigma1=sigma1, # Sigma range for vessel enhancement filter
                                      hole_size=hole_size, # Hole size to be filled in the vessel mask
                                      ditzle_size=ditzle_size, # Size of small objects to be removed from the vessel mask
                                      thresh=thresh) # Threshold value to binarise the vessel enhanced image
        seg_im = vessel_seg.astype(np.uint8) # Convert binary mask to uint8 format

        # Visualise initial segmentation results
        fig = _show_segmentation_panels(gray_image, clahe_image, seg_im, "Vessel segmentation image", otsu_mask)
        save_plot_as_pdf(fig, os.path.join(output_path, f"{sampleID}_segmentation_mask_prefiltering.pdf")) # Save figure as PDF

        # Visualise segmented regions with labels for manual curation
        labeled_segments = visualize_segmented_regions(gray_image, seg_im, cmap_name='hsv', min_vessel_size = min_vessel_size) # Visualise segmented regions
        # NOTE(review): `fig` is still the pre-filtering figure created above. The labelled overlay
        # figure is created inside visualize_segmented_regions and is not returned, so this PDF
        # repeats the pre-filtering panels. Saving the real overlay needs that function to expose its figure.
        save_plot_as_pdf(fig, os.path.join(output_path, f"{sampleID}_segmentation_labelled.pdf")) # Save figure as PDF
        if labeled_segments is None: # Nothing to curate
            print(f"No vessel segments available for {file_name}")
            return None

        # Manual curation: Remove unwanted segments based on user input
        raw_selection = input("Enter segment numbers to remove (comma-separated) or press Enter to skip: ").strip() # Get user input for segments to remove
        remove_list = _parse_segment_labels(raw_selection) or None # None means no segments removed
        cleaned_mask = remove_selected_segments(labeled_segments, remove_list) # Remove selected segments (keeps everything when remove_list is None)
        labeled_segments = label(cleaned_mask) # Relabel the manually curated mask
        num_labels = np.max(labeled_segments) # Calculate the maximum number of labelled ISVs
        vessel_seg = labeled_segments # Update vessel segmentation mask

        # Visualise manually curated segmentation results
        print("Visualising filtered segmentation image")
        fig = _show_segmentation_panels(gray_image, clahe_image, cleaned_mask, "Filtered vessel segmentation image", otsu_mask)
        save_plot_as_pdf(fig, os.path.join(output_path, f"{sampleID}_segmentation_mask.pdf")) # Save figure as PDF
        return vessel_seg, remove_list, num_labels
    except Exception as e:
        print(f"Error segmenting vessels for {file_name}: {e}") # Print error message if segmentation fails
        return None

def extract_isv_centroids(vessel_seg):
    """
    Extract centroids of labelled intersegmental vessel (ISV) segments and sort them left-to-right.

    Args:
        vessel_seg (ndarray): Labelled vessel segmentation mask with unique integer labels for each segment.

    Returns:
        centroids (ndarray): Array of centroids (y, x) for each ISV segment, sorted by increasing
            x-coordinate (left to right). If the mask contains no labelled segment, an empty array
            with zero rows (and one column per image dimension) is returned.
    """
    regions = regionprops(vessel_seg) # Extract connected vessel components (each ISV is a unique segment)
    if not regions:
        # Keep the result two-dimensional so that column indexing (e.g. centroids[:, 1]) still works
        return np.empty((0, np.ndim(vessel_seg)))
    centroids = np.array([r.centroid for r in regions]) # Extract centroid (y, x) of each region and convert list into array
    order = np.argsort(centroids[:, 1]) # Order ISV from left to right (by increasing x-coordinates)
    return centroids[order] # Sorted ISV positions

def normalize_x_positions(centroids):
    """
    Normalise x-coordinates of centroids to the range [0, 1].
    This allows for comparison of ISV positions across samples with different sizes and scales.

    Args:
        centroids (ndarray): Array of centroids (y, x) for each ISV segment.

    Returns:
        numpy.ndarray: Normalised x-coordinates of the centroids, scaled to the range [0, 1].
            An empty input gives an empty array. When all x-coordinates are identical
            (including a single centroid) every position is reported as 0.0 instead of NaN.
    """
    centroids = np.asarray(centroids, dtype=float)
    if centroids.size == 0: # No centroids: nothing to normalise
        return np.zeros(0, dtype=float)
    x = centroids[:, 1] # Extract only x-coordinates
    shifted = x - x.min()
    span = x.max() - x.min()
    if span == 0: # Avoid 0/0, which would turn every position into NaN
        return np.zeros_like(shifted)
    return shifted / span # Returns normalised x-centroids

def detect_lost_isvs(ref_centroids, target_centroids, tol = tolerance):
    """
    Identify ISVs that are missing in a target sample relative to a reference.

    Args:
        ref_centroids (ndarray): Array of centroids (y, x) for ISVs in the reference sample (e.g. wild type).
        target_centroids (ndarray): Array of centroids (y, x) for ISVs in the target sample (e.g. mutant).
        tol (float): Maximum normalised x-distance allowed for an ISV to count as "matched" between the reference and target samples. ISVs with no match within this tolerance are considered "lost".

    Returns:
        lost (ndarray): Boolean array indicating which ISVs in the reference sample are "lost" in the target sample (True for lost, False for matched).
        min_dist (ndarray): Array of minimum normalised x-distances from each reference ISV to the nearest target ISV.
        If the reference has no ISVs both arrays are empty. If the target has no ISVs every
        reference ISV is reported as lost with an infinite distance.
    """
    ref_centroids = np.asarray(ref_centroids)
    target_centroids = np.asarray(target_centroids)
    n_ref = len(ref_centroids)
    if n_ref == 0: # Nothing to compare against
        return np.zeros(0, dtype=bool), np.zeros(0, dtype=float)
    if len(target_centroids) == 0: # No target ISVs at all: every reference ISV is unmatched
        return np.ones(n_ref, dtype=bool), np.full(n_ref, np.inf)
    ref_x = normalize_x_positions(ref_centroids)       # Normalise ISV x positions to [0, 1] in the reference sample dataset (e.g. wild type)
    target_x = normalize_x_positions(target_centroids) # Normalise ISV x positions to [0, 1] in the target dataset (e.g. mutant)
    dist = cdist(ref_x[:, None], target_x[:, None])    # Pairwise distances between reference and target positions (one row per reference ISV)
    min_dist = dist.min(axis=1)  # For each reference ISV, find the nearest target ISV
    lost = min_dist > tol  # An ISV is "lost" if no target ISV is within tolerance
    return lost, min_dist # Loss flags and nearest distances, used to compute the probability of ISV loss

def plot_loss_probabilities(prob_summary):
    """
    Plot grouped bar chart of ISV loss probabilities for each group.

    The figure is saved as 'ISV_loss_probabilities.pdf' (relative to the current working directory) and then displayed.
    
    Args:
        prob_summary (DataFrame): DataFrame containing an "ISV" column with the ISV labels and one column of loss probabilities per group.
    
    Returns None
    """
    isvs = prob_summary["ISV"] # Extract ISV labels from the dataframe
    groups = [c for c in prob_summary.columns if c != "ISV"] # Extract group names (e.g. reference versus target samples)
    n_groups = max(len(groups), 1) # Avoid division by zero when no group column is present
    # Keep the original width for up to two groups; shrink it for more groups so that
    # the bars of one ISV never overlap the bars of the next ISV
    bar_width = min(0.35, 0.8 / n_groups)
    x = np.arange(len(isvs)) # Create x-coordinate positions for each ISV
    plt.figure(figsize=(8, 6))
    # Colours assigned to each group for plotting.
    # The analysis labels the groups "reference" and "target", so these labels are mapped explicitly;
    # the ref_name / target_name keys are kept for summaries whose columns use the raw sample-name tags.
    colors = {
        ref_name: "steelblue",
        target_name: "firebrick",
        "reference": "steelblue",
        "target": "firebrick",
    }
    # Loop through each group and plot its bar series
    for i, group in enumerate(groups):
        plt.bar(
            x + i * bar_width,                  # Shift each bar horizontally
            prob_summary[group],                # Height of the bar is defined by the probability of ISV loss
            width=bar_width,                    # Bar width
            label=group,                        # Labels for legend
            color=colors.get(group, "gray"),    # Group colour; groups without an assigned colour are drawn in gray
            alpha=0.8                           # Reduce the opacity of the colours
        )
    plt.xticks(x + bar_width * (n_groups - 1) / 2, isvs, rotation=90) # Set x-axis tick centered under each cluster of grouped bars
    # NOTE(review): the axis label states "%" but the values are plotted as passed in; confirm the caller's scale (fraction vs percent)
    plt.ylabel("Probability of ISV loss by position (%)")   # Title for y-axis
    # plt.xlabel("ISV index (left → right)")                # Title for x-axis
    # plt.title("ISV Loss Probability per Group")           # Figure title
    plt.legend()                                            # Display figure legend within the plot
    # Remove top and right spines for visualisation purposes
    ax = plt.gca()
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    plt.tight_layout() # Adjust layout to avoid label cutoff
    title_fig = 'ISV_loss_probabilities.pdf'
    plt.savefig(title_fig, format="pdf") # Save before plt.show()
    plt.show()         # Display figure

## <a id='toc3_3_'></a>[Perform ISV loss analysis](#toc0_)

### <a id='toc3_3_1_'></a>[Run samples in working directory](#toc0_)

In [None]:
# Define file name and extension. Collect all files in the directory that match the name prefix
file_names = [f for f in os.listdir(data_path) if f.startswith(name_start) and f.endswith(".czi")]

for file_name in file_names: # Loop through each file in the working directory
    file_path = os.path.join(data_path, file_name) # Build full path to the file
    sampleID = file_name.split('_MaxInt')[0] # Extract sample identifier 
    print(f"\n--- Processing {sampleID} ---")
    (image,                 # Original image
     roi_masks,             # ROI mask
     roi_coords,            # ROI coordinates
     mip_image,             # Maximum intensity projection (MIP)
     gray_image,            # Greyscale MIP
     clahe_image,           # CLAHE-enhanced MIP
     otsu_mask,             # OTSU threshold mask
     output_mip_path,       # Output path for MIP
     output_mask_path,      # Output path for mask
     mip_imageROI,          # List of ROI MIP images
     roi_grayROI,           # List of ROI grayscale images
     roi_claheROI,          # List of ROI CLAHE images
     roi_otsuROI,           # List of ROI OTSU masks
     output_mipROI_paths,   # Output paths for ROI MIPs
     output_maskROI_paths,  # Output paths for ROI masks
    ) = load_and_process_czi_singleROI(file_path, file_name, output_path, roi = ROI_num) # Load and process the image (single channel)
    # Skip this file if ROI extraction failed (presumably signalled by a None ROI list).
    # The check must run before the results are stored: list.extend(None) raises a TypeError.
    if mip_imageROI is None:
        continue
    # Store all ROI-level outputs for later analysis
    all_mipROIs.extend(mip_imageROI)
    all_grayROIs.extend(roi_grayROI)
    all_claheROIs.extend(roi_claheROI)
    all_otsuROIs.extend(roi_otsuROI)
    output_ROI_paths.extend(output_mipROI_paths)
    output_ROImask_paths.extend(output_maskROI_paths)
    all_coords.extend(roi_coords)

### <a id='toc3_3_2_'></a>[Generate ISV segmentation masks](#toc0_)

In [None]:
# Note: The user is recommended to clear all jupyter outputs to save memory
for i, mip_roi in enumerate(all_mipROIs): # Loop through each ROI to perform vessel segmentation
    # Extract filename and sample ID from the ROI output path.
    # os.path.basename follows the separator convention of the running platform,
    # so the name is not left as a full path when the path was not built with "/".
    sample_name = os.path.basename(output_ROI_paths[i])
    sampleID = sample_name.split('.tiff')[0]
    # Vessel segmentation within the defined ROI
    vessel_seg, remove_list, num_labels = segment_and_analyze_vessels(file_name=sample_name,        # Sample identifier
                                                                      image=mip_roi,                # MIP ROI
                                                                      gray_image=all_grayROIs[i],   # Greyscale ROI
                                                                      clahe_image=all_claheROIs[i], # CLAHE-enhanced ROI
                                                                      otsu_mask=all_otsuROIs[i],    # OTSU threshold ROI
                                                                      output_path=output_path,      # Path to output directory
                                                                      im_filter=seg_method,         # Vessel segmentation method
                                                                      sigma1=sigma1,                # Sigma for enhancement filtering
                                                                      hole_size=hole_size,          # Minimum hole size to fill
                                                                      ditzle_size=ditzle_size,      # Small object removal threshold
                                                                      thresh=thresh)                # Thresholding value
    seg_mask.append(vessel_seg)   # Collect all vessel segmentations
    ISV_labels.append(num_labels) # Collect the number of ISV per sample
    centr = extract_isv_centroids(vessel_seg) # Determine the centroid for each ISV
    centroids.append(centr) # Collect all ISV centroids
    # Collect the ISV information of this sample as a dictionary
    embryo_data.append({
            "sample": sampleID,     # Sample identifier
            "centroids": centr,     # ISV centroid coordinates
            "isv_count": num_labels # Total number of ISV
        })

### <a id='toc3_3_3_'></a>[Calculate probability of ISV loss and save results](#toc0_)

In [None]:
# Note: Reference image is selected based on the maximum number of ISV
if not embryo_data: # Fail with a clear message instead of the generic error raised by max() on an empty list
    raise ValueError("embryo_data is empty: generate the ISV segmentation masks before calculating ISV loss")
ref_embryo = max(embryo_data, key=lambda e: e["isv_count"]) # Select reference image 
ref_centroids = ref_embryo["centroids"][:30] # Collect the ISV centroids from the reference image and ensure no more than 30 ISV have been recognised
ref_n = len(ref_centroids) # Calculate the number of ISVs used for comparison
isv_loss_matrix = []  # Empty matrix that will store one binary "lost" row per embryo
# Compare each embryo with the reference and classify groups
for e in embryo_data:
    lost, _ = detect_lost_isvs(ref_centroids, e["centroids"]) # Compute which ISVs are lost relative to reference embryo
    isv_loss_matrix.append(lost.astype(int)) # Append results into a matrix in a binary format (1 = lost, 0 = present)
    # Assign group label based on sample name
    if ref_name in e['sample']:
        e['group'] = 'reference'        # Reference sample
    elif target_name in e['sample']:
        e['group'] = 'target'           # Target sample
    else:
        e['group'] = 'Unknown'
    e['lost_isvs'] = lost # Store the boolean mask of lost ISVs (True = lost) for this embryo
isv_loss_matrix = np.array(isv_loss_matrix) # Convert list into array
loss_probability = isv_loss_matrix.mean(axis=0) # Calculate the probability of ISV loss across all samples
all_loss = np.array([r["lost_isvs"] for r in embryo_data]) # Extract lost ISV arrays for each embryo
df_loss = pd.DataFrame(all_loss, columns=[f"ISV_{i+1}" for i in range(ref_n)]) # Create DataFrame with one column per ISV
# Add metadata columns
df_loss.insert(0, "group", [r["group"] for r in embryo_data])
df_loss.insert(0, "sample", [r["sample"] for r in embryo_data])
df_loss.insert(0, "isv_count", [r["isv_count"] for r in embryo_data])
# Sort by group and compute mean probability loss for each ISV
prob_summary = (
    df_loss.drop(columns=["sample", "isv_count"]) # keep only ISV columns and group identifiers
        .groupby("group")
        .mean() # Mean loss per group
        .T      # Transpose so ISVs are rows
        .reset_index()
        .rename(columns={"index": "ISV"}) # Name ISV column
    )
plot_loss_probabilities(prob_summary)

# Save results as Excel file
max_sheet_name_len = 31         # Excel limits worksheet names to 31 characters
used_sheet_names = {"Summary"}  # Sheet names already taken in the workbook
with pd.ExcelWriter(output_name, engine='openpyxl') as writer:
    prob_summary.to_excel(writer, sheet_name="Summary", index=False) # Write first sheet with the summary results
    for embryo in embryo_data: # Write individual embryo data into separate sheets
        df = pd.DataFrame({ # Create a dataframe for each embryo
            # 'ISV_index': np.arange(1, embryo['isv_count'] + 1),
            'Centroid_Y': embryo['centroids'][:, 0], # Store the y coordinate of the ISV centroid
            'Centroid_X': embryo['centroids'][:, 1], # Store the x coordinate of the ISV centroid
        })
        sheet_name = "_".join(embryo['sample'].split("_")[0:5]) # Long sample names: keep only the first five "_"-separated fields
        if len(embryo['sample']) < 30: # Short sample names are used directly (without extension)
            sheet_name = embryo['sample'].split('.')[0][:30]
        sheet_name = sheet_name[:max_sheet_name_len] # Enforce the Excel sheet-name length limit
        # Shortened names can coincide between embryos; a repeated name would otherwise send two embryos
        # to the same sheet, so a numeric suffix is appended until the name is unique.
        base_name = sheet_name
        dup_idx = 1
        while sheet_name in used_sheet_names:
            suffix = f"_{dup_idx}"
            sheet_name = base_name[:max_sheet_name_len - len(suffix)] + suffix
            dup_idx += 1
        used_sheet_names.add(sheet_name)
        df.to_excel(writer, sheet_name=sheet_name, index=False) # Save individual embryo data within the sheet