In [1]:
import os
import re
import numpy as np
import pandas as pd
from PIL import Image
from segment_anything import sam_model_registry, SamPredictor

In [2]:
# Build the Segment Anything (SAM) predictor that later cells use for mask generation.
# The registry key selects the model variant: "vit_h", "vit_l", or "vit_b".
model_type = "vit_h"
sam = sam_model_registry[model_type](
    checkpoint="./sam_vit_h_4b8939.pth"  # presumably must match model_type -- verify when switching variants
)
predictor = SamPredictor(sam)

  state_dict = torch.load(f)


In [3]:
def get_identifier(filename: str) -> str:
    """
    Build the pairing key for an image from its filename.

    The filename must start with a letter A-D followed by digits, an underscore
    and more digits (e.g. 'A1_03'), and must end with an underscore, exactly
    three digits and the '.tif' extension (e.g. '_001.tif'). Anything in
    between is ignored.

    Args:
        filename (str): Image filename, e.g. 'A1_03_1_1_Phase Contrast_001.tif'.

    Returns:
        str: '<prefix>_<last three digits>', e.g. 'A1_03_001'.
             None when the filename does not follow the expected pattern.

    Example:
        get_identifier("A1_03_1_1_Phase Contrast_001.tif") returns "A1_03_001".
    """
    parsed = re.match(r"^([A-D]\d+_\d+).*_(\d{3})\.tif$", filename)
    if not parsed:
        # Unexpected naming scheme: signal "no identifier" to the caller
        return None

    prefix, trailing_digits = parsed.groups()
    return f"{prefix}_{trailing_digits}"

In [None]:
def _collect_image_pairs(input_dir: str) -> tuple:
    """Index the '.tif' files of input_dir by identifier, split into Phase Contrast and GFP dicts."""
    phase_contrast_files = {}  # identifier -> 'Phase Contrast' file name
    gfp_files = {}             # identifier -> 'GFP' file name

    # os.listdir returns entries in arbitrary order; sorting makes the processing
    # order (and therefore the CSV row order) reproducible between runs.
    for file_name in sorted(os.listdir(input_dir)):
        if not file_name.endswith(".tif"):
            continue
        identifier = get_identifier(file_name)
        if not identifier:
            continue  # Filename does not follow the expected naming pattern
        if "Phase Contrast" in file_name:
            phase_contrast_files[identifier] = file_name
        elif "GFP" in file_name:
            gfp_files[identifier] = file_name

    return phase_contrast_files, gfp_files


def _to_uint8_rgb(gray: np.ndarray) -> np.ndarray:
    """Rescale a grayscale array to the 0-255 uint8 range and replicate it into three channels."""
    peak = gray.max()
    if peak > 0:
        scaled = (gray / peak * 255).astype(np.uint8)
    else:
        # An all-zero image would otherwise divide by zero and produce NaNs
        scaled = np.zeros(gray.shape, dtype=np.uint8)
    return np.stack([scaled] * 3, axis=-1)


def process_images(input_dir: str, output_dir: str,
                   save_masks: bool = False, save_masked_images: bool = False) -> pd.DataFrame:
    """
    Processes pairs of 'Phase Contrast' and 'GFP' images to generate a binary mask using SAM,
    calculate GFP intensity within the mask, and optionally save mask and masked images.

    Steps:
    1. Matches pairs of 'Phase Contrast' and 'GFP' images based on unique identifiers
       (see get_identifier). Images without a partner are skipped.
    2. Uses SAM (Segment Anything Model) with a single foreground point at the image center
       to create a mask from the 'Phase Contrast' image.
    3. Calculates the mean and total GFP intensity inside the mask in the corresponding GFP image.
    4. Collects the results in a DataFrame, writes them to 'gfp_intensity_results.csv' in
       output_dir, and optionally saves the masks and the masked GFP images.

    Notes:
        - Relies on the module-level `predictor` (a SamPredictor) created in an earlier cell.
        - output_dir is created if it does not exist.
        - A failure on one image pair is printed and does not stop the remaining pairs.
        - The total intensity is accumulated as uint64 so large masks cannot overflow.

    Args:
        input_dir (str): Directory containing the input TIF images.
        output_dir (str): Directory to save output files (e.g., masks and results).
        save_masks (bool): If True, saves generated masks as PNG files in
            '<output_dir>/phase_masks' for visual confirmation.
        save_masked_images (bool): If True, saves the masked GFP images to output directory.

    Returns:
        pd.DataFrame: One row per successfully processed pair with the columns
            'Identifier', 'Mean_GFP_Intensity' and 'Total_GFP_Intensity'.
            The columns are present even when no pair was processed.

    Example:
        process_images("input_directory", "output_directory", save_masks=True, save_masked_images=True)
    """
    # The CSV (and optional masked images) are written here, so the directory must exist
    os.makedirs(output_dir, exist_ok=True)

    # Create a subdirectory for phase masks if saving masks is enabled
    phase_mask_dir = os.path.join(output_dir, "phase_masks")
    if save_masks:
        os.makedirs(phase_mask_dir, exist_ok=True)

    # Keep only identifiers for which both a Phase Contrast and a GFP image exist
    phase_contrast_files, gfp_files = _collect_image_pairs(input_dir)
    image_pairs = [
        (identifier, phase_file, gfp_files[identifier])
        for identifier, phase_file in phase_contrast_files.items()
        if identifier in gfp_files
    ]

    # List to store calculated GFP intensity results for each identifier
    gfp_intensity_results = []

    for pair_number, (identifier, phase_file, gfp_file) in enumerate(image_pairs, start=1):
        try:
            phase_path = os.path.join(input_dir, phase_file)
            gfp_path = os.path.join(input_dir, gfp_file)

            # Context managers close the underlying files as soon as the pixels are copied
            with Image.open(phase_path) as phase_image:
                phase_np = np.array(phase_image, dtype=np.uint16)

            # SAM is given an 8-bit RGB image, so normalize and replicate the gray channel
            phase_rgb = _to_uint8_rgb(phase_np)
            predictor.set_image(phase_rgb)

            # Prompt SAM with one foreground point (label 1) at the image center, as (x, y)
            input_point = np.array([[phase_rgb.shape[1] // 2, phase_rgb.shape[0] // 2]])
            input_label = np.array([1])

            masks, _, _ = predictor.predict(
                point_coords=input_point,
                point_labels=input_label,
                multimask_output=False
            )
            mask = masks[0]  # multimask_output=False: use the first (primary) mask

            # Save the mask as a PNG if save_masks is True
            if save_masks:
                mask_output_path = os.path.join(phase_mask_dir, f"mask_{os.path.splitext(phase_file)[0]}.png")
                Image.fromarray((mask * 255).astype(np.uint8)).save(mask_output_path)

            # Load the GFP image without rescaling so intensities stay comparable
            with Image.open(gfp_path) as gfp_image:
                gfp_np = np.array(gfp_image, dtype=np.uint16)

            # Select the GFP pixels that fall inside the mask
            gfp_values_within_fish = gfp_np[mask > 0]
            mean_gfp_intensity = gfp_values_within_fish.mean() if gfp_values_within_fish.size > 0 else 0
            # Explicit 64-bit accumulator: the default accumulator for uint16 input is the
            # platform unsigned integer, which can be 32-bit and overflow for large masks.
            total_gfp_intensity = gfp_values_within_fish.sum(dtype=np.uint64)

            # Save the masked GFP image if save_masked_images is True
            if save_masked_images:
                gfp_masked = gfp_np * mask  # Pixels outside the mask become 0
                # The name keeps the source '.tif' inside it (e.g. 'masked_x.tif.png');
                # left unchanged so previously written outputs keep their names.
                output_path = os.path.join(output_dir, f"masked_{gfp_file}.png")
                Image.fromarray(gfp_masked.astype(np.uint16)).save(output_path)

            gfp_intensity_results.append({
                "Identifier": identifier,
                "Mean_GFP_Intensity": mean_gfp_intensity,
                "Total_GFP_Intensity": total_gfp_intensity
            })

            # Release the large arrays before the next pair is loaded
            del phase_np, phase_rgb, mask, gfp_np, gfp_values_within_fish

        except Exception as e:
            print(f"Error processing {identifier}: {e}")  # Report error without stopping execution

        # Print progress every 5 image pairs (counted over actual pairs only)
        if pair_number % 5 == 0:
            print(f"Processed {pair_number}/{len(image_pairs)} image pairs.")

    # Explicit columns so the frame and the CSV keep their header even with no results
    results_df = pd.DataFrame(
        gfp_intensity_results,
        columns=["Identifier", "Mean_GFP_Intensity", "Total_GFP_Intensity"]
    )
    csv_output_path = os.path.join(output_dir, "gfp_intensity_results.csv")
    results_df.to_csv(csv_output_path, index=False)

    print(f"Results saved to {csv_output_path}")
    return results_df

In [5]:
# Locations of the raw images and of the generated results for this run.
# NOTE(review): these are absolute, machine-specific paths -- adjust before running elsewhere.
output_dir = r"C:\Users\dave-\OneDrive - ZHAW\HS24\MoIm\MolecularIMaging\Images\Output_test_images"
input_dir = r"C:\Users\dave-\OneDrive - ZHAW\HS24\MoIm\MolecularIMaging\Images\test_images"

# Make sure the results folder exists before anything is written into it
os.makedirs(output_dir, exist_ok=True)

# Folder name used for the saved SAM masks inside the results folder
phase_mask_dir = os.path.join(output_dir, "phase_masks")

In [6]:
# Process every image pair in input_dir, keeping both the masks and the masked GFP images
gfp_intensity_results_df = process_images(
    input_dir=input_dir,
    output_dir=output_dir,
    save_masks=True,
    save_masked_images=True,
)

Results saved to C:\Users\dave-\OneDrive - ZHAW\HS24\MoIm\MolecularIMaging\Images\Output_test_images\gfp_intensity_results.csv
