# Configure Phenotype Parameters

This notebook should be used as a test for ensuring correct phenotype image loading and processing before running the phenotype module.
Cells marked with <font color='red'>SET PARAMETERS</font> contain crucial variables that need to be set according to your specific experimental setup and data organization.
Please review and modify these variables as needed before proceeding with the analysis.

## <font color='red'>SET PARAMETERS</font>

### Fixed parameters for phenotype processing

- `CONFIG_FILE_PATH`: Path to a Brieflow config file used during processing. Absolute or relative to where workflows are run from.

In [2]:
# Path of the Brieflow config file; it is opened and parsed by the config-loading cell below.
CONFIG_FILE_PATH = "config/config.yml"

## Imports

In [None]:
from pathlib import Path

import yaml
import numpy as np
from tifffile import imread
import matplotlib.pyplot as plt
from microfilm.microplot import Microimage
from skimage import measure

from lib.shared.configuration_utils import (
    CONFIG_FILE_HEADER,
    create_micropanel,
    random_cmap,
    image_segmentation_annotations,
    convert_tuples_to_lists,
)
from lib.shared.file_utils import get_filename
from lib.shared.illumination_correction import apply_ic_field
from lib.phenotype.align_channels import align_phenotype_channels, visualize_phenotype_alignment
from lib.shared.align import apply_custom_offsets
from lib.phenotype.identify_cytoplasm_cellpose import (
    identify_cytoplasm_cellpose,
)

## <font color='red'>SET PARAMETERS</font>

### Parameters for testing phenotype processing

- `TEST_PLATE`, `TEST_WELL`, `TEST_TILE`: Plate/well/tile combination used for configuring parameters in this notebook.

### Channels
- `CHANNEL_NAMES`: A list of names for each channel in your phenotyping image. These names will be used in the output data frame to label the features extracted from each channel.
- `CHANNEL_CMAPS`: A list of color maps to use when showing channel microimages. These need to be a Matplotlib or microfilm colormap. We recommend using: `["pure_red", "pure_green", "pure_blue", "pure_cyan", "pure_magenta", "pure_yellow"]`.

### Feature Extraction

- `FOCI_CHANNEL`: Name of the channel used for foci detection (e.g., "GH2AX", "DAPI"). The channel index will be automatically derived from this name.

In [None]:
# Parameters for testing
# Plate/well/tile used to build the file names of the test image and its IC field
TEST_PLATE = None
TEST_WELL = None
TEST_TILE = None
# Well/tile metadata passed as `wildcards` to the feature-extraction functions
WILDCARDS = dict(well=TEST_WELL, tile=TEST_TILE)

# Per-channel names and colormaps; both are indexed by channel position when plotting
CHANNEL_NAMES = None
CHANNEL_CMAPS = None

# Parameters for feature extraction
# Channel name for foci detection; when left unset the derived foci index is None
FOCI_CHANNEL = None

In [None]:
# Parse the Brieflow config file
with open(CONFIG_FILE_PATH, "r") as config_file:
    config = yaml.safe_load(config_file)

# Resolve the preprocess directory under the configured root
print("Loading test image...")
ROOT_FP = Path(config["all"]["root_fp"])
PREPROCESS_FP = ROOT_FP / "preprocess"

# Read the phenotype image of the test plate/well/tile
phenotype_image_dir = PREPROCESS_FP / "images" / "phenotype"
test_tile_wildcards = {"plate": TEST_PLATE, "well": TEST_WELL, "tile": TEST_TILE}
phenotype_test_image_path = str(
    phenotype_image_dir / get_filename(test_tile_wildcards, "image", "tiff")
)
phenotype_test_image = imread(phenotype_test_image_path)

# Read the illumination correction field of the test plate/well and apply it
print("Applying illumination correction...")
ic_field_dir = PREPROCESS_FP / "ic_fields" / "phenotype"
test_well_wildcards = {"plate": TEST_PLATE, "well": TEST_WELL}
ic_field_path = str(
    ic_field_dir / get_filename(test_well_wildcards, "ic_field", "tiff")
)
ic_field = imread(ic_field_path)
corrected_image = apply_ic_field(phenotype_test_image, correction=ic_field)

# Show each corrected channel with its configured name and colormap
print("Example corrected image:")
corrected_microimages = []
for channel_index in range(corrected_image.shape[0]):
    corrected_microimages.append(
        Microimage(
            corrected_image[channel_index],
            channel_names=CHANNEL_NAMES[channel_index],
            cmaps=CHANNEL_CMAPS[channel_index],
        )
    )
corrected_panel = create_micropanel(corrected_microimages, add_channel_label=True)
plt.show()

## <font color='red'>SET PARAMETERS</font>

### Aligning (optional)

- `ALIGN`: Whether to conduct alignment. This is suggested **unless** each image is captured with each channel consecutively. 
- `TARGET`: Name of the channel that other channels will be aligned to.
- `SOURCE`: Name of the channel to align with the target.
- `RIDERS`: List of additional channel names that should follow the same alignment as the source channel.
- `REMOVE_CHANNEL`: Specifies whether to remove channels after alignment. In the case of duplicate channels that are used to align the image, should be set to `source`.
- `UPSAMPLE_FACTOR`: Subpixel alignment precision factor (default: 2). Higher values provide more precise alignment but increase processing time.
- `WINDOW`: Size of the region used for alignment calculation (default: 2). Higher values use a smaller centered region of the image.

**Note for multi-round phenotyping**: For more than 2 imaging cycles (e.g., 3 rounds with repeated DAPI channels), perform sequential alignments by calling `align_phenotype_channels` multiple times in the next cell. Each round should align its channels to the same reference (e.g., the first DAPI).

### Custom Alignment (optional)

- `CUSTOM_CHANNEL_OFFSETS`: Dict mapping channel names to their (y, x) pixel offsets. Can be used independently or in combination with standard alignment for fine-tuning channel registration. Example: `{"DAPI": (5, 10), "AF750": (3, -2)}` shifts DAPI by 5 pixels up and 10 left, AF750 by 3 up and 2 right. Channel names must match those in `CHANNEL_NAMES`. Offset directions: +y = up, -y = down, +x = left, -x = right.

In [None]:
# Set alignment parameters
ALIGN = None
TARGET = None
SOURCE = None
RIDERS = None
REMOVE_CHANNEL = None
UPSAMPLE_FACTOR = 2
WINDOW = 2

# Set custom channel offsets (use channel names, not indices)
CUSTOM_CHANNEL_OFFSETS = None  # Example: {"DAPI": (5, 10), "AF750": (3, -2)}

# Derive alignment indexes from the channel names given above
if ALIGN:
    TARGET_INDEX = CHANNEL_NAMES.index(TARGET)
    SOURCE_INDEX = CHANNEL_NAMES.index(SOURCE)
    # RIDERS may be left as None when no channel follows the source;
    # treat that as an empty list instead of failing on iteration.
    RIDER_INDEXES = [CHANNEL_NAMES.index(rider) for rider in (RIDERS or [])]

# Derive custom alignment indexes from channel names
if CUSTOM_CHANNEL_OFFSETS:
    CUSTOM_CHANNEL_OFFSETS_INDEXED = {
        CHANNEL_NAMES.index(name): offset
        for name, offset in CUSTOM_CHANNEL_OFFSETS.items()
    }

In [None]:
# Start with the corrected image
aligned_image = corrected_image.copy()

# Apply custom offsets
if CUSTOM_CHANNEL_OFFSETS:
    print(f"Custom offsets: {CUSTOM_CHANNEL_OFFSETS_INDEXED}")
    aligned_image = apply_custom_offsets(
        aligned_image,
        offsets_dict=CUSTOM_CHANNEL_OFFSETS_INDEXED
    )

# Apply automatic alignment
if ALIGN:
    aligned_image = align_phenotype_channels(
        aligned_image,
        target=TARGET_INDEX,
        source=SOURCE_INDEX,
        riders=RIDER_INDEXES,
        remove_channel=REMOVE_CHANNEL,
        upsample_factor=UPSAMPLE_FACTOR,
        window=WINDOW,
        verbose=True,
    )

    # Keep CHANNEL_NAMES / CHANNEL_CMAPS in step with the REMOVE_CHANNEL setting
    # (presumably align_phenotype_channels drops the same channels from the
    # image -- confirm against its implementation).
    if REMOVE_CHANNEL == "source":
        channels_to_remove = [SOURCE]
    elif REMOVE_CHANNEL == "target":
        channels_to_remove = [TARGET]
    elif REMOVE_CHANNEL == "riders":
        channels_to_remove = list(reversed(RIDERS or []))
    else:
        channels_to_remove = []

    for channel_name in channels_to_remove:
        # The lists are mutated in place, so a name may already have been
        # removed by an earlier run of this cell; skip it so the cell can be
        # re-executed without a ValueError.
        if channel_name in CHANNEL_NAMES:
            remove_index = CHANNEL_NAMES.index(channel_name)
            CHANNEL_NAMES.pop(remove_index)
            CHANNEL_CMAPS.pop(remove_index)

### Visualize Alignment Quality (Optional)

Visualize channel alignment across 16 locations in the image. The first channel (DAPI) is shown in grayscale with the remaining 3 channels as an RGB overlay. You may want to consider removing channels for a first pass if you want to visualize alignment between different rounds.

- `VIZ_CHANNELS`: List of exactly 4 channel names to visualize (1st=grayscale base, 2nd-4th=RGB overlay)

In [None]:
# Channels to visualize: the first is the grayscale base, the other three form the RGB overlay
VIZ_CHANNELS = None

if VIZ_CHANNELS is None:
    print("Skipping visualization (VIZ_CHANNELS not set)")
else:
    print("Visualizing alignment across 16 locations...")
    viz_kwargs = dict(
        channel_names=CHANNEL_NAMES,
        viz_channels=VIZ_CHANNELS,
        crop_size=300,
    )
    fig = visualize_phenotype_alignment(aligned_image, **viz_kwargs)
    plt.show()

## <font color='red'>SET PARAMETERS</font>

### Segmentation

**IMPORTANT: GPU Recommendation for CPSAM**
If testing the CPSAM model (`CELLPOSE_MODEL="cpsam"`), we strongly recommend:
- Using a GPU-enabled machine (`GPU=True`)
- Allocating sufficient time (segmentation can take 30+ minutes per tile)
- Consider running this notebook in a GPU-enabled environment or testing on a smaller region

#### Select Segmentation Method
- `SEGMENTATION_METHOD`: Choose from "cellpose" or "stardist" for cell segmentation.

#### Cellpose Parameters (if using "cellpose")
- `CELLPOSE_MODEL`: CellPose model to use. Options: "cyto3" (default), "cyto2", "cyto", or "cpsam" (requires Cellpose 4.x).
- `CELL_FLOW_THRESHOLD` & `NUCLEI_FLOW_THRESHOLD`: Flow threshold for Cellpose segmentation. Default is 0.4.
- `CELL_CELLPROB_THRESHOLD` & `NUCLEI_CELLPROB_THRESHOLD`: Cell probability threshold for Cellpose. Default is 0.
- `HELPER_INDEX`: (Optional) Index of additional channel to help with CPSAM segmentation. Only used with `CELLPOSE_MODEL="cpsam"`. Default is None.
- Note: For Cellpose 3.x models (cyto3, cyto2), nuclei and cell diameters will be estimated automatically. For CPSAM (Cellpose 4.x), diameters can be left as None and will be estimated from initial segmentation results.

#### StarDist Parameters (if using "stardist")
- `STARDIST_MODEL`: StarDist model type. Default is "2D_versatile_fluo".
- `CELL_PROB_THRESHOLD` & `NUCLEI_PROB_THRESHOLD`: Probability threshold for segmentation. Default is 0.479071.
- `CELL_NMS_THRESHOLD` & `NUCLEI_NMS_THRESHOLD`: Non-maximum suppression threshold. Default is 0.3.

In [None]:
# Common parameters
# Name of the cytoplasm channel; it is looked up in CHANNEL_NAMES below
CYTO_CHANNEL = None
# GPU and RECONCILE are forwarded unchanged to the segmentation call in the next cell
GPU = False
RECONCILE = "contained_in_cells"
# Channel indexes used for segmentation; the nuclear channel must be named "DAPI"
DAPI_INDEX = CHANNEL_NAMES.index("DAPI")
CYTO_INDEX = CHANNEL_NAMES.index(CYTO_CHANNEL)

# Select segmentation method
SEGMENTATION_METHOD = "cellpose"

if SEGMENTATION_METHOD == "cellpose":
    # Parameters for CellPose method
    CELLPOSE_MODEL = "cyto3"
    NUCLEI_FLOW_THRESHOLD = 0.4
    NUCLEI_CELLPROB_THRESHOLD = 0.0
    CELL_FLOW_THRESHOLD = 1
    CELL_CELLPROB_THRESHOLD = 0
    HELPER_INDEX = None  # Optional: channel index to help with CPSAM segmentation

    # Only estimate diameters for non-CPSAM models
    if CELLPOSE_MODEL != "cpsam":
        # Imported here so the helper is only loaded when diameters are estimated
        from lib.shared.segment_cellpose import estimate_diameters
        print("Estimating optimal cell and nuclei diameters...")
        # The helper returns the nuclei diameter first, then the cell diameter
        NUCLEI_DIAMETER, CELL_DIAMETER = estimate_diameters(
            aligned_image,
            dapi_index=DAPI_INDEX,
            cyto_index=CYTO_INDEX,
            cellpose_model=CELLPOSE_MODEL,
        )
    else:
        print("CPSAM model selected. Initial diameters set to None.")
        print("Note: Diameters will be estimated automatically from segmentation results in the next cell.")
        NUCLEI_DIAMETER = None  # Will be estimated from segmentation
        CELL_DIAMETER = None    # Will be estimated from segmentation

elif SEGMENTATION_METHOD == "stardist":
    # Parameters for StarDist method
    STARDIST_MODEL = "2D_versatile_fluo"
    NUCLEI_PROB_THRESHOLD = 0.479071
    NUCLEI_NMS_THRESHOLD = 0.3
    CELL_PROB_THRESHOLD = 0.479071
    CELL_NMS_THRESHOLD = 0.3

In [None]:
print(f"Segmenting image with {SEGMENTATION_METHOD}...")

# Run the selected segmentation method; both return nuclei and cell masks
if SEGMENTATION_METHOD == "cellpose":
    from lib.shared.segment_cellpose import segment_cellpose
    nuclei, cells = segment_cellpose(
        aligned_image,
        dapi_index=DAPI_INDEX,
        cyto_index=CYTO_INDEX,
        nuclei_diameter=NUCLEI_DIAMETER,
        cell_diameter=CELL_DIAMETER,
        cellpose_kwargs=dict(
            nuclei_flow_threshold=NUCLEI_FLOW_THRESHOLD,
            nuclei_cellprob_threshold=NUCLEI_CELLPROB_THRESHOLD,
            cell_flow_threshold=CELL_FLOW_THRESHOLD,
            cell_cellprob_threshold=CELL_CELLPROB_THRESHOLD,
        ),
        cellpose_model=CELLPOSE_MODEL,
        helper_index=HELPER_INDEX,
        gpu=GPU,
        reconcile=RECONCILE,
    )

elif SEGMENTATION_METHOD == "stardist":
    from lib.shared.segment_stardist import segment_stardist
    nuclei, cells = segment_stardist(
        aligned_image,
        dapi_index=DAPI_INDEX,
        cyto_index=CYTO_INDEX,
        model_type=STARDIST_MODEL,
        stardist_kwargs=dict(
            nuclei_prob_threshold=NUCLEI_PROB_THRESHOLD,
            nuclei_nms_threshold=NUCLEI_NMS_THRESHOLD,
            cell_prob_threshold=CELL_PROB_THRESHOLD,
            cell_nms_threshold=CELL_NMS_THRESHOLD,
        ),
        gpu=GPU,
        reconcile=RECONCILE,
    )

else:
    # Fail here with a clear message instead of hitting a NameError below, or
    # silently reusing masks left over from an earlier run of this cell.
    raise ValueError(
        f"Unknown SEGMENTATION_METHOD: {SEGMENTATION_METHOD}. Use 'cellpose' or 'stardist'"
    )

# DAPI channel alongside the nuclei masks
print("Example microplots for DAPI channel and nuclei segmentation:")
nuclei_label_count = len(np.unique(nuclei))
nuclei_cmap = random_cmap(num_colors=nuclei_label_count)
dapi_microimage = Microimage(
    aligned_image[DAPI_INDEX],
    channel_names="DAPI",
    cmaps=CHANNEL_CMAPS[DAPI_INDEX],
)
nuclei_microimage = Microimage(nuclei, cmaps=nuclei_cmap, channel_names="Nuclei")
nuclei_seg_microimages = [dapi_microimage, nuclei_microimage]
nuclei_seg_panel = create_micropanel(nuclei_seg_microimages, add_channel_label=True)
plt.show()

# All channels merged alongside the cell masks
print("Example microplots for merged channels and cells segmentation:")
cells_label_count = len(np.unique(cells))
cells_cmap = random_cmap(num_colors=cells_label_count)
merged_microimage = Microimage(
    aligned_image,
    channel_names="Merged",
    cmaps=CHANNEL_CMAPS,
)
cells_microimage = Microimage(cells, cmaps=cells_cmap, channel_names="Cells")
cells_seg_microimages = [merged_microimage, cells_microimage]
cells_seg_panel = create_micropanel(cells_seg_microimages, add_channel_label=True)
plt.show()

# Phenotype image annotated with the nuclei and cell segmentation
print("Example microplot for phenotype data annotated with segmentation:")
annotated_data = image_segmentation_annotations(aligned_image, nuclei, cells)
annotated_cmaps = CHANNEL_CMAPS + ["pure_cyan"]
annotated_microimage = [
    Microimage(annotated_data, channel_names="Merged", cmaps=annotated_cmaps)
]
annotated_panel = create_micropanel(
    annotated_microimage,
    add_channel_label=False,
    figscaling=10,
    num_cols=1,
)
plt.show()

# Cytoplasm masks computed from the nuclei and cell masks, shown next to the nuclei
print("Example microplots for cytoplasms relative to nuclei:")
cytoplasms = identify_cytoplasm_cellpose(nuclei, cells)
cytoplasms_label_count = len(np.unique(cytoplasms))
cytoplasms_cmap = random_cmap(num_colors=cytoplasms_label_count)
nuclei_reference_microimage = Microimage(
    nuclei, cmaps=nuclei_cmap, channel_names="Nuclei"
)
cytoplasms_microimage = Microimage(
    cytoplasms, cmaps=cytoplasms_cmap, channel_names="Cytoplasms"
)
cytoplasms_microimages = [nuclei_reference_microimage, cytoplasms_microimage]
cytoplasms_panel = create_micropanel(cytoplasms_microimages, add_channel_label=True)
plt.show()

# CPSAM was run with diameters set to None, so derive them from the masks it produced
if SEGMENTATION_METHOD == "cellpose" and CELLPOSE_MODEL == "cpsam":
    from skimage.measure import regionprops

    # Calculate nuclei diameters.
    # np.mean returns a NumPy scalar; cast to a built-in float because these
    # values are stored in the config dict, and PyYAML's safe dumper cannot
    # represent NumPy scalars if the config is later written with it.
    nuclei_props = regionprops(nuclei)
    nuclei_diameters = [prop.equivalent_diameter for prop in nuclei_props]
    estimated_nuclei_diameter = float(np.mean(nuclei_diameters))
    print(f"Nuclei - Average diameter: {estimated_nuclei_diameter:.2f} pixels")

    # Calculate cell diameters
    cells_props = regionprops(cells)
    cells_diameters = [prop.equivalent_diameter for prop in cells_props]
    estimated_cell_diameter = float(np.mean(cells_diameters))
    print(f"Cells - Average diameter: {estimated_cell_diameter:.2f} pixels")

    # Update the diameter variables for config
    NUCLEI_DIAMETER = estimated_nuclei_diameter
    CELL_DIAMETER = estimated_cell_diameter
    print(f"\nUpdated NUCLEI_DIAMETER to {NUCLEI_DIAMETER:.2f} pixels")
    print(f"Updated CELL_DIAMETER to {CELL_DIAMETER:.2f} pixels")

Note: You may want to adjust these parameters and rerun the segmentation tests if you feel the masks are capturing too little or too much area. For cellpose, the nuclei and cell diameters will be automatically estimated, but can be manually adjusted if needed. You can manually set `NUCLEI_DIAMETER` and `CELL_DIAMETER` and rerun the above blocks as many times as needed.

## <font color='red'>SET PARAMETERS</font>

### Feature extraction

- `CP_METHOD`: Methodology for phenotype feature extraction.  
    - `cp_multichannel`: Use emulated code from original _Feldman et al. 2019_ to extract CellProfiler-like features.
    - `cp_measure`: Use Pythonic version of [CellProfiler](https://github.com/afermg/cp_measure) directly from Imaging Platform. Still in development, may run slowly in Jupyter notebook for testing purposes.

In [None]:
# Feature extraction method: "cp_measure" selects cp_measure; any other value (including None) uses cp_multichannel
CP_METHOD = None

In [None]:
print("Extracting phenotype features:")

# Translate the foci channel name into its index (None when no foci channel is set)
FOCI_CHANNEL_INDEX = CHANNEL_NAMES.index(FOCI_CHANNEL) if FOCI_CHANNEL else None

if CP_METHOD != "cp_measure":
    from lib.phenotype.extract_phenotype_cp_multichannel import (
        extract_phenotype_cp_multichannel,
    )
    # Default path: CellProfiler emulator, which also takes wildcards and the foci channel
    multichannel_kwargs = dict(
        nuclei=nuclei,
        cells=cells,
        wildcards=WILDCARDS,
        cytoplasms=cytoplasms,
        foci_channel=FOCI_CHANNEL_INDEX,
        channel_names=CHANNEL_NAMES,
    )
    phenotype_cp = extract_phenotype_cp_multichannel(
        aligned_image, **multichannel_kwargs
    )
else:
    from lib.phenotype.extract_phenotype_cp_measure import extract_phenotype_cp_measure
    # cp_measure path
    cp_measure_kwargs = dict(
        nuclei=nuclei,
        cells=cells,
        cytoplasms=cytoplasms,
        channel_names=CHANNEL_NAMES,
    )
    phenotype_cp = extract_phenotype_cp_measure(aligned_image, **cp_measure_kwargs)

phenotype_cp

In [None]:
# Remove channel names from feature names
def remove_channel_name(feature, channels):
    """Strip every ``_<channel>`` occurrence from a feature name.

    Args:
        feature: Feature (column) name to clean.
        channels: Iterable of channel names to remove.

    Returns:
        The feature name with each ``_<channel>`` substring removed.
    """
    # Replace the longest names first: when one channel name is a prefix of
    # another (e.g. "DAPI" and "DAPI_2"), removing the shorter one first would
    # leave the rest of the longer name ("_2") behind in the feature name.
    for channel in sorted(channels, key=len, reverse=True):
        feature = feature.replace(f"_{channel}", "")
    return feature


# Identifier columns that are not phenotype features
identifier_columns = ("label", "well", "tile")

# Keep only the feature columns of the phenotype table
filtered_features = [
    column
    for column in phenotype_cp.columns.tolist()
    if column not in identifier_columns
]

# Strip channel names so features differing only by channel collapse to one type
feature_types = [
    remove_channel_name(column, CHANNEL_NAMES) for column in filtered_features
]

# De-duplicate and sort the feature types for display
unique_feature_types = sorted(set(feature_types))

print("Unique feature types:")
unique_feature_types

## <font color='red'>SET PARAMETERS</font>

### Secondary object detection (optional)

- `SECOND_OBJ_DETECTION`: Whether to perform secondary object detection (e.g., intracellular pathogen, organelles).
- `SECOND_OBJ_CHANNEL`: Name of the channel used for secondary object detection.
- `SECOND_OBJ_METHOD`: Segmentation method to use. Options:
  - `"threshold"`: Traditional thresholding-based approach
  - `"cellpose"`: ML-based segmentation using Cellpose
  - `"stardist"`: ML-based segmentation using StarDist

#### Size Filtering (applies to all methods)
- `SIZE_FILTER_METHOD`: Method for size filtering. Options:
  - `"feret"`: Use Feret diameters (min and max widths of rotated bounding box).
  - `"area"`: Use pixel area.
- `SECOND_OBJ_MIN_SIZE`: Minimum size for valid secondary objects. Interpreted as Feret diameter or area depending on `SIZE_FILTER_METHOD`.
- `SECOND_OBJ_MAX_SIZE`: Maximum size for valid secondary objects.


#### Cell Association (applies to all methods)
- `MAX_OBJECTS_PER_CELL`: Maximum secondary objects allowed per cell.
- `OVERLAP_THRESHOLD`: Minimum overlap ratio to associate object with cell.
- `MAX_TOTAL_OBJECTS`: Failsafe limit on detected objects. Returns empty results if exceeded to avoid processing over-segmented images.

#### Cellpose Parameters (if `SECOND_OBJ_METHOD="cellpose"`)
- `SECOND_OBJ_CELLPOSE_MODEL`: Cellpose model type. Options: `"cyto3"` (default), `"cyto2"`, `"cyto"`, `"nuclei"`, etc.
- `SECOND_OBJ_DIAMETER`: Expected diameter of objects in pixels. If `None`, will be estimated automatically.
- `SECOND_OBJ_FLOW_THRESHOLD`: Flow error threshold for Cellpose segmentation (default: 0.4).
- `SECOND_OBJ_CELLPROB_THRESHOLD`: Cell probability threshold for Cellpose (default: 0.0).

#### StarDist Parameters (if `SECOND_OBJ_METHOD="stardist"`)
- `SECOND_OBJ_STARDIST_MODEL`: StarDist pretrained model name (default: `"2D_versatile_fluo"`).
- `SECOND_OBJ_PROB_THRESHOLD`: Probability threshold for object detection (default: 0.5).
- `SECOND_OBJ_NMS_THRESHOLD`: Non-maximum suppression threshold (default: 0.4).

#### Threshold Method Parameters (if `SECOND_OBJ_METHOD="threshold"`)

**Pre-processing**
- `THRESHOLD_SMOOTHING_SCALE`: Sigma for Gaussian smoothing before thresholding.
- `THRESHOLD_METHOD`: Thresholding method to use. Options:
  - `"otsu_two_peak"`: Standard 2-class Otsu thresholding.
  - `"otsu_three_peak_mid_bg"`: 3-class Otsu, keeps only highest intensity class.
  - `"otsu_three_peak_mid_fg"`: 3-class Otsu, keeps middle and high intensity classes.
  - `"min_cross_entropy"`: Minimum cross entropy (Li) thresholding.
- `USE_MORPHOLOGICAL_OPENING`: Apply morphological opening to separate weakly connected objects.
- `OPENING_DISK_RADIUS`: Radius of disk structuring element for morphological opening.
- `FILL_HOLES`: When to fill holes in segmented objects. Options:
  - `"threshold"`: Fill holes only after thresholding (before declumping)
  - `"declump"`: Fill holes only after declumping (per-label filling)
  - `"both"`: Fill holes after both thresholding and declumping (recommended)
  - `"none"`: Do not fill holes at any stage

**Declumping Method**
- `DECLUMP_METHOD`: Method for separating clumped objects. Options:
  - `"none"`: No declumping.
  - `"shape"`: Distance transform peaks (radial distance).
  - `"intensity"`: Local intensity maxima.
  - `"shape_intensity"`: Combined distance + intensity peaks.
- `DECLUMP_MODE`: Watershed segmentation mode. Options:
  - `"watershed"`: Standard watershed from markers
  - `"propagate"`: Distance propagation variant
  - `"none"`: Use markers only without watershed

**Seed Detection**
- `SUPPRESS_LOCAL_MAXIMA`: Minimum spacing between seed points in pixels. Controls spatial separation of detected peaks. Default: 20. Decrease if objects are being merged together. Increase if objects are being over-split.
- `MAXIMA_REDUCTION_FACTOR`: H-minima threshold for suppressing weak peaks (range: 0.0-1.0). Higher values = more aggressive suppression. If None, no h-minima filtering applied. Applied during seed detection (before watershed).

**Shape Refinement**
- `USE_SHAPE_REFINEMENT`: Apply boundary quality control after declumping. When enabled, evaluates watershed splits and rejects splits where the dividing boundary is long relative to perimeter.
- `PROPORTION_THRESHOLD`: Boundary/perimeter ratio threshold for shape refinement. Only used when `USE_SHAPE_REFINEMENT=True`. Splits accepted if boundary_length / perimeter < proportion_threshold.

In [None]:
# Set secondary object parameters
# Master switch: every secondary-object cell below is skipped while this is False
SECOND_OBJ_DETECTION = False
SECOND_OBJ_CHANNEL = None
SECOND_OBJ_METHOD = None  # "threshold", "cellpose", or "stardist"

# Common parameters (apply to all methods)
SECOND_OBJ_MIN_SIZE = None
SECOND_OBJ_MAX_SIZE = None
SIZE_FILTER_METHOD = None
MAX_OBJECTS_PER_CELL = None
OVERLAP_THRESHOLD = None
MAX_TOTAL_OBJECTS = None

# Cellpose parameters (only used if SECOND_OBJ_METHOD == "cellpose")
SECOND_OBJ_CELLPOSE_MODEL = "cyto3"
SECOND_OBJ_DIAMETER = None  # None = auto-estimate, or specify in pixels
SECOND_OBJ_FLOW_THRESHOLD = 0.4
SECOND_OBJ_CELLPROB_THRESHOLD = 0.0

# StarDist parameters (only used if SECOND_OBJ_METHOD == "stardist")
SECOND_OBJ_STARDIST_MODEL = "2D_versatile_fluo"
SECOND_OBJ_PROB_THRESHOLD = 0.5
SECOND_OBJ_NMS_THRESHOLD = 0.4

# Threshold method parameters (only used if SECOND_OBJ_METHOD == "threshold")
# Passed as return_threshold_output to the threshold segmentation call
RETURN_INTERMEDIATE_OUTPUTS = False
THRESHOLD_SMOOTHING_SCALE = None
THRESHOLD_METHOD = None
USE_MORPHOLOGICAL_OPENING = False
OPENING_DISK_RADIUS = None
FILL_HOLES = None
DECLUMP_METHOD = None
DECLUMP_MODE = None
SUPPRESS_LOCAL_MAXIMA = None
MAXIMA_REDUCTION_FACTOR = None
USE_SHAPE_REFINEMENT = None
PROPORTION_THRESHOLD = None

# Derive secondary object channel index from CHANNEL_NAMES
if SECOND_OBJ_DETECTION:
    SECOND_OBJ_CHANNEL_INDEX = CHANNEL_NAMES.index(SECOND_OBJ_CHANNEL)
    
    # Optionally estimate diameter for Cellpose if set to None
    if SECOND_OBJ_METHOD == "cellpose" and SECOND_OBJ_DIAMETER is None:
        from lib.phenotype.segment_secondary_object import estimate_second_obj_diameter
        
        print(f"Estimating diameter for secondary objects in {SECOND_OBJ_CHANNEL} channel...")
        # The estimate replaces the None placeholder and is reused by the segmentation cell
        SECOND_OBJ_DIAMETER = estimate_second_obj_diameter(
            aligned_image,
            SECOND_OBJ_CHANNEL_INDEX,
            method="cellpose",
            model_type=SECOND_OBJ_CELLPOSE_MODEL,
            gpu=GPU
        )

In [None]:
# Segment secondary objects if enabled
# Runs the selected secondary-object segmentation (ML-based or threshold-based),
# prints summary statistics from the returned cell summary table, and shows
# two visualizations of the resulting masks.
if SECOND_OBJ_DETECTION:
    print(f"Performing secondary object segmentation with {SECOND_OBJ_CHANNEL} using {SECOND_OBJ_METHOD} method...")
    
    # Prepare nuclei centroids for distance calculations (optional)
    # Maps each nucleus label to its centroid as reported by regionprops
    nuclei_regions = measure.regionprops(nuclei)
    nuclei_centroids_dict = {region.label: region.centroid for region in nuclei_regions}

    if SECOND_OBJ_METHOD in ["cellpose", "stardist"]:
        # ML-based segmentation
        from lib.phenotype.segment_secondary_object import (
            segment_second_objs_ml,
            create_second_obj_boundary_visualization,
            create_second_obj_standard_visualization,
        )
        
        # Build ML parameters based on method
        ml_params = {
            'second_obj_method': SECOND_OBJ_METHOD,
            'gpu': GPU,
        }
        
        # Only the parameters of the selected ML method are forwarded
        if SECOND_OBJ_METHOD == "cellpose":
            ml_params.update({
                'second_obj_cellpose_model': SECOND_OBJ_CELLPOSE_MODEL,
                'second_obj_diameter': SECOND_OBJ_DIAMETER,
                'second_obj_flow_threshold': SECOND_OBJ_FLOW_THRESHOLD,
                'second_obj_cellprob_threshold': SECOND_OBJ_CELLPROB_THRESHOLD,
            })
        elif SECOND_OBJ_METHOD == "stardist":
            ml_params.update({
                'second_obj_stardist_model': SECOND_OBJ_STARDIST_MODEL,
                'second_obj_prob_threshold': SECOND_OBJ_PROB_THRESHOLD,
                'second_obj_nms_threshold': SECOND_OBJ_NMS_THRESHOLD,
            })
        
        # Call ML segmentation
        result = segment_second_objs_ml(
            image=aligned_image,
            second_obj_channel_index=SECOND_OBJ_CHANNEL_INDEX,
            cell_masks=cells,
            cytoplasm_masks=cytoplasms,
            second_obj_min_size=SECOND_OBJ_MIN_SIZE,
            second_obj_max_size=SECOND_OBJ_MAX_SIZE,
            size_filter_method=SIZE_FILTER_METHOD,
            max_objects_per_cell=MAX_OBJECTS_PER_CELL,
            overlap_threshold=OVERLAP_THRESHOLD,
            nuclei_centroids=nuclei_centroids_dict,
            max_total_objects=MAX_TOTAL_OBJECTS,
            **ml_params
        )
        
        # Unpack outputs (ML methods don't return threshold_output)
        second_obj_masks, cell_second_obj_table, updated_cytoplasms = result
        threshold_output = None
        
    elif SECOND_OBJ_METHOD == "threshold":
        # Traditional thresholding-based segmentation
        from lib.phenotype.segment_secondary_object import (
            segment_second_objs,
            create_second_obj_boundary_visualization,
            create_second_obj_standard_visualization,
        )
        
        # Segment secondary objects with threshold method
        result = segment_second_objs(
            image=aligned_image,
            second_obj_channel_index=SECOND_OBJ_CHANNEL_INDEX,
            cell_masks=cells,
            cytoplasm_masks=cytoplasms,
            second_obj_min_size=SECOND_OBJ_MIN_SIZE,
            second_obj_max_size=SECOND_OBJ_MAX_SIZE,
            size_filter_method=SIZE_FILTER_METHOD,
            threshold_smoothing_scale=THRESHOLD_SMOOTHING_SCALE,
            threshold_method=THRESHOLD_METHOD,
            use_morphological_opening=USE_MORPHOLOGICAL_OPENING,
            opening_disk_radius=OPENING_DISK_RADIUS,
            fill_holes=FILL_HOLES,
            declump_method=DECLUMP_METHOD,
            declump_mode=DECLUMP_MODE,
            suppress_local_maxima=SUPPRESS_LOCAL_MAXIMA,
            maxima_reduction_factor=MAXIMA_REDUCTION_FACTOR,
            use_shape_refinement=USE_SHAPE_REFINEMENT,
            proportion_threshold=PROPORTION_THRESHOLD,
            max_objects_per_cell=MAX_OBJECTS_PER_CELL,
            overlap_threshold=OVERLAP_THRESHOLD,
            nuclei_centroids=nuclei_centroids_dict,
            max_total_objects=MAX_TOTAL_OBJECTS,
            return_threshold_output=RETURN_INTERMEDIATE_OUTPUTS,
        )
        
        # Unpack outputs (threshold method may return threshold_output)
        # Any fourth element of the result is captured in `opt`
        second_obj_masks, cell_second_obj_table, updated_cytoplasms, *opt = result
        threshold_output = opt[0] if opt else None
    
    else:
        raise ValueError(f"Unknown SECOND_OBJ_METHOD: {SECOND_OBJ_METHOD}. Use 'threshold', 'cellpose', or 'stardist'")

    # Per-cell summary table taken from the returned mapping of tables
    cell_summary = cell_second_obj_table["cell_summary"]

    # Print statistics
    print(f"Found secondary objects in {cell_summary['has_second_obj'].sum()} out of {len(cell_summary)} cells")
    print(f"Average objects per cell with objects: {cell_summary.loc[cell_summary['has_second_obj'], 'num_second_objs'].mean():.2f}")
    print(f"Average secondary object area ratio: {cell_summary['second_obj_area_ratio'].mean():.4f}")

    # Create standard visualizations
    print("Example microplots:")
    panel = create_second_obj_standard_visualization(
        aligned_image,
        SECOND_OBJ_CHANNEL_INDEX,
        SECOND_OBJ_CHANNEL,
        second_obj_masks,
        threshold_output=threshold_output,
    )
    plt.show()

    # Create enhanced boundary visualization
    print("Enhanced visualization with cell boundaries and secondary object boundaries:")
    boundary_panel = create_second_obj_boundary_visualization(
        aligned_image,
        SECOND_OBJ_CHANNEL_INDEX,
        cell_masks=cells,
        second_obj_masks=second_obj_masks,
        channel_names=CHANNEL_NAMES,
        channel_cmaps=CHANNEL_CMAPS,
    )
    plt.show()

In [None]:
if SECOND_OBJ_DETECTION:
    # Measure phenotype features on the secondary-object masks
    from lib.phenotype.extract_phenotype_second_objs import extract_phenotype_second_objs

    second_obj_extraction_kwargs = dict(
        second_objs=second_obj_masks,
        second_obj_cell_mapping_df=cell_second_obj_table['second_obj_cell_mapping'],
        wildcards=WILDCARDS,
        foci_channel=FOCI_CHANNEL_INDEX,
        channel_names=CHANNEL_NAMES,
    )
    second_obj_phenotype = extract_phenotype_second_objs(
        aligned_image, **second_obj_extraction_kwargs
    )

    # Report the mean diameter, then the mean area, of the detected objects
    second_obj_diameters = second_obj_phenotype['second_obj_diameter']
    average_diameter = second_obj_diameters.mean()
    print(f"Average diameter of secondary objects: {average_diameter}")
    second_obj_areas = second_obj_phenotype['second_obj_area']
    average_area = second_obj_areas.mean()
    print(f"Average area of secondary objects: {average_area}")
    

In [None]:
if SECOND_OBJ_DETECTION:
    # Every column except the identifier columns counts as a feature
    second_obj_identifier_columns = ("label", "well", "tile", "cell_label")
    second_obj_feature_cols = []
    for column in second_obj_phenotype.columns:
        if column not in second_obj_identifier_columns:
            second_obj_feature_cols.append(column)
    print(f"\nNumber of secondary object features: {len(second_obj_feature_cols)}")

    # Strip channel names so features differing only by channel share one type
    second_obj_feature_types = [
        remove_channel_name(column, CHANNEL_NAMES)
        for column in second_obj_feature_cols
    ]

    # De-duplicate and sort the feature types for display
    second_obj_unique_types = sorted(set(second_obj_feature_types))

    print("Unique secondary object feature types:")
    display(second_obj_unique_types)

## Add phenotype process parameters to config file

In [None]:
# Start the phenotype section with the settings shared by every configuration
phenotype_config = dict(
    foci_channel_index=FOCI_CHANNEL_INDEX,
    channel_names=CHANNEL_NAMES,
    align=ALIGN,
    dapi_index=DAPI_INDEX,
    cyto_index=CYTO_INDEX,
    segmentation_method=SEGMENTATION_METHOD,
    reconcile=RECONCILE,
    gpu=GPU,
    cp_method=CP_METHOD,
)
config["phenotype"] = phenotype_config

# Append the settings that belong to the selected segmentation method
if SEGMENTATION_METHOD == "cellpose":
    phenotype_config.update(
        nuclei_diameter=NUCLEI_DIAMETER,
        cell_diameter=CELL_DIAMETER,
        nuclei_flow_threshold=NUCLEI_FLOW_THRESHOLD,
        nuclei_cellprob_threshold=NUCLEI_CELLPROB_THRESHOLD,
        cell_flow_threshold=CELL_FLOW_THRESHOLD,
        cell_cellprob_threshold=CELL_CELLPROB_THRESHOLD,
        cellpose_model=CELLPOSE_MODEL,
    )
    # helper_index is written only when a helper channel was chosen
    if HELPER_INDEX is not None:
        phenotype_config["helper_index"] = HELPER_INDEX
elif SEGMENTATION_METHOD == "stardist":
    phenotype_config.update(
        stardist_model=STARDIST_MODEL,
        nuclei_prob_threshold=NUCLEI_PROB_THRESHOLD,
        nuclei_nms_threshold=NUCLEI_NMS_THRESHOLD,
        cell_prob_threshold=CELL_PROB_THRESHOLD,
        cell_nms_threshold=CELL_NMS_THRESHOLD,
    )

# Alignment settings are written only when alignment is enabled
if ALIGN:
    phenotype_config.update(
        target=TARGET_INDEX,
        source=SOURCE_INDEX,
        riders=RIDER_INDEXES,
        remove_channel=REMOVE_CHANNEL,
        upsample_factor=UPSAMPLE_FACTOR,
        window=WINDOW,
    )

# Add secondary object detection parameters
if SECOND_OBJ_DETECTION:
    # Determine if using ML-based segmentation method
    use_ml_segmentation = SECOND_OBJ_METHOD in ["cellpose", "stardist"]
    # Common parameters for all methods
    config["phenotype"].update({
        "second_obj_detection": SECOND_OBJ_DETECTION,
        "second_obj_channel_index": SECOND_OBJ_CHANNEL_INDEX,
        "second_obj_method": SECOND_OBJ_METHOD,
        "use_ml_segmentation": use_ml_segmentation,
        "second_obj_min_size": SECOND_OBJ_MIN_SIZE,
        "second_obj_max_size": SECOND_OBJ_MAX_SIZE,
        "size_filter_method": SIZE_FILTER_METHOD,
        "max_objects_per_cell": MAX_OBJECTS_PER_CELL,
        "overlap_threshold": OVERLAP_THRESHOLD,
        "max_total_objects": MAX_TOTAL_OBJECTS,
    })
    
    # Add method-specific parameters
    if SECOND_OBJ_METHOD == "cellpose":
        config["phenotype"].update({
            "second_obj_cellpose_model": SECOND_OBJ_CELLPOSE_MODEL,
            "second_obj_diameter": SECOND_OBJ_DIAMETER,
            "second_obj_flow_threshold": SECOND_OBJ_FLOW_THRESHOLD,
            "second_obj_cellprob_threshold": SECOND_OBJ_CELLPROB_THRESHOLD,
        })
    elif SECOND_OBJ_METHOD == "stardist":
        config["phenotype"].update({
            "second_obj_stardist_model": SECOND_OBJ_STARDIST_MODEL,
            "second_obj_prob_threshold": SECOND_OBJ_PROB_THRESHOLD,
            "second_obj_nms_threshold": SECOND_OBJ_NMS_THRESHOLD,
        })
    elif SECOND_OBJ_METHOD == "threshold":
        config["phenotype"].update({
            "threshold_smoothing_scale": THRESHOLD_SMOOTHING_SCALE,
            "threshold_method": THRESHOLD_METHOD,
            "use_morphological_opening": USE_MORPHOLOGICAL_OPENING,
            "opening_disk_radius": OPENING_DISK_RADIUS,
            "fill_holes": FILL_HOLES,
            "declump_method": DECLUMP_METHOD,
            "declump_mode": DECLUMP_MODE,
            "suppress_local_maxima": SUPPRESS_LOCAL_MAXIMA,
            "maxima_reduction_factor": MAXIMA_REDUCTION_FACTOR,
            "use_shape_refinement": USE_SHAPE_REFINEMENT,
            "proportion_threshold": PROPORTION_THRESHOLD,
        })

# Add custom channel offsets if defined
if CUSTOM_CHANNEL_OFFSETS:
    config["phenotype"]["custom_channel_offsets"] = CUSTOM_CHANNEL_OFFSETS_INDEXED

# Convert tuples to lists before dumping
safe_config = convert_tuples_to_lists(config)

# Serialize BEFORE opening the file. Opening with "w" truncates the existing
# config immediately, so if dumping raised while the file was already open,
# the config on disk would be left with only the header (or a partial
# document). Dumping to a string first means a serialization failure leaves
# the existing file untouched.
config_yaml = yaml.dump(safe_config, default_flow_style=False, sort_keys=False)

# Write the updated configuration back with markdown-style comments
with open(CONFIG_FILE_PATH, "w") as config_file:
    # Introductory markdown-style header comments come first
    config_file.write(CONFIG_FILE_HEADER)

    # Then the YAML document, with keys in insertion order (sort_keys=False)
    config_file.write(config_yaml)