Skip to content

Commit

Permalink
Manual mask names (#1126)
Browse files Browse the repository at this point in the history
* mask_types arg and remove nuclear mask rows

* docstring fix

* Updated ez_seg notebook to include mask_types variable at cell table generation.

* cell table test fix

* why do we even still have these mibitiff functions

* fix mask names in nb

* aesthetics

* Updated ez nb to match new example dataset.

* Added updates to nb that weren't pushed last time.

* updated tests for new ezseg changes

---------

Co-authored-by: bryjc <bryjcannon@gmail.com>
Co-authored-by: Sricharan Reddy Varra <srivarra@stanford.edu>
Co-authored-by: Sricharan Reddy Varra <sricharanvarra@gmail.com>
  • Loading branch information
4 people committed Apr 9, 2024
1 parent dac262e commit a3c08d6
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 107 deletions.
20 changes: 5 additions & 15 deletions src/ark/segmentation/marker_quantification.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ def create_marker_count_matrices(segmentation_labels, image_data, nuclear_counts
def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
is_mibitiff=False, fovs=None,
extraction='total_intensity', nuclear_counts=False,
fast_extraction=False, **kwargs):
fast_extraction=False, mask_types=['whole_cell'], **kwargs):
"""This function takes the segmented data and computes the expression matrices batch-wise
while also validating inputs
Expand All @@ -478,6 +478,8 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
set to True, the compartments coordinate in segmentation_labels must contain 'nuclear'
fast_extraction (bool):
if set, skips the custom regionprops and expensive base regionprops extraction steps
mask_types (list):
list of masks to extract data for, defaults to ['whole_cell']
**kwargs:
arbitrary keyword arguments for signal and regionprops extraction
Expand Down Expand Up @@ -526,19 +528,6 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
img_sub_folder=img_sub_folder,
fovs=[fov_name])

# define the files for whole cell and nuclear
whole_cell_file = fov_name + '_whole_cell.tiff'
nuclear_file = fov_name + '_nuclear.tiff'

# for each label given in the argument, read in that mask for the fov, and proceed with
# label and table appending
mask_files = io_utils.list_files(segmentation_dir, substrs=fov_name)
mask_types = get_existing_mask_types(fov_names=fovs, mask_names=mask_files)

# remove nuclear from mask_types if nuclear_counts False
if not nuclear_counts and "nuclear" in mask_types:
mask_types.remove("nuclear")

for mask_type in mask_types:
# load the segmentation labels in
fov_mask_name = fov_name + '_' + mask_type + ".tiff"
Expand All @@ -551,7 +540,8 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
compartments = ['whole_cell']
segmentation_labels = current_labels_cell.values

if nuclear_counts:
if nuclear_counts and mask_type == 'whole_cell':
nuclear_file = fov_name + '_nuclear.tiff'
current_labels_nuc = load_utils.load_imgs_from_dir(data_dir=segmentation_dir,
files=[nuclear_file],
xr_dim_name='compartments',
Expand Down
43 changes: 23 additions & 20 deletions templates/ez_segmenter.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -287,12 +287,12 @@
"outputs": [],
"source": [
"# What would you like to name your composite image\n",
"composite_name = \"amyloid\"\n",
"composite_name = \"microglia-composite\"\n",
"\n",
"# What channels would you like to add together?\n",
"to_add = [\"Amyloidbeta140\", \"Amyloidbeta142\", \"PanAmyloidbeta1724\"]\n",
"to_add = [\"Iba1\", \"CD45\"]\n",
"# What channels would you like to subtract?\n",
"to_subtract = [\"HistoneH3Lyo\", \"Background\"]\n",
"to_subtract = [\"Background\"]\n",
"\n",
"# What image type do you want returned?\n",
"image_type = \"signal\"\n",
Expand Down Expand Up @@ -366,7 +366,7 @@
"outputs": [],
"source": [
"fov_name = \"fov0\"\n",
"composite_name = \"amyloid\"\n",
"composite_name = \"microglia-composite\"\n",
"\n",
"# Show test composite image\n",
"ez_seg_display.display_channel_image(composite_dir, sub_folder_name, fov_name, composite_name, composite=True)"
Expand Down Expand Up @@ -451,22 +451,22 @@
"outputs": [],
"source": [
"# channel params\n",
"channel_to_segment = \"astrocyte\"\n",
"channel_to_segment = \"microglia-composite\"\n",
"channel_to_segment_path = composite_dir\n",
"path_sub_folder_name = None\n",
"\n",
"# mask params\n",
"mask_name = \"astrocyte-arms\"\n",
"mask_name = \"microglia-projections\"\n",
"object_shape = \"projection\"\n",
"\n",
"# blur/threshold params\n",
"blur = 1\n",
"threshold = 99\n",
"threshold = 90\n",
"hole_size = \"auto\"\n",
"\n",
"# fov params\n",
"fov_size = 400\n",
"min_pixels = 100\n",
"min_pixels = 200\n",
"max_pixels = 100000"
]
},
Expand Down Expand Up @@ -529,11 +529,11 @@
},
"outputs": [],
"source": [
"fov_name = \"fov1\"\n",
"channel_to_view = \"astrocyte\"\n",
"fov_name = \"fov0\"\n",
"channel_to_view = \"microglia-composite\"\n",
"channel_to_view_dir = composite_dir\n",
"path_sub_folder_name = None\n",
"mask_to_view = \"astrocyte-arms\"\n",
"mask_to_view = \"microglia-projections\"\n",
"mask_to_view_dir = ez_masks_dir\n",
"\n",
"# Show test segmentation image\n",
Expand Down Expand Up @@ -571,8 +571,7 @@
"* `cell_dir`: the final mask directory\n",
"* `cell_mask_suffix`: Suffix name of the cell mask files. Usually \"whole_cell\"\n",
"* `merged_masks_dir`: the directory to store the merged masks"
],
"outputs": []
]
},
{
"cell_type": "code",
Expand All @@ -584,8 +583,8 @@
},
"outputs": [],
"source": [
"merge_masks_list = [\"microglia-arms\", \"astrocyte-arms\"]\n",
"percent_overlap = 30\n",
"merge_masks_list = [\"microglia-projections\"]\n",
"percent_overlap = 10\n",
"expansion_factor = 10\n",
"\n",
"# Overwrite if different from above\n",
Expand Down Expand Up @@ -669,8 +668,8 @@
},
"outputs": [],
"source": [
"fov_name = \"fov3\"\n",
"merge_mask_view = \"microglia-arms\"\n",
"fov_name = \"fov0\"\n",
"merge_mask_view = \"microglia-projections\"\n",
"object_mask_dir = ez_masks_dir\n",
"cell_mask_dir = cell_dir\n",
"cell_mask_suffix = \"whole_cell\"\n",
Expand Down Expand Up @@ -705,7 +704,7 @@
"outputs": [],
"source": [
"# Enter the names of masks you would like to include in the final mask directory, e.g. [\"merged\", \"final_cells_remaining\", \"plaques\", \"tangles\"].\n",
"mask_names = [\"merged\", \"final_cells_remaining\", \"plaques\"]\n",
"mask_names = [\"microglia-projections_merged\", \"final_whole_cell_remaining\", \"plaques\"]\n",
"\n",
"# Name of the final mask destination folder\n",
"final_mask_dir = os.path.join(segmentation_dir, \"final_mask_dir\")\n",
Expand Down Expand Up @@ -772,7 +771,10 @@
"table_name = \"cell_and_objects\"\n",
"\n",
"# set to True to add nuclear cell properties to the expression matrix\n",
"nuclear_counts = False"
"nuclear_counts = False\n",
"\n",
"# Enter the names of masks you would like to include in the final cell table if different than mask_names above, e.g. [\"astrocyte-arms_merged\", \"microglia-arms_merged\", \"final_cells_remaining\", \"amyloid-plaques\"].\n",
"mask_names = [\"microglia-projections_merged\", \"final_whole_cell_remaining\", \"plaques\"]"
]
},
{
Expand Down Expand Up @@ -806,6 +808,7 @@
" batch_size=5,\n",
" nuclear_counts=nuclear_counts,\n",
" fast_extraction=fast_extraction,\n",
" mask_types=mask_names\n",
")"
]
},
Expand Down Expand Up @@ -908,7 +911,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
"version": "3.11.8"
}
},
"nbformat": 4,
Expand Down
54 changes: 21 additions & 33 deletions tests/segmentation/marker_quantification_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -736,15 +736,15 @@ def test_generate_cell_table_tree_loading():
nuclear_counts=True)

# setting nuclear_counts True generates data for both whole_cell and nuclear
# so there should be double the number of rows
assert norm_data_nuc.shape[0] == norm_data_fov_sub.shape[0] * 2
# so there should be double the number of columns, but not rows
assert norm_data_nuc.shape[0] == norm_data_fov_sub.shape[0]
assert norm_data_nuc.shape[1] == norm_data_fov_sub.shape[1] * 2
misc_utils.verify_in_list(
nuclear_col='nc_ratio',
nuc_cell_table_cols=norm_data_nuc.columns.values
)

assert arcsinh_data_nuc.shape[0] == arcsinh_data_fov_sub.shape[0] * 2
assert arcsinh_data_nuc.shape[0] == arcsinh_data_fov_sub.shape[0]
assert arcsinh_data_nuc.shape[1] == norm_data_fov_sub.shape[1] * 2
misc_utils.verify_in_list(
nuclear_col='nc_ratio',
Expand Down Expand Up @@ -824,14 +824,14 @@ def test_generate_cell_table_mibitiff_loading():

# setting nuclear_counts True generates data for both whole_cell and nuclear
# so there should be double the number of columns, but not rows
assert norm_data_nuc.shape[0] == norm_data_fov_sub.shape[0] * 2
assert norm_data_nuc.shape[0] == norm_data_fov_sub.shape[0]
assert norm_data_nuc.shape[1] == norm_data_fov_sub.shape[1] * 2
misc_utils.verify_in_list(
nuclear_col='nc_ratio',
nuc_cell_table_cols=norm_data_nuc.columns.values
)

assert arcsinh_data_nuc.shape[0] == arcsinh_data_fov_sub.shape[0] * 2
assert arcsinh_data_nuc.shape[0] == arcsinh_data_fov_sub.shape[0]
assert arcsinh_data_nuc.shape[1] == norm_data_fov_sub.shape[1] * 2
misc_utils.verify_in_list(
nuclear_col='nc_ratio',
Expand Down Expand Up @@ -879,15 +879,19 @@ def test_generate_cell_table_extractions():
nuclear_counts=True
)

# verify total intensity extraction, same for whole_cell and nuclear mask types
for mask_type in ["whole_cell", "nuclear"]:
assert np.all(
default_norm_data.loc[
(default_norm_data[settings.CELL_LABEL] == 1) &
(default_norm_data["mask_type"] == mask_type)
][chans].values
== np.arange(9).reshape(3, 3)
)
# verify total intensity extraction
assert np.all(
default_norm_data.loc[
(default_norm_data[settings.CELL_LABEL] == 1) &
(default_norm_data["mask_type"] == "whole_cell")
][chans].values
== np.arange(9).reshape(3, 3)
)
assert np.unique(default_norm_data.mask_type) == ["whole_cell"]

# check for nuclear extractions
nuc_columns = [col for col in default_norm_data.columns if '_nuclear' in col]
assert nuc_columns

# define a specific threshold for positive pixel extraction
thresh_kwargs = {
Expand All @@ -911,25 +915,9 @@ def test_generate_cell_table_extractions():
assert np.all(positive_pixel_data_wc.iloc[:4][['chan0', 'chan1']].values == 0)
assert np.all(positive_pixel_data_wc.iloc[4:][chans].values == 1)

# verify thresh kwarg passes through and nuclear counts True
positive_pixel_data, _ = marker_quantification.generate_cell_table(
segmentation_dir=temp_dir, tiff_dir=tiff_dir,
img_sub_folder=img_sub_folder, is_mibitiff=False,
extraction='positive_pixel', nuclear_counts=True, **thresh_kwargs
)

# check explicitly for nuclear mask types
positive_pixel_data_nuc = positive_pixel_data[
positive_pixel_data["mask_type"] == "nuclear"
]
assert np.all(positive_pixel_data_nuc.iloc[:4][['chan0', 'chan1']].values == 0)
assert np.all(positive_pixel_data_nuc.iloc[4:][chans].values == 1)
assert positive_pixel_data_nuc.shape[0] == positive_pixel_data.shape[0] / 2
assert positive_pixel_data_nuc.shape[1] == positive_pixel_data.shape[1]
misc_utils.verify_in_list(
nuclear_col='nc_ratio',
nuc_cell_table_cols=positive_pixel_data_nuc.columns.values
)
# check that nuclear counts are not extracted
nuc_columns = [col for col in positive_pixel_data_wc.columns if '_nuclear' in col]
assert not nuc_columns


def test_get_existing_mask_types():
Expand Down
Loading

0 comments on commit a3c08d6

Please sign in to comment.