Manual mask names (#1126)

* mask_types arg and remove nuclear mask rows
* docstring fix
* Updated ez_seg notebook to include mask_types variable at cell table generation.
* cell table test fix
* why do we even still have these mibitiff functions
* fix mask names in nb
* aesthetics
* Updated ez nb to match new example dataset.
* Added updates to nb that weren't pushed last time.
* updated tests for new ezseg changes

---------

Co-authored-by: bryjc <bryjcannon@gmail.com>
Co-authored-by: Sricharan Reddy Varra <srivarra@stanford.edu>
Co-authored-by: Sricharan Reddy Varra <sricharanvarra@gmail.com>
angelolab · Apr 9, 2024 · a3c08d6 · a3c08d6
1 parent dac262e
commit a3c08d6
Show file tree

Hide file tree

Showing 4 changed files with 129 additions and 107 deletions.
diff --git a/src/ark/segmentation/marker_quantification.py b/src/ark/segmentation/marker_quantification.py
@@ -455,7 +455,7 @@ def create_marker_count_matrices(segmentation_labels, image_data, nuclear_counts
 def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
                         is_mibitiff=False, fovs=None,
                         extraction='total_intensity', nuclear_counts=False,
-                        fast_extraction=False, **kwargs):
+                        fast_extraction=False, mask_types=['whole_cell'], **kwargs):
     """This function takes the segmented data and computes the expression matrices batch-wise
     while also validating inputs
 
@@ -478,6 +478,8 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
             set to True, the compartments coordinate in segmentation_labels must contain 'nuclear'
         fast_extraction (bool):
             if set, skips the custom regionprops and expensive base regionprops extraction steps
+        mask_types (list):
+            list of masks to extract data for, defaults to ['whole_cell']
         **kwargs:
             arbitrary keyword arguments for signal and regionprops extraction
 
@@ -526,19 +528,6 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
                                                         img_sub_folder=img_sub_folder,
                                                         fovs=[fov_name])
 
-        # define the files for whole cell and nuclear
-        whole_cell_file = fov_name + '_whole_cell.tiff'
-        nuclear_file = fov_name + '_nuclear.tiff'
-
-        # for each label given in the argument, read in that mask for the fov, and proceed with
-        # label and table appending
-        mask_files = io_utils.list_files(segmentation_dir, substrs=fov_name)
-        mask_types = get_existing_mask_types(fov_names=fovs, mask_names=mask_files)
-
-        # remove nuclear from mask_types if nuclear_counts False
-        if not nuclear_counts and "nuclear" in mask_types:
-            mask_types.remove("nuclear")
-
         for mask_type in mask_types:
             # load the segmentation labels in
             fov_mask_name = fov_name + '_' + mask_type + ".tiff"
@@ -551,7 +540,8 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
             compartments = ['whole_cell']
             segmentation_labels = current_labels_cell.values
 
-            if nuclear_counts:
+            if nuclear_counts and mask_type == 'whole_cell':
+                nuclear_file = fov_name + '_nuclear.tiff'
                 current_labels_nuc = load_utils.load_imgs_from_dir(data_dir=segmentation_dir,
                                                                    files=[nuclear_file],
                                                                    xr_dim_name='compartments',

diff --git a/templates/ez_segmenter.ipynb b/templates/ez_segmenter.ipynb
@@ -287,12 +287,12 @@
    "outputs": [],
    "source": [
     "# What would you like to name your composite image\n",
-    "composite_name = \"amyloid\"\n",
+    "composite_name = \"microglia-composite\"\n",
     "\n",
     "# What channels would you like to add together?\n",
-    "to_add = [\"Amyloidbeta140\", \"Amyloidbeta142\", \"PanAmyloidbeta1724\"]\n",
+    "to_add = [\"Iba1\", \"CD45\"]\n",
     "# What channels would you like to subtract?\n",
-    "to_subtract = [\"HistoneH3Lyo\", \"Background\"]\n",
+    "to_subtract = [\"Background\"]\n",
     "\n",
     "# What image type do you want returned?\n",
     "image_type = \"signal\"\n",
@@ -366,7 +366,7 @@
    "outputs": [],
    "source": [
     "fov_name = \"fov0\"\n",
-    "composite_name = \"amyloid\"\n",
+    "composite_name = \"microglia-composite\"\n",
     "\n",
     "# Show test composite image\n",
     "ez_seg_display.display_channel_image(composite_dir, sub_folder_name, fov_name, composite_name, composite=True)"
@@ -451,22 +451,22 @@
    "outputs": [],
    "source": [
     "# channel params\n",
-    "channel_to_segment = \"astrocyte\"\n",
+    "channel_to_segment = \"microglia-composite\"\n",
     "channel_to_segment_path = composite_dir\n",
     "path_sub_folder_name = None\n",
     "\n",
     "# mask params\n",
-    "mask_name = \"astrocyte-arms\"\n",
+    "mask_name = \"microglia-projections\"\n",
     "object_shape = \"projection\"\n",
     "\n",
     "# blur/threshold params\n",
     "blur = 1\n",
-    "threshold = 99\n",
+    "threshold = 90\n",
     "hole_size = \"auto\"\n",
     "\n",
     "# fov params\n",
     "fov_size = 400\n",
-    "min_pixels = 100\n",
+    "min_pixels = 200\n",
     "max_pixels = 100000"
    ]
   },
@@ -529,11 +529,11 @@
    },
    "outputs": [],
    "source": [
-    "fov_name = \"fov1\"\n",
-    "channel_to_view = \"astrocyte\"\n",
+    "fov_name = \"fov0\"\n",
+    "channel_to_view = \"microglia-composite\"\n",
     "channel_to_view_dir = composite_dir\n",
     "path_sub_folder_name = None\n",
-    "mask_to_view = \"astrocyte-arms\"\n",
+    "mask_to_view = \"microglia-projections\"\n",
     "mask_to_view_dir = ez_masks_dir\n",
     "\n",
     "# Show test segmentation image\n",
@@ -571,8 +571,7 @@
     "* `cell_dir`: the final mask directory\n",
     "* `cell_mask_suffix`: Suffix name of the cell mask files. Usually \"whole_cell\"\n",
     "* `merged_masks_dir`: the directory to store the merged masks"
-   ],
-   "outputs": []
+   ]
   },
   {
    "cell_type": "code",
@@ -584,8 +583,8 @@
    },
    "outputs": [],
    "source": [
-    "merge_masks_list = [\"microglia-arms\", \"astrocyte-arms\"]\n",
-    "percent_overlap = 30\n",
+    "merge_masks_list = [\"microglia-projections\"]\n",
+    "percent_overlap = 10\n",
     "expansion_factor = 10\n",
     "\n",
     "# Overwrite if different from above\n",
@@ -669,8 +668,8 @@
    },
    "outputs": [],
    "source": [
-    "fov_name = \"fov3\"\n",
-    "merge_mask_view = \"microglia-arms\"\n",
+    "fov_name = \"fov0\"\n",
+    "merge_mask_view = \"microglia-projections\"\n",
     "object_mask_dir = ez_masks_dir\n",
     "cell_mask_dir = cell_dir\n",
     "cell_mask_suffix = \"whole_cell\"\n",
@@ -705,7 +704,7 @@
    "outputs": [],
    "source": [
     "# Enter the names of masks you would like to include in the final mask directory, e.g. [\"merged\", \"final_cells_remaining\", \"plaques\", \"tangles\"].\n",
-    "mask_names = [\"merged\", \"final_cells_remaining\", \"plaques\"]\n",
+    "mask_names = [\"microglia-projections_merged\", \"final_whole_cell_remaining\", \"plaques\"]\n",
     "\n",
     "# Name of the final mask destination folder\n",
     "final_mask_dir = os.path.join(segmentation_dir, \"final_mask_dir\")\n",
@@ -772,7 +771,10 @@
     "table_name = \"cell_and_objects\"\n",
     "\n",
     "# set to True to add nuclear cell properties to the expression matrix\n",
-    "nuclear_counts = False"
+    "nuclear_counts = False\n",
+    "\n",
+    "# Enter the names of masks you would like to include in the final cell table if different than mask_names above, e.g. [\"astrocyte-arms_merged\", \"microglia-arms_merged\", \"final_cells_remaining\", \"amyloid-plaques\"].\n",
+    "mask_names = [\"microglia-projections_merged\", \"final_whole_cell_remaining\", \"plaques\"]"
    ]
   },
   {
@@ -806,6 +808,7 @@
     "    batch_size=5,\n",
     "    nuclear_counts=nuclear_counts,\n",
     "    fast_extraction=fast_extraction,\n",
+    "    mask_types=mask_names\n",
     ")"
    ]
   },
@@ -908,7 +911,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.6"
+   "version": "3.11.8"
   }
  },
  "nbformat": 4,

diff --git a/tests/segmentation/marker_quantification_test.py b/tests/segmentation/marker_quantification_test.py
@@ -736,15 +736,15 @@ def test_generate_cell_table_tree_loading():
             nuclear_counts=True)
 
         # setting nuclear_counts True generates data for both whole_cell and nuclear
-        # so there should be double the number of rows
-        assert norm_data_nuc.shape[0] == norm_data_fov_sub.shape[0] * 2
+        # so there should be double the number of columns, but not rows
+        assert norm_data_nuc.shape[0] == norm_data_fov_sub.shape[0]
         assert norm_data_nuc.shape[1] == norm_data_fov_sub.shape[1] * 2
         misc_utils.verify_in_list(
             nuclear_col='nc_ratio',
             nuc_cell_table_cols=norm_data_nuc.columns.values
         )
 
-        assert arcsinh_data_nuc.shape[0] == arcsinh_data_fov_sub.shape[0] * 2
+        assert arcsinh_data_nuc.shape[0] == arcsinh_data_fov_sub.shape[0]
         assert arcsinh_data_nuc.shape[1] == norm_data_fov_sub.shape[1] * 2
         misc_utils.verify_in_list(
             nuclear_col='nc_ratio',
@@ -824,14 +824,14 @@ def test_generate_cell_table_mibitiff_loading():
 
         # setting nuclear_counts True generates data for both whole_cell and nuclear
         # so there should be double the number of columns, but not rows
-        assert norm_data_nuc.shape[0] == norm_data_fov_sub.shape[0] * 2
+        assert norm_data_nuc.shape[0] == norm_data_fov_sub.shape[0]
         assert norm_data_nuc.shape[1] == norm_data_fov_sub.shape[1] * 2
         misc_utils.verify_in_list(
             nuclear_col='nc_ratio',
             nuc_cell_table_cols=norm_data_nuc.columns.values
         )
 
-        assert arcsinh_data_nuc.shape[0] == arcsinh_data_fov_sub.shape[0] * 2
+        assert arcsinh_data_nuc.shape[0] == arcsinh_data_fov_sub.shape[0]
         assert arcsinh_data_nuc.shape[1] == norm_data_fov_sub.shape[1] * 2
         misc_utils.verify_in_list(
             nuclear_col='nc_ratio',
@@ -879,15 +879,19 @@ def test_generate_cell_table_extractions():
             nuclear_counts=True
         )
 
-        # verify total intensity extraction, same for whole_cell and nuclear mask types
-        for mask_type in ["whole_cell", "nuclear"]:
-            assert np.all(
-                default_norm_data.loc[
-                    (default_norm_data[settings.CELL_LABEL] == 1) &
-                    (default_norm_data["mask_type"] == mask_type)
-                ][chans].values
-                == np.arange(9).reshape(3, 3)
-            )
+        # verify total intensity extraction
+        assert np.all(
+            default_norm_data.loc[
+                (default_norm_data[settings.CELL_LABEL] == 1) &
+                (default_norm_data["mask_type"] == "whole_cell")
+            ][chans].values
+            == np.arange(9).reshape(3, 3)
+        )
+        assert np.unique(default_norm_data.mask_type) == ["whole_cell"]
+
+        # check for nuclear extractions
+        nuc_columns = [col for col in default_norm_data.columns if '_nuclear' in col]
+        assert nuc_columns
 
         # define a specific threshold for positive pixel extraction
         thresh_kwargs = {
@@ -911,25 +915,9 @@ def test_generate_cell_table_extractions():
         assert np.all(positive_pixel_data_wc.iloc[:4][['chan0', 'chan1']].values == 0)
         assert np.all(positive_pixel_data_wc.iloc[4:][chans].values == 1)
 
-        # verify thresh kwarg passes through and nuclear counts True
-        positive_pixel_data, _ = marker_quantification.generate_cell_table(
-            segmentation_dir=temp_dir, tiff_dir=tiff_dir,
-            img_sub_folder=img_sub_folder, is_mibitiff=False,
-            extraction='positive_pixel', nuclear_counts=True, **thresh_kwargs
-        )
-
-        # check explicitly for nuclear mask types
-        positive_pixel_data_nuc = positive_pixel_data[
-            positive_pixel_data["mask_type"] == "nuclear"
-        ]
-        assert np.all(positive_pixel_data_nuc.iloc[:4][['chan0', 'chan1']].values == 0)
-        assert np.all(positive_pixel_data_nuc.iloc[4:][chans].values == 1)
-        assert positive_pixel_data_nuc.shape[0] == positive_pixel_data.shape[0] / 2
-        assert positive_pixel_data_nuc.shape[1] == positive_pixel_data.shape[1]
-        misc_utils.verify_in_list(
-            nuclear_col='nc_ratio',
-            nuc_cell_table_cols=positive_pixel_data_nuc.columns.values
-        )
+        # check that nuclear counts not extracted
+        nuc_columns = [col for col in positive_pixel_data_wc.columns if '_nuclear' in col]
+        assert not nuc_columns
 
 
 def test_get_existing_mask_types():