Skip to content

Commit

Permalink
Rename _feature_0 to _whole_cell and _feature_1 to _nuclear (#…
Browse files Browse the repository at this point in the history
…786)

* Initial commit of replacing _feature_0 with _whole_cell and _feature_1 with _nuclear

* Make sure dataset tests include _whole_cell and _nuclear

* Update distance matrix column to be dist_whole_cell

* Patch up marker quantification loading

* Ensure channels for stitch_images test are being passed in correctly

* Rename segmentation label saving to _whole_cell and _nuclear

* Define both the whole cell and nuclear suffix renaming choices for create_deepcell_output

* Change revision to match current PR on HuggingFace with new suffix names

* Patch up calc_dist_matrix suffix

* PYCODESTYLE in data_utils.py

* Patch up deepcell_utils

* Use new HuggingFace commit hash without hidden old files

* Change legacy example_dataset extensions to _whole_cell.tiff

* Make sure neighborhood mask uses _whole_cell.tiff

* Update comment to _whole_cell.tiff too

* Aesthetics

* Patch _feature_0 to _whole_cell to pass neighborhood mask generation test

* Change suffix in the README

* Refer to main branch for HuggingFace
  • Loading branch information
alex-l-kong authored Nov 11, 2022
1 parent f5d2c3d commit f25f809
Show file tree
Hide file tree
Showing 28 changed files with 144 additions and 113 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -277,11 +277,11 @@ segmentation/cell_table/
**Deepcell Output:** This compartment stores the segmentation images after running deepcell.
```sh
segmentation/deepcell_output/
├── fov0_feature_0.tiff
├── fov0_feature_1.tiff
├── fov0_whole_cell.tiff
├── fov0_nuclear.tiff
├── ...
├── fov10_feature_0.tiff
└── fov10_feature_1.tiff
├── fov10_whole_cell.tiff
└── fov10_nuclear.tiff
```

**Example Pixel Output:** This compartment stores feather files, csvs and pixel masks generated by pixel clustering.
Expand Down
4 changes: 2 additions & 2 deletions ark/analysis/spatial_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


def generate_channel_spatial_enrichment_stats(label_dir, dist_mat_dir, marker_thresholds, all_data,
suffix='_feature_0',
suffix='_whole_cell',
xr_channel_name='segmentation_label', **kwargs):
"""Wrapper function for batching calls to `calculate_channel_spatial_enrichment` over fovs
Expand Down Expand Up @@ -234,7 +234,7 @@ def calculate_channel_spatial_enrichment(fov, dist_matrix, marker_thresholds, al


def generate_cluster_spatial_enrichment_stats(label_dir, dist_mat_dir, all_data,
suffix='_feature_0',
suffix='_whole_cell',
xr_channel_name='segmentation_label', **kwargs):
""" Wrapper function for batching calls to `calculate_cluster_spatial_enrichment` over fovs
Expand Down
12 changes: 6 additions & 6 deletions ark/analysis/spatial_analysis_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ def test_generate_channel_spatial_enrichment_stats():

with tempfile.TemporaryDirectory() as label_dir, tempfile.TemporaryDirectory() as dist_mat_dir:
test_utils._write_labels(label_dir, ["fov8", "fov9"], ["segmentation_label"], (10, 10),
'', True, np.uint8, suffix='_feature_0')
'', True, np.uint8, suffix='_whole_cell')

spatial_analysis_utils.calc_dist_matrix(label_dir, dist_mat_dir)
label_maps = load_utils.load_imgs_from_dir(label_dir, trim_suffix="_feature_0",
label_maps = load_utils.load_imgs_from_dir(label_dir, trim_suffix="_whole_cell",
xr_channel_names=["segmentation_label"])
all_data = test_utils.spoof_cell_table_from_labels(label_maps)

Expand Down Expand Up @@ -76,10 +76,10 @@ def test_generate_cluster_spatial_enrichment_stats():
# only the number of elements returned and the included_fovs argument need testing
with tempfile.TemporaryDirectory() as label_dir, tempfile.TemporaryDirectory() as dist_mat_dir:
test_utils._write_labels(label_dir, ["fov8", "fov9"], ["segmentation_label"], (10, 10),
'', True, np.uint8, suffix='_feature_0')
'', True, np.uint8, suffix='_whole_cell')

spatial_analysis_utils.calc_dist_matrix(label_dir, dist_mat_dir)
label_maps = load_utils.load_imgs_from_dir(label_dir, trim_suffix="_feature_0",
label_maps = load_utils.load_imgs_from_dir(label_dir, trim_suffix="_whole_cell",
xr_channel_names=["segmentation_label"])
all_data = test_utils.spoof_cell_table_from_labels(label_maps)

Expand Down Expand Up @@ -341,13 +341,13 @@ def test_calculate_cluster_spatial_enrichment():
_, _ = \
spatial_analysis.calculate_cluster_spatial_enrichment(
'fov8', all_data_hack, dist_mat_hack['fov8'],
bootstrap_num=dist_lim, dist_lim=dist_lim, distance_cols=['dist_feature_0']
bootstrap_num=dist_lim, dist_lim=dist_lim, distance_cols=['dist_whole_cell']
)

_, _ = \
spatial_analysis.calculate_cluster_spatial_enrichment(
'fov9', all_data_hack, dist_mat_hack['fov9'],
bootstrap_num=dist_lim, dist_lim=dist_lim, distance_cols=['dist_feature_0']
bootstrap_num=dist_lim, dist_lim=dist_lim, distance_cols=['dist_whole_cell']
)

# error checking
Expand Down
4 changes: 2 additions & 2 deletions ark/phenotyping/pixel_cluster_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def filter_with_nuclear_mask(fovs, tiff_dir, seg_dir, channel,
fovs=[fov], channels=[channel]).values[0, :, :, 0]

# load the segmented image in
seg_img = imread(os.path.join(seg_dir, fov + '_feature_1.tiff'))[0, ...]
seg_img = imread(os.path.join(seg_dir, fov + '_nuclear.tiff'))[0, ...]

# mask out the nucleus
if exclude:
Expand Down Expand Up @@ -538,7 +538,7 @@ def preprocess_fov(base_dir, tiff_dir, data_dir, subset_dir, seg_dir, seg_suffix


def create_pixel_matrix(fovs, channels, base_dir, tiff_dir, seg_dir,
img_sub_folder="TIFs", seg_suffix='_feature_0.tiff',
img_sub_folder="TIFs", seg_suffix='_whole_cell.tiff',
pixel_cluster_prefix='pixel_cluster_prefix',
pixel_output_dir='pixel_output_dir',
data_dir='pixel_mat_data',
Expand Down
8 changes: 4 additions & 4 deletions ark/phenotyping/pixel_cluster_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ def test_filter_with_nuclear_mask(sub_dir, exclude, capsys):
nuclear_coords[fov] = (nuclear_x, nuclear_y)

# save the nuclear segmentation
file_name = fov + "_feature_1.tiff"
file_name = fov + "_nuclear.tiff"
io.imsave(os.path.join(seg_dir, file_name), rand_img,
check_contrast=False)

Expand Down Expand Up @@ -781,7 +781,7 @@ def test_preprocess_fov(mocker):
# create sample segmentation data
for fov in ['fov0', 'fov1']:
rand_img = np.random.randint(0, 16, size=(10, 10))
file_name = fov + "_feature_0.tiff"
file_name = fov + "_whole_cell.tiff"
io.imsave(os.path.join(seg_dir, file_name), rand_img,
check_contrast=False)

Expand All @@ -795,7 +795,7 @@ def test_preprocess_fov(mocker):
# NOTE: don't test the return value, leave that for test_create_pixel_matrix
pixel_cluster_utils.preprocess_fov(
temp_dir, tiff_dir, 'pixel_mat_data', 'pixel_mat_subsetted',
seg_dir, '_feature_0.tiff', 'TIFs', False, ['chan0', 'chan1', 'chan2'],
seg_dir, '_whole_cell.tiff', 'TIFs', False, ['chan0', 'chan1', 'chan2'],
2, 0.1, 1, 42, channel_norm_df, 'fov0'
)

Expand Down Expand Up @@ -902,7 +902,7 @@ def test_create_pixel_matrix_base(fovs, chans, sub_dir, seg_dir_include,
# create sample segmentation data
for fov in fovs:
rand_img = np.random.randint(0, 16, size=(10, 10))
file_name = fov + "_feature_0.tiff"
file_name = fov + "_whole_cell.tiff"
io.imsave(os.path.join(seg_dir, file_name), rand_img,
check_contrast=False)
# otherwise, set seg_dir to None
Expand Down
4 changes: 2 additions & 2 deletions ark/phenotyping/post_cluster_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,12 @@ def create_mantis_project(cell_table, fovs, seg_dir, pop_col, mask_dir, image_di

# label and save the cell mask for each FOV
for fov in fovs:
whole_cell_file = [fov + '_feature_0.tiff' for fov in fovs]
whole_cell_file = [fov + '_whole_cell.tiff' for fov in fovs]

# load the segmentation labels in for the FOV
label_map = load_utils.load_imgs_from_dir(
data_dir=seg_dir, files=whole_cell_file, xr_dim_name='compartments',
xr_channel_names=['whole_cell'], trim_suffix='_feature_0'
xr_channel_names=['whole_cell'], trim_suffix='_whole_cell'
).loc[fov, ...]

# use label_cells_by_cluster to create cell masks
Expand Down
4 changes: 2 additions & 2 deletions ark/phenotyping/post_cluster_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def test_create_mantis_project(tmp_path):
# create random segmentation masks
for fov in fovs:
data = np.random.randint(0, 5, 100).reshape(10, 10)
io.imsave(os.path.join(seg_dir, fov + '_feature_0.tiff'), data, check_contrast=False)
io.imsave(os.path.join(seg_dir, fov + '_whole_cell.tiff'), data, check_contrast=False)

# create cell table with two clusters
cell_label = np.tile(np.arange(1, 5), len(fovs))
Expand All @@ -81,5 +81,5 @@ def test_create_mantis_project(tmp_path):
assert set(np.unique(mask)) == set([0, 1, 2])

# mask should be non-zero in the same places as original
seg = io.imread(os.path.join(seg_dir, fov + '_feature_0.tiff'))
seg = io.imread(os.path.join(seg_dir, fov + '_whole_cell.tiff'))
assert np.array_equal(mask > 0, seg > 0)
8 changes: 4 additions & 4 deletions ark/segmentation/marker_quantification.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,15 +497,15 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
fovs=[fov_name])

# define the files for whole cell and nuclear
whole_cell_file = fov_name + '_feature_0.tiff'
nuclear_file = fov_name + '_feature_1.tiff'
whole_cell_file = fov_name + '_whole_cell.tiff'
nuclear_file = fov_name + '_nuclear.tiff'

# load the segmentation labels in
current_labels_cell = load_utils.load_imgs_from_dir(data_dir=segmentation_dir,
files=[whole_cell_file],
xr_dim_name='compartments',
xr_channel_names=['whole_cell'],
trim_suffix='_feature_0')
trim_suffix='_whole_cell')

compartments = ['whole_cell']
segmentation_labels = current_labels_cell.values
Expand All @@ -515,7 +515,7 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
files=[nuclear_file],
xr_dim_name='compartments',
xr_channel_names=['nuclear'],
trim_suffix='_feature_1')
trim_suffix='_nuclear')
compartments = ['whole_cell', 'nuclear']
segmentation_labels = np.concatenate((current_labels_cell.values,
current_labels_nuc.values), axis=-1)
Expand Down
18 changes: 9 additions & 9 deletions ark/segmentation/marker_quantification_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,20 +613,20 @@ def test_generate_cell_table_tree_loading():
for fov in range(cell_masks_40.shape[0]):
fov_whole_cell = cell_masks_40[fov, :, :, 0]
fov_nuclear = cell_masks_40[fov, :, :, 1]
io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tiff' % fov),
io.imsave(os.path.join(temp_dir, 'fov%d_whole_cell.tiff' % fov),
fov_whole_cell,
check_contrast=False)
io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tiff' % fov),
io.imsave(os.path.join(temp_dir, 'fov%d_nuclear.tiff' % fov),
fov_nuclear,
check_contrast=False)

for fov in range(cell_masks_20.shape[0]):
fov_whole_cell = cell_masks_20[fov, :, :, 0]
fov_nuclear = cell_masks_20[fov, :, :, 1]
io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tiff' % (fov + fov_size_split)),
io.imsave(os.path.join(temp_dir, 'fov%d_whole_cell.tiff' % (fov + fov_size_split)),
fov_whole_cell,
check_contrast=False)
io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tiff' % (fov + fov_size_split)),
io.imsave(os.path.join(temp_dir, 'fov%d_nuclear.tiff' % (fov + fov_size_split)),
fov_nuclear,
check_contrast=False)

Expand Down Expand Up @@ -682,7 +682,7 @@ def test_generate_cell_table_tree_loading():


# TODO: consider removing since MIBItiffs are being phased out
def test_generate_cell_table_loading():
def test_generate_cell_table_mibitiff_loading():
# is_mibitiff True case, load from mibitiff file structure
with tempfile.TemporaryDirectory() as temp_dir:
# define 3 fovs and 2 mibitiff_imgs
Expand Down Expand Up @@ -718,9 +718,9 @@ def test_generate_cell_table_loading():
for fov in range(cell_masks.shape[0]):
fov_whole_cell = cell_masks[fov, :, :, 0]
fov_nuclear = cell_masks[fov, :, :, 1]
io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tiff' % fov), fov_whole_cell,
io.imsave(os.path.join(temp_dir, 'fov%d_whole_cell.tiff' % fov), fov_whole_cell,
check_contrast=False)
io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tiff' % fov), fov_nuclear,
io.imsave(os.path.join(temp_dir, 'fov%d_nuclear.tiff' % fov), fov_nuclear,
check_contrast=False)

# generate sample norm and arcsinh data for all fovs
Expand Down Expand Up @@ -797,9 +797,9 @@ def test_generate_cell_table_extractions():
for fov in range(cell_masks.shape[0]):
fov_whole_cell = cell_masks[fov, :, :, 0]
fov_nuclear = cell_masks[fov, :, :, 1]
io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tiff' % fov), fov_whole_cell,
io.imsave(os.path.join(temp_dir, 'fov%d_whole_cell.tiff' % fov), fov_whole_cell,
check_contrast=False)
io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tiff' % fov), fov_nuclear,
io.imsave(os.path.join(temp_dir, 'fov%d_nuclear.tiff' % fov), fov_nuclear,
check_contrast=False)

default_norm_data, _ = marker_quantification.generate_cell_table(
Expand Down
17 changes: 9 additions & 8 deletions ark/utils/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ def label_cells_by_cluster(fov, all_data, label_map, fov_col=settings.FOV_ID,


def generate_cell_cluster_mask(fov, base_dir, seg_dir, cell_data_name,
cell_cluster_col='cell_meta_cluster', seg_suffix='_feature_0.tiff'):
cell_cluster_col='cell_meta_cluster',
seg_suffix='_whole_cell.tiff'):
"""For a fov, create a mask labeling each cell with their SOM or meta cluster label
Args:
Expand All @@ -152,7 +153,7 @@ def generate_cell_cluster_mask(fov, base_dir, seg_dir, cell_data_name,
Whether to assign SOM or meta clusters.
Needs to be `'cell_som_cluster'` or `'cell_meta_cluster'`
seg_suffix (str):
The suffix that the segmentation images use. Defaults to `'_feature_0.tiff'`.
The suffix that the segmentation images use. Defaults to `'_whole_cell.tiff'`.
Returns:
numpy.ndarray:
Expand Down Expand Up @@ -199,7 +200,7 @@ def generate_and_save_cell_cluster_masks(fovs: List[str],
seg_dir: Union[pathlib.Path, str],
cell_data_name: Union[pathlib.Path, str],
cell_cluster_col: str = 'cell_meta_cluster',
seg_suffix: str = '_feature_0.tiff',
seg_suffix: str = '_whole_cell.tiff',
sub_dir: str = None,
name_suffix: str = ''):
"""Generates cell cluster masks and saves them for downstream analysis.
Expand All @@ -219,7 +220,7 @@ def generate_and_save_cell_cluster_masks(fovs: List[str],
Whether to assign SOM or meta clusters. Needs to be `'cell_som_cluster'` or
`'cell_meta_cluster'`. Defaults to `'cell_meta_cluster'`.
seg_suffix (str, optional):
The suffix that the segmentation images use. Defaults to `'_feature_0.tiff'`.
The suffix that the segmentation images use. Defaults to `'_whole_cell.tiff'`.
sub_dir (str, optional):
The subdirectory to save the images in. If specified images are saved to
`"data_dir/sub_dir"`. If `sub_dir = None` the images are saved to `"data_dir"`.
Expand Down Expand Up @@ -381,7 +382,7 @@ def generate_and_save_neighborhood_cluster_masks(fovs: List[str],
save_dir: Union[pathlib.Path, str],
neighborhood_data: pd.DataFrame,
seg_dir: str,
seg_suffix: str = '_feature_0.tiff',
seg_suffix: str = '_whole_cell.tiff',
xr_channel_name='segmentation_label',
sub_dir: str = None,
name_suffix: str = ''):
Expand All @@ -397,7 +398,7 @@ def generate_and_save_neighborhood_cluster_masks(fovs: List[str],
seg_dir (str):
The path to the segmentation data.
seg_suffix (str):
The suffix that the segmentation images use. Defaults to `'_feature_0.tiff'`.
The suffix that the segmentation images use. Defaults to `'_whole_cell.tiff'`.
xr_channel_name (str):
Channel name for segmented data array.
sub_dir (str, optional):
Expand Down Expand Up @@ -603,8 +604,8 @@ def stitch_images_by_shape(data_dir, stitched_dir, img_sub_folder=None, channels

# retrieve valid fov names
if segmentation:
fovs = ns.natsorted(io_utils.list_files(data_dir, substrs='_feature_0.tiff'))
fovs = io_utils.extract_delimited_names(fovs, delimiter='_feature_0.tiff')
fovs = ns.natsorted(io_utils.list_files(data_dir, substrs='_whole_cell.tiff'))
fovs = io_utils.extract_delimited_names(fovs, delimiter='_whole_cell.tiff')
elif clustering:
fovs = ns.natsorted(io_utils.list_files(data_dir, substrs=f'_{clustering}_mask.tiff'))
fovs = io_utils.extract_delimited_names(fovs, delimiter=f'_{clustering}_mask.tiff')
Expand Down
10 changes: 5 additions & 5 deletions ark/utils/data_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def test_generate_cell_cluster_mask():

# generate a sample segmentation mask
cell_mask = np.random.randint(low=0, high=5, size=(40, 40), dtype="int16")
io.imsave(os.path.join(temp_dir, '%s_feature_0.tiff' % fov), cell_mask,
io.imsave(os.path.join(temp_dir, '%s_whole_cell.tiff' % fov), cell_mask,
check_contrast=False)

# bad consensus path passed
Expand Down Expand Up @@ -238,7 +238,7 @@ def test_generate_and_save_cell_cluster_masks(sub_dir, name_suffix):
fov_index = fov if fov < fov_size_split else fov_size_split - fov
fov_mask = cell_masks_40 if fov < fov_size_split else cell_masks_20
fov_whole_cell = fov_mask[fov_index, :, :, 0]
io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tiff' % fov), fov_whole_cell,
io.imsave(os.path.join(temp_dir, 'fov%d_whole_cell.tiff' % fov), fov_whole_cell,
check_contrast=False)

# create a sample cell consensus file based on SOM cluster assignments
Expand Down Expand Up @@ -287,7 +287,7 @@ def test_generate_and_save_cell_cluster_masks(sub_dir, name_suffix):
seg_dir=temp_dir,
cell_data_name='cluster_consensus_som.feather',
cell_cluster_col='cell_som_cluster',
seg_suffix='_feature_0.tiff',
seg_suffix='_whole_cell.tiff',
sub_dir=sub_dir,
name_suffix=name_suffix
)
Expand Down Expand Up @@ -483,7 +483,7 @@ def test_generate_and_save_neighborhood_cluster_masks(sub_dir, name_suffix):

for fov in fovs:
io.imsave(
os.path.join(temp_dir, 'seg_dir', fov + '_feature_0.tiff'),
os.path.join(temp_dir, 'seg_dir', fov + '_whole_cell.tiff'),
sample_label_maps.loc[fov, ...].values
)

Expand Down Expand Up @@ -731,7 +731,7 @@ def test_stitch_images_by_shape(segmentation, clustering, subdir, fovs):
os.makedirs(data_dir)

if segmentation:
chans = ['feature_0', 'feature_1']
chans = ['nuclear', 'whole_cell']
elif clustering:
chans = [clustering + '_mask']
else:
Expand Down
Loading

0 comments on commit f25f809

Please sign in to comment.