Skip to content

Commit

Permalink
Standardize usage of .tiff extension (#801)
Browse files Browse the repository at this point in the history
* Start conversion to .tiff extension

* Change revision hash to reflect new data download

* Update the HuggingFace hash

* Remove .tif extension in pixel clustering notebook

* Fix .tif to .tiff extensions in tests (part I)

* Fix .tif to .tiff extensions in tests (part II)

* Fix .tif to .tiff extensions in tests (part III)

* Documentation-related tiff fixes

* Simplify .tiff extension

* Clarify comment about DeepCell .tif saving

* Clarify that deepcell input files are now tiff

* Remove .tif support for load_imgs_from_dir and load_imgs_from_tree

* Remove MIBItiff .tif assumption

* Change .tif extension in example_dataset

* Remove old .tif channel files

* Delete repo example_dataset files no longer needed (thank you HuggingFace)

* Re-add mibitiff_inputs folder (needed to test backwards compatibility for load_imgs_from_mibitiff)

* Change tiff_utils test to ensure input_data is no longer used

* Change HuggingFace revision back to main
  • Loading branch information
alex-l-kong authored Nov 2, 2022
1 parent ef2ab82 commit 0105298
Show file tree
Hide file tree
Showing 56 changed files with 146 additions and 19,528 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -276,11 +276,11 @@ segmentation/cell_table/
**Deepcell Output:** This compartment stores the segmentation images after running deepcell.
```sh
segmentation/deepcell_output/
├── fov0_feature_0.tif
├── fov0_feature_1.tif
├── fov0_feature_0.tiff
├── fov0_feature_1.tiff
├── ...
├── fov10_feature_0.tif
└── fov10_feature_1.tif
├── fov10_feature_0.tiff
└── fov10_feature_1.tiff
```

**Example Pixel Output:** This compartment stores feather files, csvs and pixel masks generated by pixel clustering.
Expand Down
8 changes: 4 additions & 4 deletions ark/analysis/spatial_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def generate_channel_spatial_enrichment_stats(label_dir, marker_thresholds, all_
all_data (pandas.DataFrame):
data including fovs, cell labels, and cell expression matrix for all markers
suffix (str):
suffix for tif file names
suffix for tiff file names
xr_channel_name (str):
channel name for label data array
**kwargs (dict):
Expand All @@ -39,7 +39,7 @@ def generate_channel_spatial_enrichment_stats(label_dir, marker_thresholds, all_
"""

# parse files in label_dir
all_label_names = io_utils.list_files(label_dir, substrs=['.tif'])
all_label_names = io_utils.list_files(label_dir, substrs=['.tiff'])

included_fovs = kwargs.get('included_fovs', None)
if included_fovs:
Expand Down Expand Up @@ -237,7 +237,7 @@ def generate_cluster_spatial_enrichment_stats(label_dir, all_data, suffix='_feat
all_data (pandas.DataFrame):
data including fovs, cell labels, and cell expression matrix for all markers
suffix (str):
suffix for tif file names
suffix for tiff file names
xr_channel_name (str):
channel name for label data array
**kwargs (dict):
Expand All @@ -254,7 +254,7 @@ def generate_cluster_spatial_enrichment_stats(label_dir, all_data, suffix='_feat
"""

# parse files in label_dir
all_label_names = io_utils.list_files(label_dir, substrs=['.tif'])
all_label_names = io_utils.list_files(label_dir, substrs=['.tiff'])

included_fovs = kwargs.get('included_fovs', None)
if included_fovs:
Expand Down
4 changes: 2 additions & 2 deletions ark/phenotyping/pixel_cluster_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def filter_with_nuclear_mask(fovs, tiff_dir, seg_dir, channel,
fovs=[fov], channels=[channel]).values[0, :, :, 0]

# load the segmented image in
seg_img = imread(os.path.join(seg_dir, fov + '_feature_1.tif'))[0, ...]
seg_img = imread(os.path.join(seg_dir, fov + '_feature_1.tiff'))[0, ...]

# mask out the nucleus
if exclude:
Expand Down Expand Up @@ -538,7 +538,7 @@ def preprocess_fov(base_dir, tiff_dir, data_dir, subset_dir, seg_dir, seg_suffix


def create_pixel_matrix(fovs, channels, base_dir, tiff_dir, seg_dir,
img_sub_folder="TIFs", seg_suffix='_feature_0.tif',
img_sub_folder="TIFs", seg_suffix='_feature_0.tiff',
pixel_cluster_prefix='pixel_cluster_prefix',
pixel_output_dir='pixel_output_dir',
data_dir='pixel_mat_data',
Expand Down
8 changes: 4 additions & 4 deletions ark/phenotyping/pixel_cluster_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ def test_filter_with_nuclear_mask(sub_dir, exclude, capsys):
nuclear_coords[fov] = (nuclear_x, nuclear_y)

# save the nuclear segmentation
file_name = fov + "_feature_1.tif"
file_name = fov + "_feature_1.tiff"
io.imsave(os.path.join(seg_dir, file_name), rand_img,
check_contrast=False)

Expand Down Expand Up @@ -781,7 +781,7 @@ def test_preprocess_fov(mocker):
# create sample segmentation data
for fov in ['fov0', 'fov1']:
rand_img = np.random.randint(0, 16, size=(10, 10))
file_name = fov + "_feature_0.tif"
file_name = fov + "_feature_0.tiff"
io.imsave(os.path.join(seg_dir, file_name), rand_img,
check_contrast=False)

Expand All @@ -795,7 +795,7 @@ def test_preprocess_fov(mocker):
# NOTE: don't test the return value, leave that for test_create_pixel_matrix
pixel_cluster_utils.preprocess_fov(
temp_dir, tiff_dir, 'pixel_mat_data', 'pixel_mat_subsetted',
seg_dir, '_feature_0.tif', 'TIFs', False, ['chan0', 'chan1', 'chan2'],
seg_dir, '_feature_0.tiff', 'TIFs', False, ['chan0', 'chan1', 'chan2'],
2, 0.1, 1, 42, channel_norm_df, 'fov0'
)

Expand Down Expand Up @@ -902,7 +902,7 @@ def test_create_pixel_matrix_base(fovs, chans, sub_dir, seg_dir_include,
# create sample segmentation data
for fov in fovs:
rand_img = np.random.randint(0, 16, size=(10, 10))
file_name = fov + "_feature_0.tif"
file_name = fov + "_feature_0.tiff"
io.imsave(os.path.join(seg_dir, file_name), rand_img,
check_contrast=False)
# otherwise, set seg_dir to None
Expand Down
2 changes: 1 addition & 1 deletion ark/phenotyping/post_cluster_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def create_mantis_project(cell_table, fovs, seg_dir, pop_col, mask_dir, image_di

# label and save the cell mask for each FOV
for fov in fovs:
whole_cell_file = [fov + '_feature_0.tif' for fov in fovs]
whole_cell_file = [fov + '_feature_0.tiff' for fov in fovs]

# load the segmentation labels in for the FOV
label_map = load_utils.load_imgs_from_dir(
Expand Down
4 changes: 2 additions & 2 deletions ark/phenotyping/post_cluster_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def test_create_mantis_project(tmp_path):
# create random segmentation masks
for fov in fovs:
data = np.random.randint(0, 5, 100).reshape(10, 10)
io.imsave(os.path.join(seg_dir, fov + '_feature_0.tif'), data, check_contrast=False)
io.imsave(os.path.join(seg_dir, fov + '_feature_0.tiff'), data, check_contrast=False)

# create cell table with two clusters
cell_label = np.tile(np.arange(1, 5), len(fovs))
Expand All @@ -81,5 +81,5 @@ def test_create_mantis_project(tmp_path):
assert set(np.unique(mask)) == set([0, 1, 2])

# mask should be non-zero in the same places as original
seg = io.imread(os.path.join(seg_dir, fov + '_feature_0.tif'))
seg = io.imread(os.path.join(seg_dir, fov + '_feature_0.tiff'))
assert np.array_equal(mask > 0, seg > 0)
4 changes: 2 additions & 2 deletions ark/segmentation/marker_quantification.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,8 +497,8 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
fovs=[fov_name])

# define the files for whole cell and nuclear
whole_cell_file = fov_name + '_feature_0.tif'
nuclear_file = fov_name + '_feature_1.tif'
whole_cell_file = fov_name + '_feature_0.tiff'
nuclear_file = fov_name + '_feature_1.tiff'

# load the segmentation labels in
current_labels_cell = load_utils.load_imgs_from_dir(data_dir=segmentation_dir,
Expand Down
20 changes: 10 additions & 10 deletions ark/segmentation/marker_quantification_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,7 @@ def test_generate_cell_table_tree_loading():

# define a subset of fovs with file extensions
fovs_subset_ext = fovs[:2]
fovs_subset_ext[0] = str(fovs_subset_ext[0]) + ".tif"
fovs_subset_ext[0] = str(fovs_subset_ext[0]) + ".tiff"
fovs_subset_ext[1] = str(fovs_subset_ext[1]) + ".tiff"

# generate sample segmentation_masks
Expand All @@ -613,20 +613,20 @@ def test_generate_cell_table_tree_loading():
for fov in range(cell_masks_40.shape[0]):
fov_whole_cell = cell_masks_40[fov, :, :, 0]
fov_nuclear = cell_masks_40[fov, :, :, 1]
io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tif' % fov),
io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tiff' % fov),
fov_whole_cell,
check_contrast=False)
io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tif' % fov),
io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tiff' % fov),
fov_nuclear,
check_contrast=False)

for fov in range(cell_masks_20.shape[0]):
fov_whole_cell = cell_masks_20[fov, :, :, 0]
fov_nuclear = cell_masks_20[fov, :, :, 1]
io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tif' % (fov + fov_size_split)),
io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tiff' % (fov + fov_size_split)),
fov_whole_cell,
check_contrast=False)
io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tif' % (fov + fov_size_split)),
io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tiff' % (fov + fov_size_split)),
fov_nuclear,
check_contrast=False)

Expand Down Expand Up @@ -693,7 +693,7 @@ def test_generate_cell_table_loading():

# define a subset of fovs with file extensions
fovs_subset_ext = fovs[:2]
fovs_subset_ext[0] = str(fovs_subset_ext[0]) + ".tif"
fovs_subset_ext[0] = str(fovs_subset_ext[0]) + ".tiff"
fovs_subset_ext[1] = str(fovs_subset_ext[1]) + ".tiff"

tiff_dir = os.path.join(temp_dir, "mibitiff_inputs")
Expand All @@ -718,9 +718,9 @@ def test_generate_cell_table_loading():
for fov in range(cell_masks.shape[0]):
fov_whole_cell = cell_masks[fov, :, :, 0]
fov_nuclear = cell_masks[fov, :, :, 1]
io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tif' % fov), fov_whole_cell,
io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tiff' % fov), fov_whole_cell,
check_contrast=False)
io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tif' % fov), fov_nuclear,
io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tiff' % fov), fov_nuclear,
check_contrast=False)

# generate sample norm and arcsinh data for all fovs
Expand Down Expand Up @@ -797,9 +797,9 @@ def test_generate_cell_table_extractions():
for fov in range(cell_masks.shape[0]):
fov_whole_cell = cell_masks[fov, :, :, 0]
fov_nuclear = cell_masks[fov, :, :, 1]
io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tif' % fov), fov_whole_cell,
io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tiff' % fov), fov_whole_cell,
check_contrast=False)
io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tif' % fov), fov_nuclear,
io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tiff' % fov), fov_nuclear,
check_contrast=False)

default_norm_data, _ = marker_quantification.generate_cell_table(
Expand Down
20 changes: 10 additions & 10 deletions ark/utils/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def label_cells_by_cluster(fov, all_data, label_map, fov_col=settings.FOV_ID,


def generate_cell_cluster_mask(fov, base_dir, seg_dir, cell_data_name,
cell_cluster_col='cell_meta_cluster', seg_suffix='_feature_0.tif'):
cell_cluster_col='cell_meta_cluster', seg_suffix='_feature_0.tiff'):
"""For a fov, create a mask labeling each cell with their SOM or meta cluster label
Args:
Expand All @@ -152,7 +152,7 @@ def generate_cell_cluster_mask(fov, base_dir, seg_dir, cell_data_name,
Whether to assign SOM or meta clusters.
Needs to be `'cell_som_cluster'` or `'cell_meta_cluster'`
seg_suffix (str):
The suffix that the segmentation images use
The suffix that the segmentation images use. Defaults to `'_feature_0.tiff'`.
Returns:
numpy.ndarray:
Expand Down Expand Up @@ -199,7 +199,7 @@ def generate_and_save_cell_cluster_masks(fovs: List[str],
seg_dir: Union[pathlib.Path, str],
cell_data_name: Union[pathlib.Path, str],
cell_cluster_col: str = 'cell_meta_cluster',
seg_suffix: str = '_feature_0.tif',
seg_suffix: str = '_feature_0.tiff',
sub_dir: str = None,
name_suffix: str = ''):
"""Generates cell cluster masks and saves them for downstream analysis.
Expand All @@ -219,7 +219,7 @@ def generate_and_save_cell_cluster_masks(fovs: List[str],
Whether to assign SOM or meta clusters. Needs to be `'cell_som_cluster'` or
`'cell_meta_cluster'`. Defaults to `'cell_meta_cluster'`.
seg_suffix (str, optional):
The suffix that the segmentation images use. Defaults to `'_feature_0.tif'`.
The suffix that the segmentation images use. Defaults to `'_feature_0.tiff'`.
sub_dir (str, optional):
The subdirectory to save the images in. If specified images are saved to
`"data_dir/sub_dir"`. If `sub_dir = None` the images are saved to `"data_dir"`.
Expand Down Expand Up @@ -480,7 +480,7 @@ def generate_deepcell_input(data_dir, tiff_dir, nuc_channels, mem_channels, fovs
if mem_channels:
out[1] = np.sum(data_xr.loc[fov_name, :, :, mem_channels].values, axis=2)

save_path = os.path.join(data_dir, f"{fov_name}.tif")
save_path = os.path.join(data_dir, f"{fov_name}.tiff")
io.imsave(save_path, out, plugin='tifffile', check_contrast=False)


Expand Down Expand Up @@ -591,11 +591,11 @@ def stitch_images_by_shape(data_dir, stitched_dir, img_sub_folder=None, channels

# retrieve valid fov names
if segmentation:
fovs = ns.natsorted(io_utils.list_files(data_dir, substrs='_feature_0.tif'))
fovs = io_utils.extract_delimited_names(fovs, delimiter='_feature_0.tif')
fovs = ns.natsorted(io_utils.list_files(data_dir, substrs='_feature_0.tiff'))
fovs = io_utils.extract_delimited_names(fovs, delimiter='_feature_0.tiff')
elif clustering:
fovs = ns.natsorted(io_utils.list_files(data_dir, substrs=f'_{clustering}_mask.tif'))
fovs = io_utils.extract_delimited_names(fovs, delimiter=f'_{clustering}_mask.tif')
fovs = ns.natsorted(io_utils.list_files(data_dir, substrs=f'_{clustering}_mask.tiff'))
fovs = io_utils.extract_delimited_names(fovs, delimiter=f'_{clustering}_mask.tiff')
else:
fovs = ns.natsorted(io_utils.list_folders(data_dir))
# ignore previous toffy stitching in fov directory
Expand All @@ -622,7 +622,7 @@ def stitch_images_by_shape(data_dir, stitched_dir, img_sub_folder=None, channels
if not segmentation and not clustering:
channel_imgs = io_utils.list_files(
dir_name=os.path.join(data_dir, fovs[0], img_sub_folder),
substrs=['.tif', '.jpg', '.png'])
substrs=['.tiff', '.jpg', '.png'])
else:
channel_imgs = io_utils.list_files(data_dir, substrs=fovs[0])
channel_imgs = [chan.split(fovs[0] + '_')[1] for chan in channel_imgs]
Expand Down
28 changes: 14 additions & 14 deletions ark/utils/data_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def test_generate_cell_cluster_mask():

# generate a sample segmentation mask
cell_mask = np.random.randint(low=0, high=5, size=(40, 40), dtype="int16")
io.imsave(os.path.join(temp_dir, '%s_feature_0.tif' % fov), cell_mask,
io.imsave(os.path.join(temp_dir, '%s_feature_0.tiff' % fov), cell_mask,
check_contrast=False)

# bad consensus path passed
Expand Down Expand Up @@ -238,7 +238,7 @@ def test_generate_and_save_cell_cluster_masks(sub_dir, name_suffix):
fov_index = fov if fov < fov_size_split else fov_size_split - fov
fov_mask = cell_masks_40 if fov < fov_size_split else cell_masks_20
fov_whole_cell = fov_mask[fov_index, :, :, 0]
io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tif' % fov), fov_whole_cell,
io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tiff' % fov), fov_whole_cell,
check_contrast=False)

# create a sample cell consensus file based on SOM cluster assignments
Expand Down Expand Up @@ -287,7 +287,7 @@ def test_generate_and_save_cell_cluster_masks(sub_dir, name_suffix):
seg_dir=temp_dir,
cell_data_name='cluster_consensus_som.feather',
cell_cluster_col='cell_som_cluster',
seg_suffix='_feature_0.tif',
seg_suffix='_feature_0.tiff',
sub_dir=sub_dir,
name_suffix=name_suffix
)
Expand Down Expand Up @@ -324,13 +324,13 @@ def test_generate_pixel_cluster_mask():
# generate sample fov folder with one channel value, no sub folder
channel_data = np.random.randint(low=0, high=5, size=(40, 40), dtype="int16")
os.mkdir(os.path.join(temp_dir, 'fov0'))
io.imsave(os.path.join(temp_dir, 'fov0', 'chan0.tif'), channel_data,
io.imsave(os.path.join(temp_dir, 'fov0', 'chan0.tiff'), channel_data,
check_contrast=False)

# bad consensus path passed
with pytest.raises(FileNotFoundError):
data_utils.generate_pixel_cluster_mask(
fov, temp_dir, temp_dir, os.path.join('fov0', 'chan0.tif'), 'bad_consensus_path'
fov, temp_dir, temp_dir, os.path.join('fov0', 'chan0.tiff'), 'bad_consensus_path'
)

# create a dummy consensus directory
Expand All @@ -350,20 +350,20 @@ def test_generate_pixel_cluster_mask():
# bad cluster column provided
with pytest.raises(ValueError):
data_utils.generate_pixel_cluster_mask(
fov, temp_dir, temp_dir, os.path.join('fov0', 'chan0.tif'),
fov, temp_dir, temp_dir, os.path.join('fov0', 'chan0.tiff'),
'pixel_mat_consensus', 'bad_cluster'
)

# bad fov provided
with pytest.raises(ValueError):
data_utils.generate_pixel_cluster_mask(
'fov1', temp_dir, temp_dir, os.path.join('fov0', 'chan0.tif'),
'fov1', temp_dir, temp_dir, os.path.join('fov0', 'chan0.tiff'),
'pixel_mat_consensus', 'pixel_som_cluster'
)

# test on SOM assignments
pixel_masks = data_utils.generate_pixel_cluster_mask(
fov, temp_dir, temp_dir, os.path.join('fov0', 'chan0.tif'),
fov, temp_dir, temp_dir, os.path.join('fov0', 'chan0.tiff'),
'pixel_mat_consensus', 'pixel_som_cluster'
)

Expand All @@ -375,7 +375,7 @@ def test_generate_pixel_cluster_mask():

# test on meta assignments
pixel_masks = data_utils.generate_pixel_cluster_mask(
fov, temp_dir, temp_dir, os.path.join('fov0', 'chan0.tif'),
fov, temp_dir, temp_dir, os.path.join('fov0', 'chan0.tiff'),
'pixel_mat_consensus', 'pixel_meta_cluster'
)

Expand Down Expand Up @@ -415,7 +415,7 @@ def test_generate_and_save_pixel_cluster_masks(sub_dir, name_suffix):
os.mkdir(os.path.join(temp_dir, fov))

io.imsave(
os.path.join(temp_dir, fov, 'chan0.tif'),
os.path.join(temp_dir, fov, 'chan0.tiff'),
channel_data,
check_contrast=False
)
Expand All @@ -434,7 +434,7 @@ def test_generate_and_save_pixel_cluster_masks(sub_dir, name_suffix):
base_dir=temp_dir,
save_dir=os.path.join(temp_dir, 'pixel_masks'),
tiff_dir=temp_dir,
chan_file='chan0.tif',
chan_file='chan0.tiff',
pixel_data_dir='pixel_mat_consensus',
pixel_cluster_col='pixel_meta_cluster',
sub_dir=sub_dir,
Expand Down Expand Up @@ -525,9 +525,9 @@ def test_generate_deepcell_input():
nucs = ['nuc2']
mems = ['mem2']

fov1path = os.path.join(temp_dir, 'fov1.tif')
fov2path = os.path.join(temp_dir, 'fov2.tif')
fov3path = os.path.join(temp_dir, 'fov3.tif')
fov1path = os.path.join(temp_dir, 'fov1.tiff')
fov2path = os.path.join(temp_dir, 'fov2.tiff')
fov3path = os.path.join(temp_dir, 'fov3.tiff')

data_utils.generate_deepcell_input(
data_dir=temp_dir, tiff_dir=tiff_dir, nuc_channels=nucs, mem_channels=mems,
Expand Down
Loading

0 comments on commit 0105298

Please sign in to comment.