Standardize usage of .tiff extension (#801)

* Start conversion to .tiff extension
* Change revision hash to reflect new data download
* Update the HuggingFace hash
* Remove .tif extension in pixel clustering notebook
* Fix .tif to .tiff extensions in tests (part I)
* Fix .tif to .tiff extensions in tests (part II)
* Fix .tif to .tiff extensions in tests (part III)
* Documentation-related tiff fixes
* Simplify .tiff extension
* Clarify comment about DeepCell .tif saving
* Clarify that deepcell input files are now tiff
* Remove .tif support for load_imgs_from_dir and load_imgs_from_tree
* Remove MIBItiff .tif assumption
* Change .tif extension in example_dataset
* Remove old .tif channel files
* Delete repo example_dataset files no longer needed (thank you HuggingFace)
* Re-add mibitiff_inputs folder back (needed to test backwards compatibility for load_imgs_from_mibitiff)
* Change tiff_utils test to ensure input_data is no longer used
* Change HuggingFace revision back to main
angelolab · Nov 2, 2022 · 0105298 · 0105298
1 parent ef2ab82
commit 0105298
Show file tree

Hide file tree

Showing 56 changed files with 146 additions and 19,528 deletions.
diff --git a/README.md b/README.md
@@ -276,11 +276,11 @@ segmentation/cell_table/
 **Deepcell Output:** This compartment stores the segmentation images after running deepcell.
 ```sh
 segmentation/deepcell_output/
-├── fov0_feature_0.tif
-├── fov0_feature_1.tif
+├── fov0_feature_0.tiff
+├── fov0_feature_1.tiff
 ├── ...
-├── fov10_feature_0.tif
-└── fov10_feature_1.tif
+├── fov10_feature_0.tiff
+└── fov10_feature_1.tiff
 ```
 
 **Example Pixel Output:** This compartment stores feather files, csvs and pixel masks generated by pixel clustering.

diff --git a/ark/analysis/spatial_analysis.py b/ark/analysis/spatial_analysis.py
@@ -22,7 +22,7 @@ def generate_channel_spatial_enrichment_stats(label_dir, marker_thresholds, all_
         all_data (pandas.DataFrame):
             data including fovs, cell labels, and cell expression matrix for all markers
         suffix (str):
-            suffix for tif file names
+            suffix for tiff file names
         xr_channel_name (str):
             channel name for label data array
         **kwargs (dict):
@@ -39,7 +39,7 @@ def generate_channel_spatial_enrichment_stats(label_dir, marker_thresholds, all_
     """
 
     # parse files in label_dir
-    all_label_names = io_utils.list_files(label_dir, substrs=['.tif'])
+    all_label_names = io_utils.list_files(label_dir, substrs=['.tiff'])
 
     included_fovs = kwargs.get('included_fovs', None)
     if included_fovs:
@@ -237,7 +237,7 @@ def generate_cluster_spatial_enrichment_stats(label_dir, all_data, suffix='_feat
         all_data (pandas.DataFrame):
             data including fovs, cell labels, and cell expression matrix for all markers
         suffix (str):
-            suffix for tif file names
+            suffix for tiff file names
         xr_channel_name (str):
             channel name for label data array
         **kwargs (dict):
@@ -254,7 +254,7 @@ def generate_cluster_spatial_enrichment_stats(label_dir, all_data, suffix='_feat
     """
 
     # parse files in label_dir
-    all_label_names = io_utils.list_files(label_dir, substrs=['.tif'])
+    all_label_names = io_utils.list_files(label_dir, substrs=['.tiff'])
 
     included_fovs = kwargs.get('included_fovs', None)
     if included_fovs:

diff --git a/ark/phenotyping/pixel_cluster_utils.py b/ark/phenotyping/pixel_cluster_utils.py
@@ -268,7 +268,7 @@ def filter_with_nuclear_mask(fovs, tiff_dir, seg_dir, channel,
                                              fovs=[fov], channels=[channel]).values[0, :, :, 0]
 
         # load the segmented image in
-        seg_img = imread(os.path.join(seg_dir, fov + '_feature_1.tif'))[0, ...]
+        seg_img = imread(os.path.join(seg_dir, fov + '_feature_1.tiff'))[0, ...]
 
         # mask out the nucleus
         if exclude:
@@ -538,7 +538,7 @@ def preprocess_fov(base_dir, tiff_dir, data_dir, subset_dir, seg_dir, seg_suffix
 
 
 def create_pixel_matrix(fovs, channels, base_dir, tiff_dir, seg_dir,
-                        img_sub_folder="TIFs", seg_suffix='_feature_0.tif',
+                        img_sub_folder="TIFs", seg_suffix='_feature_0.tiff',
                         pixel_cluster_prefix='pixel_cluster_prefix',
                         pixel_output_dir='pixel_output_dir',
                         data_dir='pixel_mat_data',

diff --git a/ark/phenotyping/pixel_cluster_utils_test.py b/ark/phenotyping/pixel_cluster_utils_test.py
@@ -520,7 +520,7 @@ def test_filter_with_nuclear_mask(sub_dir, exclude, capsys):
             nuclear_coords[fov] = (nuclear_x, nuclear_y)
 
             # save the nuclear segmentation
-            file_name = fov + "_feature_1.tif"
+            file_name = fov + "_feature_1.tiff"
             io.imsave(os.path.join(seg_dir, file_name), rand_img,
                       check_contrast=False)
 
@@ -781,7 +781,7 @@ def test_preprocess_fov(mocker):
         # create sample segmentation data
         for fov in ['fov0', 'fov1']:
             rand_img = np.random.randint(0, 16, size=(10, 10))
-            file_name = fov + "_feature_0.tif"
+            file_name = fov + "_feature_0.tiff"
             io.imsave(os.path.join(seg_dir, file_name), rand_img,
                       check_contrast=False)
 
@@ -795,7 +795,7 @@ def test_preprocess_fov(mocker):
         # NOTE: don't test the return value, leave that for test_create_pixel_matrix
         pixel_cluster_utils.preprocess_fov(
             temp_dir, tiff_dir, 'pixel_mat_data', 'pixel_mat_subsetted',
-            seg_dir, '_feature_0.tif', 'TIFs', False, ['chan0', 'chan1', 'chan2'],
+            seg_dir, '_feature_0.tiff', 'TIFs', False, ['chan0', 'chan1', 'chan2'],
             2, 0.1, 1, 42, channel_norm_df, 'fov0'
         )
 
@@ -902,7 +902,7 @@ def test_create_pixel_matrix_base(fovs, chans, sub_dir, seg_dir_include,
             # create sample segmentation data
             for fov in fovs:
                 rand_img = np.random.randint(0, 16, size=(10, 10))
-                file_name = fov + "_feature_0.tif"
+                file_name = fov + "_feature_0.tiff"
                 io.imsave(os.path.join(seg_dir, file_name), rand_img,
                           check_contrast=False)
         # otherwise, set seg_dir to None

diff --git a/ark/phenotyping/post_cluster_utils.py b/ark/phenotyping/post_cluster_utils.py
@@ -73,7 +73,7 @@ def create_mantis_project(cell_table, fovs, seg_dir, pop_col, mask_dir, image_di
 
     # label and save the cell mask for each FOV
     for fov in fovs:
-        whole_cell_file = [fov + '_feature_0.tif' for fov in fovs]
+        whole_cell_file = [fov + '_feature_0.tiff' for fov in fovs]
 
         # load the segmentation labels in for the FOV
         label_map = load_utils.load_imgs_from_dir(

diff --git a/ark/phenotyping/post_cluster_utils_test.py b/ark/phenotyping/post_cluster_utils_test.py
@@ -59,7 +59,7 @@ def test_create_mantis_project(tmp_path):
     # create random segmentation masks
     for fov in fovs:
         data = np.random.randint(0, 5, 100).reshape(10, 10)
-        io.imsave(os.path.join(seg_dir, fov + '_feature_0.tif'), data, check_contrast=False)
+        io.imsave(os.path.join(seg_dir, fov + '_feature_0.tiff'), data, check_contrast=False)
 
     # create cell table with two clusters
     cell_label = np.tile(np.arange(1, 5), len(fovs))
@@ -81,5 +81,5 @@ def test_create_mantis_project(tmp_path):
         assert set(np.unique(mask)) == set([0, 1, 2])
 
         # mask should be non-zero in the same places as original
-        seg = io.imread(os.path.join(seg_dir, fov + '_feature_0.tif'))
+        seg = io.imread(os.path.join(seg_dir, fov + '_feature_0.tiff'))
         assert np.array_equal(mask > 0, seg > 0)
diff --git a/ark/segmentation/marker_quantification.py b/ark/segmentation/marker_quantification.py
@@ -497,8 +497,8 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
                                                         fovs=[fov_name])
 
         # define the files for whole cell and nuclear
-        whole_cell_file = fov_name + '_feature_0.tif'
-        nuclear_file = fov_name + '_feature_1.tif'
+        whole_cell_file = fov_name + '_feature_0.tiff'
+        nuclear_file = fov_name + '_feature_1.tiff'
 
         # load the segmentation labels in
         current_labels_cell = load_utils.load_imgs_from_dir(data_dir=segmentation_dir,

diff --git a/ark/segmentation/marker_quantification_test.py b/ark/segmentation/marker_quantification_test.py
@@ -590,7 +590,7 @@ def test_generate_cell_table_tree_loading():
 
         # define a subset of fovs with file extensions
         fovs_subset_ext = fovs[:2]
-        fovs_subset_ext[0] = str(fovs_subset_ext[0]) + ".tif"
+        fovs_subset_ext[0] = str(fovs_subset_ext[0]) + ".tiff"
         fovs_subset_ext[1] = str(fovs_subset_ext[1]) + ".tiff"
 
         # generate sample segmentation_masks
@@ -613,20 +613,20 @@ def test_generate_cell_table_tree_loading():
         for fov in range(cell_masks_40.shape[0]):
             fov_whole_cell = cell_masks_40[fov, :, :, 0]
             fov_nuclear = cell_masks_40[fov, :, :, 1]
-            io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tif' % fov),
+            io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tiff' % fov),
                       fov_whole_cell,
                       check_contrast=False)
-            io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tif' % fov),
+            io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tiff' % fov),
                       fov_nuclear,
                       check_contrast=False)
 
         for fov in range(cell_masks_20.shape[0]):
             fov_whole_cell = cell_masks_20[fov, :, :, 0]
             fov_nuclear = cell_masks_20[fov, :, :, 1]
-            io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tif' % (fov + fov_size_split)),
+            io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tiff' % (fov + fov_size_split)),
                       fov_whole_cell,
                       check_contrast=False)
-            io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tif' % (fov + fov_size_split)),
+            io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tiff' % (fov + fov_size_split)),
                       fov_nuclear,
                       check_contrast=False)
 
@@ -693,7 +693,7 @@ def test_generate_cell_table_loading():
 
         # define a subset of fovs with file extensions
         fovs_subset_ext = fovs[:2]
-        fovs_subset_ext[0] = str(fovs_subset_ext[0]) + ".tif"
+        fovs_subset_ext[0] = str(fovs_subset_ext[0]) + ".tiff"
         fovs_subset_ext[1] = str(fovs_subset_ext[1]) + ".tiff"
 
         tiff_dir = os.path.join(temp_dir, "mibitiff_inputs")
@@ -718,9 +718,9 @@ def test_generate_cell_table_loading():
         for fov in range(cell_masks.shape[0]):
             fov_whole_cell = cell_masks[fov, :, :, 0]
             fov_nuclear = cell_masks[fov, :, :, 1]
-            io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tif' % fov), fov_whole_cell,
+            io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tiff' % fov), fov_whole_cell,
                       check_contrast=False)
-            io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tif' % fov), fov_nuclear,
+            io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tiff' % fov), fov_nuclear,
                       check_contrast=False)
 
         # generate sample norm and arcsinh data for all fovs
@@ -797,9 +797,9 @@ def test_generate_cell_table_extractions():
         for fov in range(cell_masks.shape[0]):
             fov_whole_cell = cell_masks[fov, :, :, 0]
             fov_nuclear = cell_masks[fov, :, :, 1]
-            io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tif' % fov), fov_whole_cell,
+            io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tiff' % fov), fov_whole_cell,
                       check_contrast=False)
-            io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tif' % fov), fov_nuclear,
+            io.imsave(os.path.join(temp_dir, 'fov%d_feature_1.tiff' % fov), fov_nuclear,
                       check_contrast=False)
 
         default_norm_data, _ = marker_quantification.generate_cell_table(

diff --git a/ark/utils/data_utils.py b/ark/utils/data_utils.py
@@ -136,7 +136,7 @@ def label_cells_by_cluster(fov, all_data, label_map, fov_col=settings.FOV_ID,
 
 
 def generate_cell_cluster_mask(fov, base_dir, seg_dir, cell_data_name,
-                               cell_cluster_col='cell_meta_cluster', seg_suffix='_feature_0.tif'):
+                               cell_cluster_col='cell_meta_cluster', seg_suffix='_feature_0.tiff'):
     """For a fov, create a mask labeling each cell with their SOM or meta cluster label
 
     Args:
@@ -152,7 +152,7 @@ def generate_cell_cluster_mask(fov, base_dir, seg_dir, cell_data_name,
             Whether to assign SOM or meta clusters.
             Needs to be `'cell_som_cluster'` or `'cell_meta_cluster'`
         seg_suffix (str):
-            The suffix that the segmentation images use
+            The suffix that the segmentation images use. Defaults to `'_feature_0.tiff'`.
 
     Returns:
         numpy.ndarray:
@@ -199,7 +199,7 @@ def generate_and_save_cell_cluster_masks(fovs: List[str],
                                          seg_dir: Union[pathlib.Path, str],
                                          cell_data_name: Union[pathlib.Path, str],
                                          cell_cluster_col: str = 'cell_meta_cluster',
-                                         seg_suffix: str = '_feature_0.tif',
+                                         seg_suffix: str = '_feature_0.tiff',
                                          sub_dir: str = None,
                                          name_suffix: str = ''):
     """Generates cell cluster masks and saves them for downstream analysis.
@@ -219,7 +219,7 @@ def generate_and_save_cell_cluster_masks(fovs: List[str],
             Whether to assign SOM or meta clusters. Needs to be `'cell_som_cluster'` or
             `'cell_meta_cluster'`. Defaults to `'cell_meta_cluster'`.
         seg_suffix (str, optional):
-            The suffix that the segmentation images use. Defaults to `'_feature_0.tif'`.
+            The suffix that the segmentation images use. Defaults to `'_feature_0.tiff'`.
         sub_dir (str, optional):
             The subdirectory to save the images in. If specified images are saved to
             `"data_dir/sub_dir"`. If `sub_dir = None` the images are saved to `"data_dir"`.
@@ -480,7 +480,7 @@ def generate_deepcell_input(data_dir, tiff_dir, nuc_channels, mem_channels, fovs
         if mem_channels:
             out[1] = np.sum(data_xr.loc[fov_name, :, :, mem_channels].values, axis=2)
 
-        save_path = os.path.join(data_dir, f"{fov_name}.tif")
+        save_path = os.path.join(data_dir, f"{fov_name}.tiff")
         io.imsave(save_path, out, plugin='tifffile', check_contrast=False)
 
 
@@ -591,11 +591,11 @@ def stitch_images_by_shape(data_dir, stitched_dir, img_sub_folder=None, channels
 
     # retrieve valid fov names
     if segmentation:
-        fovs = ns.natsorted(io_utils.list_files(data_dir, substrs='_feature_0.tif'))
-        fovs = io_utils.extract_delimited_names(fovs, delimiter='_feature_0.tif')
+        fovs = ns.natsorted(io_utils.list_files(data_dir, substrs='_feature_0.tiff'))
+        fovs = io_utils.extract_delimited_names(fovs, delimiter='_feature_0.tiff')
     elif clustering:
-        fovs = ns.natsorted(io_utils.list_files(data_dir, substrs=f'_{clustering}_mask.tif'))
-        fovs = io_utils.extract_delimited_names(fovs, delimiter=f'_{clustering}_mask.tif')
+        fovs = ns.natsorted(io_utils.list_files(data_dir, substrs=f'_{clustering}_mask.tiff'))
+        fovs = io_utils.extract_delimited_names(fovs, delimiter=f'_{clustering}_mask.tiff')
     else:
         fovs = ns.natsorted(io_utils.list_folders(data_dir))
         # ignore previous toffy stitching in fov directory
@@ -622,7 +622,7 @@ def stitch_images_by_shape(data_dir, stitched_dir, img_sub_folder=None, channels
     if not segmentation and not clustering:
         channel_imgs = io_utils.list_files(
             dir_name=os.path.join(data_dir, fovs[0], img_sub_folder),
-            substrs=['.tif', '.jpg', '.png'])
+            substrs=['.tiff', '.jpg', '.png'])
     else:
         channel_imgs = io_utils.list_files(data_dir, substrs=fovs[0])
         channel_imgs = [chan.split(fovs[0] + '_')[1] for chan in channel_imgs]

diff --git a/ark/utils/data_utils_test.py b/ark/utils/data_utils_test.py
@@ -136,7 +136,7 @@ def test_generate_cell_cluster_mask():
 
         # generate a sample segmentation mask
         cell_mask = np.random.randint(low=0, high=5, size=(40, 40), dtype="int16")
-        io.imsave(os.path.join(temp_dir, '%s_feature_0.tif' % fov), cell_mask,
+        io.imsave(os.path.join(temp_dir, '%s_feature_0.tiff' % fov), cell_mask,
                   check_contrast=False)
 
         # bad consensus path passed
@@ -238,7 +238,7 @@ def test_generate_and_save_cell_cluster_masks(sub_dir, name_suffix):
             fov_index = fov if fov < fov_size_split else fov_size_split - fov
             fov_mask = cell_masks_40 if fov < fov_size_split else cell_masks_20
             fov_whole_cell = fov_mask[fov_index, :, :, 0]
-            io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tif' % fov), fov_whole_cell,
+            io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tiff' % fov), fov_whole_cell,
                       check_contrast=False)
 
         # create a sample cell consensus file based on SOM cluster assignments
@@ -287,7 +287,7 @@ def test_generate_and_save_cell_cluster_masks(sub_dir, name_suffix):
                                              seg_dir=temp_dir,
                                              cell_data_name='cluster_consensus_som.feather',
                                              cell_cluster_col='cell_som_cluster',
-                                             seg_suffix='_feature_0.tif',
+                                             seg_suffix='_feature_0.tiff',
                                              sub_dir=sub_dir,
                                              name_suffix=name_suffix
                                              )
@@ -324,13 +324,13 @@ def test_generate_pixel_cluster_mask():
         # generate sample fov folder with one channel value, no sub folder
         channel_data = np.random.randint(low=0, high=5, size=(40, 40), dtype="int16")
         os.mkdir(os.path.join(temp_dir, 'fov0'))
-        io.imsave(os.path.join(temp_dir, 'fov0', 'chan0.tif'), channel_data,
+        io.imsave(os.path.join(temp_dir, 'fov0', 'chan0.tiff'), channel_data,
                   check_contrast=False)
 
         # bad consensus path passed
         with pytest.raises(FileNotFoundError):
             data_utils.generate_pixel_cluster_mask(
-                fov, temp_dir, temp_dir, os.path.join('fov0', 'chan0.tif'), 'bad_consensus_path'
+                fov, temp_dir, temp_dir, os.path.join('fov0', 'chan0.tiff'), 'bad_consensus_path'
             )
 
         # create a dummy consensus directory
@@ -350,20 +350,20 @@ def test_generate_pixel_cluster_mask():
         # bad cluster column provided
         with pytest.raises(ValueError):
             data_utils.generate_pixel_cluster_mask(
-                fov, temp_dir, temp_dir, os.path.join('fov0', 'chan0.tif'),
+                fov, temp_dir, temp_dir, os.path.join('fov0', 'chan0.tiff'),
                 'pixel_mat_consensus', 'bad_cluster'
             )
 
         # bad fov provided
         with pytest.raises(ValueError):
             data_utils.generate_pixel_cluster_mask(
-                'fov1', temp_dir, temp_dir, os.path.join('fov0', 'chan0.tif'),
+                'fov1', temp_dir, temp_dir, os.path.join('fov0', 'chan0.tiff'),
                 'pixel_mat_consensus', 'pixel_som_cluster'
             )
 
         # test on SOM assignments
         pixel_masks = data_utils.generate_pixel_cluster_mask(
-            fov, temp_dir, temp_dir, os.path.join('fov0', 'chan0.tif'),
+            fov, temp_dir, temp_dir, os.path.join('fov0', 'chan0.tiff'),
             'pixel_mat_consensus', 'pixel_som_cluster'
         )
 
@@ -375,7 +375,7 @@ def test_generate_pixel_cluster_mask():
 
         # test on meta assignments
         pixel_masks = data_utils.generate_pixel_cluster_mask(
-            fov, temp_dir, temp_dir, os.path.join('fov0', 'chan0.tif'),
+            fov, temp_dir, temp_dir, os.path.join('fov0', 'chan0.tiff'),
             'pixel_mat_consensus', 'pixel_meta_cluster'
         )
 
@@ -415,7 +415,7 @@ def test_generate_and_save_pixel_cluster_masks(sub_dir, name_suffix):
                 os.mkdir(os.path.join(temp_dir, fov))
 
             io.imsave(
-                os.path.join(temp_dir, fov, 'chan0.tif'),
+                os.path.join(temp_dir, fov, 'chan0.tiff'),
                 channel_data,
                 check_contrast=False
             )
@@ -434,7 +434,7 @@ def test_generate_and_save_pixel_cluster_masks(sub_dir, name_suffix):
                                               base_dir=temp_dir,
                                               save_dir=os.path.join(temp_dir, 'pixel_masks'),
                                               tiff_dir=temp_dir,
-                                              chan_file='chan0.tif',
+                                              chan_file='chan0.tiff',
                                               pixel_data_dir='pixel_mat_consensus',
                                               pixel_cluster_col='pixel_meta_cluster',
                                               sub_dir=sub_dir,
@@ -525,9 +525,9 @@ def test_generate_deepcell_input():
             nucs = ['nuc2']
             mems = ['mem2']
 
-            fov1path = os.path.join(temp_dir, 'fov1.tif')
-            fov2path = os.path.join(temp_dir, 'fov2.tif')
-            fov3path = os.path.join(temp_dir, 'fov3.tif')
+            fov1path = os.path.join(temp_dir, 'fov1.tiff')
+            fov2path = os.path.join(temp_dir, 'fov2.tiff')
+            fov3path = os.path.join(temp_dir, 'fov3.tiff')
 
             data_utils.generate_deepcell_input(
                 data_dir=temp_dir, tiff_dir=tiff_dir, nuc_channels=nucs, mem_channels=mems,