Skip to content

Commit

Permalink
remove_file_extensions is more robust to various fov naming schemes (#577)
Browse files Browse the repository at this point in the history

* remove_file_extensions is more robust to fov naming schemes

* added feather to extension_types

* typo

Co-authored-by: Noah F. Greenwald <noahfgreenwald@gmail.com>
  • Loading branch information
srivarra and ngreenwald committed Jun 3, 2022
1 parent bd22839 commit daf0104
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 5 deletions.
23 changes: 18 additions & 5 deletions ark/utils/io_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from argparse import Namespace
import os
import pathlib
import warnings
Expand Down Expand Up @@ -112,16 +113,28 @@ def remove_file_extensions(files):
return

# remove the file extension
names = [os.path.splitext(name)[0] for name in files]

# identify names with '.' in them: these may not be processed correctly
bad_names = [name for name in names if '.' in name]
names = [os.path.splitext(name) for name in files]
names_corrected = []
extension_types = ["tiff", "tif", "png", "jpg", "jpeg", "tar", "gz", "csv", "feather"]
for name in names:
# We want everything after the "." for the extension
ext = name[-1][1:]
if (ext in extension_types) or (len(ext) == 0):
# If it is one of the extension types, only keep the filename.
# Or there is no extension and the names are similar to ["fov1", "fov2", "fov3", ...]
names_corrected.append(name[:-1][0])
else:
# If `ext` is not one of the specified file types, keep the value after the "."
names_corrected.append(name[:-1][0] + "." + name[-1][1])

# identify names with '.' in them: these may not be processed correctly.
bad_names = [name for name in names_corrected if '.' in name]
if len(bad_names) > 0:
warnings.warn(f"These files still have \".\" in them after file extension removal: "
f"{','.join(bad_names)}, "
f"please double check that these are the correct names")

return names
return names_corrected


def extract_delimited_names(names, delimiter='_', delimiter_optional=True):
Expand Down
5 changes: 5 additions & 0 deletions ark/utils/io_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ def test_list_files():
def test_remove_file_extensions():
# test a mixture of file paths and extensions
files = ['fov1.tiff', 'fov2.tif', 'fov3.png', 'fov4.jpg']
files2 = ['fov.1.tiff', 'fov.2.tiff', 'fov.3.png', 'fov.4']

assert iou.remove_file_extensions(None) is None
assert iou.remove_file_extensions([]) == []
Expand All @@ -153,6 +154,10 @@ def test_remove_file_extensions():
new_files = iou.remove_file_extensions(['fov5.tar.gz', 'fov6.sample.csv'])
assert new_files == ['fov5.tar', 'fov6.sample']

with pytest.warns(UserWarning):
new_files = iou.remove_file_extensions(files2)
assert new_files == ['fov.1', 'fov.2', 'fov.3', 'fov.4']


def test_extract_delimited_names():
filenames = [
Expand Down

0 comments on commit daf0104

Please sign in to comment.