Skip to content

Commit

Permalink
Merge d92899f into 144deae
Browse files Browse the repository at this point in the history
  • Loading branch information
srivarra committed Feb 25, 2022
2 parents 144deae + d92899f commit af3fcb1
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 26 deletions.
30 changes: 21 additions & 9 deletions ark/utils/io_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ def list_files(dir_name, substrs=None, exact_match=False):
matches = [file
for file in files
if any([
substr == os.path.splitext(file)[0]
for substr in substrs
substr == os.path.splitext(file)[0]
for substr in substrs
])]
else:
matches = [file
Expand Down Expand Up @@ -176,14 +176,17 @@ def extract_delimited_names(names, delimiter='_', delimiter_optional=True):
return names


def list_folders(dir_name, substrs=None):
def list_folders(dir_name, substrs=None, exact_match=False):
""" List all folders in a directory containing at least one given substring
Args:
dir_name (str):
Parent directory for folders of interest
substrs (str or list):
Substring matching criteria, defaults to None (all folders)
exact_match (bool):
If True, will match exact folder names (so 'C' will match 'C/' but not 'C_DIREC/').
Note: names are compared after `os.path.splitext`, so 'C' will also match 'C.bak/'.
If False, will match substr pattern in folder (so 'C' will match 'C/' & 'C_DIREC/').
Returns:
list:
Expand All @@ -204,12 +207,21 @@ def list_folders(dir_name, substrs=None):
if type(substrs) is not list:
substrs = [substrs]

matches = [folder
for folder in folders
if any([
substr in folder
for substr in substrs
])]
# Exact match case
if exact_match:
matches = [folder
for folder in folders
if any([
substr == os.path.splitext(folder)[0]
for substr in substrs
])]
else:
matches = [folder
for folder in folders
if any([
substr in folder
for substr in substrs
])]

return matches

Expand Down
57 changes: 40 additions & 17 deletions ark/utils/io_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,7 @@ def test_list_files():

# test file name exact matching
with tempfile.TemporaryDirectory() as temp_dir:
filenames = [
'chan0.tif',
'chan.tif',
'c.tif'
]
filenames = ['chan0.tif', 'chan.tif', 'c.tif']
for filename in filenames:
pathlib.Path(os.path.join(temp_dir, filename)).touch()

Expand All @@ -129,12 +125,7 @@ def test_list_files():

def test_remove_file_extensions():
# test a mixture of file paths and extensions
files = [
'fov1.tiff',
'fov2.tif',
'fov3.png',
'fov4.jpg'
]
files = ['fov1.tiff', 'fov2.tif', 'fov3.png', 'fov4.jpg']

assert iou.remove_file_extensions(None) is None
assert iou.remove_file_extensions([]) == []
Expand Down Expand Up @@ -169,28 +160,60 @@ def test_extract_delimited_names():


def test_list_folders():
# Tests "Fuzzy Substring Matching", `exact_match` = False
with tempfile.TemporaryDirectory() as temp_dir:
# set up temp_dir subdirs
dirnames = [
'tf_txt',
'othertf_txt',
'test_csv',
'test_out',
'test_csv1',
'test_csv2',
'Ntest_csv',
]

dirnames.sort()
for dirname in dirnames:
os.mkdir(os.path.join(temp_dir, dirname))

# add extra file
pathlib.Path(os.path.join(temp_dir, 'test_badfile.txt')).touch()

# test substrs is None (default)
get_all = iou.list_folders(temp_dir)
assert get_all.sort() == dirnames.sort()
get_all = iou.list_folders(temp_dir, exact_match=False)
assert sorted(get_all) == dirnames

# test substrs is not list (single string)
get_txt = iou.list_folders(temp_dir, substrs='_txt')
assert get_txt.sort() == dirnames[0:2].sort()
get_txt = iou.list_folders(temp_dir, substrs='_txt', exact_match=False)
assert sorted(get_txt) == sorted(['othertf_txt', 'tf_txt'])

# test substrs is list
get_test_and_other = iou.list_folders(temp_dir, substrs=['test_', 'other'])
assert get_test_and_other.sort() == dirnames[1:].sort()
get_test_and_other = iou.list_folders(
temp_dir, substrs=['test_', 'other'], exact_match=False
)
assert sorted(get_test_and_other) == sorted(
['Ntest_csv', 'test_csv', 'test_csv1', 'test_csv2', 'test_out', 'othertf_txt']
)

# Tests "Exact Substring Matching", `exact_match` = True

# Test substrs is None (default)
get_all = iou.list_folders(temp_dir, exact_match=True)
assert sorted(get_all) == sorted(dirnames)

# Test exact substr is not list (single string)
get_othertf_txt = iou.list_folders(temp_dir, substrs='othertf_txt', exact_match=True)
assert get_othertf_txt == [dirnames[1]]

# Test substrs, querying two folders (exactly)
get_exact_n_substrs = iou.list_folders(
temp_dir, substrs=['tf_txt', 'othertf_txt'], exact_match=True
)
assert sorted(get_exact_n_substrs) == ['othertf_txt', 'tf_txt']

# Test a substr that is contained within multiple folder names: when `exact_match=True`,
# only the folder whose name exactly matches the substr is returned, not the folders
# that merely contain it.
get_test_o = iou.list_folders(temp_dir, substrs='test_csv', exact_match=True)
assert get_test_o == ["test_csv"]

0 comments on commit af3fcb1

Please sign in to comment.