Skip to content

Commit

Permalink
Added option to ignore hidden files and directories (#510)
Browse files Browse the repository at this point in the history
* Added option to ignore hidden files and directories in list_files and list_folders

* Changed test logic

Co-authored-by: Noah F. Greenwald <noahfgreenwald@gmail.com>
  • Loading branch information
srivarra and ngreenwald authored Mar 2, 2022
1 parent bbbbddc commit 60f2460
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 13 deletions.
18 changes: 16 additions & 2 deletions ark/utils/io_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def validate_paths(paths, data_prefix=True):
f'and to reference as \'../data/path_to_data/myfile.tif\'')


def list_files(dir_name, substrs=None, exact_match=False):
def list_files(dir_name, substrs=None, exact_match=False, ignore_hidden=True):
""" List all files in a directory containing at least one given substring
Args:
Expand All @@ -53,6 +53,9 @@ def list_files(dir_name, substrs=None, exact_match=False):
exact_match (bool):
If True, will match exact file names (so 'C' will match only 'C.tif')
If False, will match substr pattern in file (so 'C' will match 'C.tif' and 'CD30.tif')
ignore_hidden (bool):
If True, will ignore hidden files. If False, will allow hidden files to be
matched against the search substring.
Returns:
list:
Expand All @@ -65,6 +68,10 @@ def list_files(dir_name, substrs=None, exact_match=False):
else:
files = dir_name.lsfiles()

# Filter out hidden files
if ignore_hidden:
files = [file for file in files if not file.startswith('.')]

# default to return all files
if substrs is None:
return files
Expand Down Expand Up @@ -176,7 +183,7 @@ def extract_delimited_names(names, delimiter='_', delimiter_optional=True):
return names


def list_folders(dir_name, substrs=None, exact_match=False):
def list_folders(dir_name, substrs=None, exact_match=False, ignore_hidden=True):
""" List all folders in a directory containing at least one given substring
Args:
Expand All @@ -187,6 +194,9 @@ def list_folders(dir_name, substrs=None, exact_match=False):
exact_match (bool):
If True, will match exact folder names (so 'C' will match only 'C/').
If False, will match substr pattern in folder (so 'C' will match 'C/' & 'C_DIREC/').
ignore_hidden (bool):
If True, will ignore hidden directories. If False, will allow hidden directories to
be matched against the search substring.
Returns:
list:
Expand All @@ -199,6 +209,10 @@ def list_folders(dir_name, substrs=None, exact_match=False):
else:
folders = dir_name.lsdirs()

# Filter out hidden directories
if ignore_hidden:
folders = [folder for folder in folders if not folder.startswith('.')]

# default to return all folders
if substrs is None:
return folders
Expand Down
47 changes: 36 additions & 11 deletions ark/utils/io_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ def test_list_files():
'othertf.txt',
'test.out',
'test.csv',
'._fov-1-scan-1.json',
'._fov-1-scan-1_pulse_heights.csv'
]
for filename in filenames:
pathlib.Path(os.path.join(temp_dir, filename)).touch()
Expand All @@ -91,19 +93,25 @@ def test_list_files():

# test substrs is None (default)
get_all = iou.list_files(temp_dir)
assert sorted(get_all) == sorted(filenames)
assert sorted(get_all) == sorted(['tf.txt', 'othertf.txt', 'test.out', 'test.csv'])

# test substrs is not list (single string)
get_txt = iou.list_files(temp_dir, substrs='.txt')
assert sorted(get_txt) == sorted(filenames[0:2])
assert sorted(get_txt) == sorted(['othertf.txt', 'tf.txt'])

# test substrs is list
get_test_and_other = iou.list_files(temp_dir, substrs=['.txt', '.out'])
assert sorted(get_test_and_other) == sorted(filenames[:3])
assert sorted(get_test_and_other) == sorted(['tf.txt', 'othertf.txt', 'test.out'])

# Test hidden files
get_hidden_files = iou.list_files(temp_dir, substrs=['fov-1'], exact_match=False,
ignore_hidden=False)
assert sorted(get_hidden_files) == sorted(['._fov-1-scan-1.json',
'._fov-1-scan-1_pulse_heights.csv'])

# test file name exact matching
with tempfile.TemporaryDirectory() as temp_dir:
filenames = ['chan0.tif', 'chan.tif', 'c.tif']
filenames = ['.chan-metadata.tif', 'chan0.tif', 'chan.tif', 'c.tif']
for filename in filenames:
pathlib.Path(os.path.join(temp_dir, filename)).touch()

Expand All @@ -112,15 +120,20 @@ def test_list_files():

# test substrs is None (default)
get_all = iou.list_files(temp_dir, exact_match=True)
assert sorted(get_all) == sorted(filenames)
assert sorted(get_all) == sorted(['chan0.tif', 'chan.tif', 'c.tif'])

# test substrs is not list (single string)
get_txt = iou.list_files(temp_dir, substrs='c', exact_match=True)
assert sorted(get_txt) == [filenames[2]]
assert sorted(get_txt) == [filenames[3]]

# test substrs is list
get_test_and_other = iou.list_files(temp_dir, substrs=['c', 'chan'], exact_match=True)
assert sorted(get_test_and_other) == sorted(filenames[1:])
assert sorted(get_test_and_other) == sorted(['chan.tif', 'c.tif'])

# Test hidden files
get_hidden_files = iou.list_files(temp_dir, substrs=['.chan-metadata'], exact_match=True,
ignore_hidden=False)
assert sorted(get_hidden_files) == ['.chan-metadata.tif']


def test_remove_file_extensions():
Expand Down Expand Up @@ -171,6 +184,7 @@ def test_list_folders():
'test_csv1',
'test_csv2',
'Ntest_csv',
'.hidden_dir'
]

dirnames.sort()
Expand All @@ -182,7 +196,8 @@ def test_list_folders():

# test substrs is None (default)
get_all = iou.list_folders(temp_dir, exact_match=False)
assert sorted(get_all) == dirnames
assert sorted(get_all) == sorted(['tf_txt', 'othertf_txt', 'test_csv', 'test_out',
'test_csv1', 'test_csv2', 'Ntest_csv'])

# test substrs is not list (single string)
get_txt = iou.list_folders(temp_dir, substrs='_txt', exact_match=False)
Expand All @@ -196,15 +211,20 @@ def test_list_folders():
['Ntest_csv', 'test_csv', 'test_csv1', 'test_csv2', 'test_out', 'othertf_txt']
)

# Test hidden directories
get_hidden_dirs = iou.list_folders(
temp_dir, substrs="hidden", exact_match=False, ignore_hidden=False)
assert get_hidden_dirs == [".hidden_dir"]

# Tests "Exact Substring Matching", `exact_match` = True

# Test substrs is None (default)
get_all = iou.list_folders(temp_dir, exact_match=True)
assert sorted(get_all) == sorted(dirnames)

assert sorted(get_all) == sorted(['tf_txt', 'othertf_txt', 'test_csv', 'test_out',
'test_csv1', 'test_csv2', 'Ntest_csv'])
# Test exact substr is not list (single string)
get_othertf_txt = iou.list_folders(temp_dir, substrs='othertf_txt', exact_match=True)
assert get_othertf_txt == [dirnames[1]]
assert get_othertf_txt == [dirnames[2]]

# Test substrs, querying two folders (exactly)
get_exact_n_substrs = iou.list_folders(
Expand All @@ -217,3 +237,8 @@ def test_list_folders():
# is returned when `exact_match=True`
get_test_o = iou.list_folders(temp_dir, substrs='test_csv', exact_match=True)
assert get_test_o == ["test_csv"]

# Test hidden directories
get_hidden_dirs = iou.list_folders(
temp_dir, substrs=".hidden_dir", exact_match=True, ignore_hidden=False)
assert get_hidden_dirs == [".hidden_dir"]

0 comments on commit 60f2460

Please sign in to comment.