Skip to content

Commit

Permalink
Address code review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
alex-l-kong committed Oct 30, 2020
1 parent 1af156e commit 63b587c
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 16 deletions.
5 changes: 0 additions & 5 deletions ark/segmentation/marker_quantification.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,10 +274,6 @@ def generate_cell_table(segmentation_labels, tiff_dir, img_sub_folder,
# drop file extensions
fovs = io_utils.remove_file_extensions(fovs)

# if mibitiff, also need to remove delimiters
if is_mibitiff:
fovs = io_utils.extract_delimited_names(fovs)

# check segmentation_labels for given fovs (img loaders will fail otherwise)
misc_utils.verify_in_list(fovs=fovs,
segmentation_labels_fovs=segmentation_labels['fovs'].values)
Expand All @@ -301,7 +297,6 @@ def generate_cell_table(segmentation_labels, tiff_dir, img_sub_folder,
[fovs[i:i + batch_size] for i in range(0, cohort_len, batch_size)],
[filenames[i:i + batch_size] for i in range(0, cohort_len, batch_size)]
):
print(batch_files)
# and extract the image data for each batch
if is_mibitiff:
image_data = load_utils.load_imgs_from_mibitiff(data_dir=tiff_dir,
Expand Down
32 changes: 23 additions & 9 deletions ark/utils/io_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,25 +80,37 @@ def remove_file_extensions(files):
List of files to remove file extensions from.
Any element that doesn't have an extension is left unchanged
Raises:
UserWarning:
Some of the processed file names still contain a period
Returns:
list:
List of files without file extensions
"""

# make sure we don't try to split on an undefined list of files
# make sure we don't try to split on a non-existent list
if files is None:
return

# only get the file name and not the directory path leading up to it
names = [os.path.split(name)[1] for name in files]

# remove anything past and including the first '.' in each entry
names = [name.split('.')[0] for name in names]
# remove the file extension
names = [os.path.splitext(name)[0] for name in names]

# identify names with '.' in them: these may not be processed correctly
bad_names = [name for name in names if '.' in name]
if len(bad_names) > 0:
print(f"These files still have \".\" in them after file extension removal: "
f"{','.join(bad_names)}, "
f"please double check that these are the correct names")
warnings.warn("remaining periods in file names")

return names


def extract_delimited_names(names, delimiter='_', delimiter_optional=True):
def extract_delimited_names(names, delimiter=None, delimiter_optional=True):
"""For a given list of names, extract the delimited prefix
Examples (if delimiter='_'):
Expand All @@ -114,7 +126,7 @@ def extract_delimited_names(names, delimiter='_', delimiter_optional=True):
Character separator used to determine filename prefix. Defaults to None.
delimiter_optional (bool):
If False, function will return None if any of the files don't contain the delimiter.
Defaults to True.
Defaults to True. Ignored if delimiter is None.
Raises:
UserWarning:
Expand All @@ -131,15 +143,17 @@ def extract_delimited_names(names, delimiter='_', delimiter_optional=True):
return

# check for bad files/folders
if not delimiter_optional:
if delimiter is not None and not delimiter_optional:
no_delim = [
delimiter not in name
for name in names
]
if any(no_delim):
warnings.warn(f"The following files do not have the mandatory delimiter, "
f"'{delimiter}'...\n"
f"{[name for indx,name in enumerate(names) if no_delim[indx]]}")
print(f"The following files do not have the mandatory delimiter, "
f"'{delimiter}': "
f"{','.join([name for indx,name in enumerate(names) if no_delim[indx]])}")
warnings.warn("files without mandatory delimiter")

return None

# now split on the delimiter as well
Expand Down
8 changes: 6 additions & 2 deletions ark/utils/io_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ def test_remove_file_extensions():

assert new_files == files_sans_ext

with pytest.warns(UserWarning):
new_files = iou.remove_file_extensions(['fov5.tar.gz', 'fov6.sample.csv'])
assert new_files == ['fov5.tar', 'fov6.sample']


def test_extract_delimited_names():
filenames = [
Expand All @@ -115,10 +119,10 @@ def test_extract_delimited_names():

# non-optional delimiter warning
with pytest.warns(UserWarning):
iou.extract_delimited_names(['fov2'], delimiter_optional=False)
iou.extract_delimited_names(['fov2'], delimiter='_', delimiter_optional=False)

# test regular files list
assert ['fov1', 'fov2'] == iou.extract_delimited_names(filenames)
assert ['fov1', 'fov2'] == iou.extract_delimited_names(filenames, delimiter='_')


def test_list_folders():
Expand Down

0 comments on commit 63b587c

Please sign in to comment.