Skip to content

Commit

Permalink
Increase validate paths usage (#796)
Browse files Browse the repository at this point in the history
* change to FileNotFoundError

* use validate paths function

* more validate paths switches

* missed a function

* typo

Co-authored-by: alex-l-kong <31424707+alex-l-kong@users.noreply.github.com>

Co-authored-by: alex-l-kong <31424707+alex-l-kong@users.noreply.github.com>
  • Loading branch information
camisowers and alex-l-kong authored Oct 31, 2022
1 parent 2a3ae44 commit dcc6338
Show file tree
Hide file tree
Showing 16 changed files with 64 additions and 211 deletions.
104 changes: 14 additions & 90 deletions ark/phenotyping/cell_cluster_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import scipy.stats as stats

from ark.analysis import visualize
from ark.utils import misc_utils
from ark.utils import misc_utils, io_utils


def compute_cell_cluster_count_avg(cell_cluster_path, pixel_cluster_col_prefix,
Expand Down Expand Up @@ -96,10 +96,7 @@ def compute_cell_cluster_channel_avg(fovs, channels, base_dir,
"""

# verify the cell table actually exists
if not os.path.exists(os.path.join(base_dir, weighted_cell_channel_name)):
raise FileNotFoundError(
"Weighted cell table %s not found in %s" % (weighted_cell_channel_name, base_dir)
)
io_utils.validate_paths(os.path.join(base_dir, weighted_cell_channel_name))

# verify the cell cluster col specified is valid
misc_utils.verify_in_list(
Expand Down Expand Up @@ -445,15 +442,8 @@ def train_cell_som(fovs, channels, base_dir, pixel_data_dir, cell_table_path,
cluster_counts_norm_path = os.path.join(base_dir, cluster_counts_norm_name)
weights_path = os.path.join(base_dir, weights_name)

# if the cell table path does not exist
if not os.path.exists(cell_table_path):
raise FileNotFoundError('Cell table path %s does not exist' %
cell_table_path)

# if the pixel data with the SOM and meta labels path does not exist
if not os.path.exists(pixel_data_path):
raise FileNotFoundError('Pixel data dir %s does not exist in base_dir %s' %
(pixel_data_path, base_dir))
# check the cell table path and pixel data path exist
io_utils.validate_paths([cell_table_path, pixel_data_path])

# verify the cluster_col provided is valid
misc_utils.verify_in_list(
Expand Down Expand Up @@ -550,17 +540,8 @@ def cluster_cells(base_dir, cluster_counts_norm_name='cluster_counts_norm.feathe
weights_path = os.path.join(base_dir, weights_name)
cell_data_path = os.path.join(base_dir, cell_data_name)

# if the path to the normalized pixel cluster counts per cell doesn't exist
if not os.path.exists(cluster_counts_norm_path):
raise FileNotFoundError(
'Normalized pixel cluster counts per cell file %s does not exist in base_dir %s' %
(cluster_counts_norm_name, base_dir)
)

# if the path to the weights file does not exist
if not os.path.exists(weights_path):
raise FileNotFoundError('Weights file %s does not exist in base_dir %s' %
(weights_name, base_dir))
# check that the paths to the normalized pixel cluster counts per cell and the weights file exist
io_utils.validate_paths([cluster_counts_norm_path, weights_path])

# verify the pixel_cluster_col_prefix provided is valid
misc_utils.verify_in_list(
Expand Down Expand Up @@ -678,26 +659,8 @@ def cell_consensus_cluster(fovs, channels, base_dir, pixel_cluster_col, max_k=20
weighted_channel_path = os.path.join(base_dir, weighted_cell_channel_name)
clust_to_meta_path = os.path.join(base_dir, clust_to_meta_name)

# if the path to the SOM clustered data doesn't exist
if not os.path.exists(cell_data_path):
raise FileNotFoundError(
'Cell data file %s does not exist in base_dir %s' %
(cell_data_name, base_dir)
)

# if the path to the average pixel cluster counts per cell cluster doesn't exist
if not os.path.exists(som_cluster_counts_avg_path):
raise FileNotFoundError(
'Average pix clust count per cell SOM cluster file %s does not exist in base_dir %s' %
(cell_som_cluster_count_avgs_name, base_dir)
)

# if the path to the weighted channel data doesn't exist
if not os.path.exists(weighted_channel_path):
raise FileNotFoundError(
'Weighted channel table %s does not exist in base_dir %s' %
(weighted_cell_channel_name, base_dir)
)
# check paths
io_utils.validate_paths([cell_data_path, som_cluster_counts_avg_path, weighted_channel_path])

# verify the pixel_cluster_col provided is valid
misc_utils.verify_in_list(
Expand Down Expand Up @@ -866,41 +829,10 @@ def apply_cell_meta_cluster_remapping(fovs, channels, base_dir, cell_consensus_n
meta_cluster_channel_avgs_path = os.path.join(base_dir, cell_meta_cluster_channel_avg_name)

# file path validation
if not os.path.exists(cell_consensus_path):
raise FileNotFoundError('Cell consensus file %s does not exist in base_dir %s' %
(cell_consensus_name, base_dir))

if not os.path.exists(cell_remapped_path):
raise FileNotFoundError('Cell remapping file %s does not exist in base_dir %s' %
(cell_remapped_name, base_dir))

if not os.path.exists(som_cluster_counts_avgs_path):
raise FileNotFoundError(
'Average pix clust count per cell SOM cluster file %s does not exist in base_dir %s' %
(cell_som_cluster_count_avgs_name, base_dir)
)

if not os.path.exists(meta_cluster_counts_avgs_path):
raise FileNotFoundError(
'Average pix clust count per cell meta cluster file %s does not exist in base_dir %s' %
(cell_meta_cluster_count_avgs_name, base_dir)
)

if not os.path.exists(weighted_channel_path):
raise FileNotFoundError('Weighted channel table %s does not exist in base_dir %s' %
(weighted_cell_channel_name, base_dir))

if not os.path.exists(som_cluster_channel_avgs_path):
raise FileNotFoundError(
'Average weighted chan per cell SOM cluster file %s does not exist in base_dir %s' %
(cell_som_cluster_channel_avg_name, base_dir)
)

if not os.path.exists(meta_cluster_channel_avgs_path):
raise FileNotFoundError(
'Average weighted chan per cell meta cluster file %s does not exist in base_dir %s' %
(cell_meta_cluster_channel_avg_name, base_dir)
)
io_utils.validate_paths([cell_consensus_path, cell_remapped_path,
som_cluster_counts_avgs_path, meta_cluster_counts_avgs_path,
weighted_channel_path, som_cluster_channel_avgs_path,
meta_cluster_channel_avgs_path])

# verify the pixel_cluster_col provided is valid
misc_utils.verify_in_list(
Expand Down Expand Up @@ -1061,9 +993,7 @@ def generate_weighted_channel_avg_heatmap(cell_cluster_channel_avg_path, cell_cl
"""

# file path validation
if not os.path.exists(cell_cluster_channel_avg_path):
raise FileNotFoundError('Channel average path %s does not exist' %
cell_cluster_channel_avg_path)
io_utils.validate_paths(cell_cluster_channel_avg_path)

# verify the cell_cluster_col provided is valid
misc_utils.verify_in_list(
Expand Down Expand Up @@ -1138,13 +1068,7 @@ def add_consensus_labels_cell_table(base_dir, cell_table_path, cell_data_name):
cell_data_path = os.path.join(base_dir, cell_data_name)

# file path validation
if not os.path.exists(cell_table_path):
raise FileNotFoundError('Cell table file %s does not exist' %
cell_table_path)

if not os.path.exists(cell_data_path):
raise FileNotFoundError('Cell data file %s does not exist in base_dir %s' %
(cell_data_name, base_dir))
# both inputs must exist: the cell table (read below) and the cell data file in base_dir
io_utils.validate_paths([cell_table_path, cell_data_path])

# read in the data, ensure sorted by FOV column just in case
cell_table = pd.read_csv(cell_table_path)
Expand Down
74 changes: 11 additions & 63 deletions ark/phenotyping/pixel_cluster_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,7 @@ def filter_with_nuclear_mask(fovs, tiff_dir, seg_dir, channel,
return

# raise an error if the provided seg_dir does not exist
if not os.path.exists(seg_dir):
raise FileNotFoundError('seg_dir %s does not exist' % seg_dir)
io_utils.validate_paths(seg_dir)

# convert to path-compatible format
if img_sub_folder is None:
Expand Down Expand Up @@ -603,17 +602,8 @@ def create_pixel_matrix(fovs, channels, base_dir, tiff_dir, seg_dir,
if subset_proportion <= 0 or subset_proportion > 1:
raise ValueError('Invalid subset percentage entered: must be in (0, 1]')

# if the base directory doesn't exist
if not os.path.exists(base_dir):
raise FileNotFoundError("base_dir %s does not exist" % base_dir)

# if the tiff dir doesn't exist
if not os.path.exists(tiff_dir):
raise FileNotFoundError("tiff_dir %s does not exist" % tiff_dir)

# if the pixel output dir doesn't exist
if not os.path.exists(os.path.join(base_dir, pixel_output_dir)):
raise FileNotFoundError("pixel_output_dir %s does not exist" % pixel_output_dir)
# path validation
io_utils.validate_paths([base_dir, tiff_dir, os.path.join(base_dir, pixel_output_dir)])

# create data_dir if it doesn't already exist
if not os.path.exists(os.path.join(base_dir, data_dir)):
Expand Down Expand Up @@ -794,9 +784,7 @@ def find_fovs_missing_col(base_dir, data_dir, missing_col):
temp_path = os.path.join(base_dir, data_dir + '_temp')

# verify the data path exists
if not os.path.exists(data_path):
raise FileNotFoundError('Data directory %s does not exist in base_dir %s' %
(data_dir, base_dir))
io_utils.validate_paths(data_path)

# if the temp path does not exist, either all the FOVs need to be run or none of them do
if not os.path.exists(temp_path):
Expand Down Expand Up @@ -883,9 +871,7 @@ def train_pixel_som(fovs, channels, base_dir,
return

# verify the path to the subsetted directory exists
if not os.path.exists(subsetted_path):
raise FileNotFoundError('Pixel subsetted directory %s does not exist in base_dir %s' %
(subset_dir, base_dir))
io_utils.validate_paths(subsetted_path)

# verify that all provided fovs exist in the folder
files = io_utils.list_files(subsetted_path, substrs='.feather')
Expand Down Expand Up @@ -959,20 +945,8 @@ def cluster_pixels(fovs, channels, base_dir, data_dir='pixel_mat_data',
norm_vals_path = os.path.join(base_dir, norm_vals_name)
weights_path = os.path.join(base_dir, weights_name)

# if path to the preprocessed directory does not exist
if not os.path.exists(data_path):
raise FileNotFoundError('Pixel data directory %s does not exist in base_dir %s' %
(data_dir, base_dir))

# if path to the normalized values file does not exist
if not os.path.exists(norm_vals_path):
raise FileNotFoundError('Normalized values file %s does not exist in base_dir %s' %
(norm_vals_path, base_dir))

# if path to the weights file does not exist
if not os.path.exists(weights_path):
raise FileNotFoundError('Weights file %s does not exist in base_dir %s' %
(weights_name, base_dir))
# path validation
io_utils.validate_paths([data_path, norm_vals_path, weights_path])

# verify that all provided fovs exist in the folder
# NOTE: remove the channel and pixel normalization files as those are not pixel data
Expand Down Expand Up @@ -1126,19 +1100,8 @@ def pixel_consensus_cluster(fovs, channels, base_dir, max_k=20, cap=3,
som_cluster_avg_path = os.path.join(base_dir, pc_chan_avg_som_cluster_name)
clust_to_meta_path = os.path.join(base_dir, clust_to_meta_name)

# if the path to the SOM clustered data doesn't exist
if not os.path.exists(data_path):
raise FileNotFoundError(
'Data dir %s does not exist in base_dir %s' %
(data_dir, base_dir)
)

# if the path to the average channel expression per SOM cluster doesn't exist
if not os.path.exists(som_cluster_avg_path):
raise FileNotFoundError(
'Channel avg per SOM cluster file %s does not exist in base_dir %s' %
(pc_chan_avg_som_cluster_name, base_dir)
)
# path validation
io_utils.validate_paths([data_path, som_cluster_avg_path])

# if the path mapping SOM to meta clusters exists, don't re-run consensus clustering
if os.path.exists(clust_to_meta_path):
Expand Down Expand Up @@ -1318,23 +1281,8 @@ def apply_pixel_meta_cluster_remapping(fovs, channels, base_dir,
meta_cluster_avg_path = os.path.join(base_dir, pc_chan_avg_meta_cluster_name)

# file path validation
if not os.path.exists(pixel_data_path):
raise FileNotFoundError('Pixel data dir %s does not exist in base_dir %s' %
(pixel_data_dir, base_dir))

if not os.path.exists(pixel_remapped_path):
raise FileNotFoundError('Pixel remapping file %s does not exist in base_dir %s' %
(pixel_remapped_name, base_dir))

if not os.path.exists(som_cluster_avg_path):
raise FileNotFoundError(
'Channel average per SOM cluster file %s does not exist in base_dir %s' %
(pc_chan_avg_meta_cluster_name, base_dir))

if not os.path.exists(meta_cluster_avg_path):
raise FileNotFoundError(
'Channel average per meta cluster file %s does not exist in base_dir %s' %
(pc_chan_avg_meta_cluster_name, base_dir))
io_utils.validate_paths([pixel_data_path, pixel_remapped_path, som_cluster_avg_path,
meta_cluster_avg_path])

# read in the remapping
pixel_remapped_data = pd.read_csv(pixel_remapped_path)
Expand Down
6 changes: 3 additions & 3 deletions ark/segmentation/fiber_segmentation_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_plot_fiber_segmentation_steps():
shutil.rmtree(os.path.join(temp_dir, 'image_data', fov))

# bad directory should raise an error
with pytest.raises(ValueError):
with pytest.raises(FileNotFoundError):
_, _ = fiber_segmentation.plot_fiber_segmentation_steps('bad_dir', 'fov1', 'Collagen1')

# bad channel should raise an error
Expand Down Expand Up @@ -46,10 +46,10 @@ def test_run_fiber_segmentation():
os.makedirs(out_dir)

# bad directories should raise an error
with pytest.raises(ValueError):
with pytest.raises(FileNotFoundError):
_ = fiber_segmentation.run_fiber_segmentation('bad_path', 'Collagen1', out_dir)

with pytest.raises(ValueError):
with pytest.raises(FileNotFoundError):
_ = fiber_segmentation.run_fiber_segmentation(img_dir, 'Collagen1', 'bad_path')

# bad subdirectory should raise an error
Expand Down
24 changes: 5 additions & 19 deletions ark/utils/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@ def save_fov_mask(fov, data_dir, mask_data, sub_dir=None, name_suffix=''):
"""

# data_dir validation
if not os.path.exists(data_dir):
raise FileNotFoundError("data_dir %s does not exist" % data_dir)
io_utils.validate_paths(data_dir)

# ensure None is handled correctly in file path generation
if sub_dir is None:
Expand Down Expand Up @@ -161,12 +160,8 @@ def generate_cell_cluster_mask(fov, base_dir, seg_dir, cell_data_name,
"""

# path checking
if not os.path.exists(seg_dir):
raise FileNotFoundError("seg_dir %s does not exist" % seg_dir)

if not os.path.exists(os.path.join(base_dir, cell_data_name)):
raise FileNotFoundError(
"Cell data file %s does not exist in base_dir %s" % (cell_data_name, base_dir))
# build the cell data file path once, then validate it together with seg_dir
cell_data_path = os.path.join(base_dir, cell_data_name)
io_utils.validate_paths([seg_dir, cell_data_path])

# verify the cluster_col provided is valid
verify_in_list(
Expand Down Expand Up @@ -277,17 +272,8 @@ def generate_pixel_cluster_mask(fov, base_dir, tiff_dir, chan_file_path,
"""

# path checking
if not os.path.exists(tiff_dir):
raise FileNotFoundError("tiff_dir %s does not exist")

if not os.path.exists(os.path.join(tiff_dir, chan_file_path)):
raise FileNotFoundError("chan_file_path %s does not exist in tiff_dir %s"
% (chan_file_path, tiff_dir))

if not os.path.exists(os.path.join(base_dir, pixel_data_dir)):
raise FileNotFoundError(
"Pixel data dir %s does not exist in base_dir %s" % (pixel_data_dir, base_dir)
)
io_utils.validate_paths([tiff_dir, os.path.join(tiff_dir, chan_file_path),
os.path.join(base_dir, pixel_data_dir)])

# verify the pixel_cluster_col provided is valid
verify_in_list(
Expand Down
2 changes: 1 addition & 1 deletion ark/utils/data_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,7 +686,7 @@ def test_stitch_images_by_shape(segmentation, clustering, subdir, fovs):
os.makedirs(data_dir)

# invalid directory is provided
with pytest.raises(ValueError):
with pytest.raises(FileNotFoundError):
data_utils.stitch_images_by_shape('not_a_dir', stitched_dir)

# no fov dirs should raise an error
Expand Down
6 changes: 3 additions & 3 deletions ark/utils/io_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,18 @@ def validate_paths(paths, data_prefix=False):
for path in paths:
# check data prefix
if data_prefix and not str(path).startswith('../data'):
raise ValueError(
raise FileNotFoundError(
f'The path, {path}, is not prefixed with \'../data\'.\n'
f'Be sure to add all images/files/data to the \'data\' folder, '
f'and to reference as \'../data/path_to_data/myfile.tif\'')

if not os.path.exists(path):
for parent in reversed(pathlib.Path(path).parents):
if not os.path.exists(parent):
raise ValueError(
raise FileNotFoundError(
f'A bad path, {path}, was provided.\n'
f'The folder, {parent.name}, could not be found...')
raise ValueError(
raise FileNotFoundError(
f'The file/path, {pathlib.Path(path).name}, could not be found...')


Expand Down
Loading

0 comments on commit dcc6338

Please sign in to comment.