Merge 367b5cb into d454d09

angelolab · Dec 7, 2022 · 8fc78fc · 8fc78fc
2 parents d454d09 + 367b5cb
commit 8fc78fc
Show file tree

Hide file tree

Showing 11 changed files with 52 additions and 42 deletions.
diff --git a/.coveragerc b/.coveragerc
@@ -9,13 +9,11 @@ exclude_lines =
     raise NotImplementedError
     (ArrowInvalid, OSError, IOError)
 
-
 ignore_errors = True
 fail_under = 45
 
 # show which lines are missing
 show_missing = False
 
 omit = 
-    segmentation/scratch.py
     **/*test*.py
diff --git a/ark/analysis/spatial_analysis.py b/ark/analysis/spatial_analysis.py
@@ -41,6 +41,9 @@ def generate_channel_spatial_enrichment_stats(label_dir, dist_mat_dir, marker_th
           cluster_names
     """
 
+    # Validate paths
+    io_utils.validate_paths([label_dir, dist_mat_dir])
+
     # parse files in label_dir
     all_label_names = io_utils.list_files(label_dir, substrs=[suffix + '.tiff'])
 

diff --git a/ark/phenotyping/cell_cluster_utils.py b/ark/phenotyping/cell_cluster_utils.py
@@ -9,8 +9,8 @@
 import scipy.stats as stats
 
 from ark.analysis import visualize
+from ark.utils import io_utils, misc_utils
 from ark.phenotyping import cluster_helpers
-from ark.utils import misc_utils, io_utils
 
 
 def compute_cell_cluster_count_avg(cell_cluster_path, pixel_cluster_col_prefix,
@@ -35,15 +35,18 @@ def compute_cell_cluster_count_avg(cell_cluster_path, pixel_cluster_col_prefix,
             Contains the average values for each column across cell SOM clusters
     """
 
+    # Validate paths
+    io_utils.validate_paths(cell_cluster_path)
+
     # verify the pixel cluster col prefix specified is valid
     misc_utils.verify_in_list(
-        provided_cluster_col=[pixel_cluster_col_prefix],
+        provided_cluster_col=pixel_cluster_col_prefix,
         valid_cluster_cols=['pixel_som_cluster', 'pixel_meta_cluster_rename']
     )
 
     # verify the cell cluster col prefix specified is valid
     misc_utils.verify_in_list(
-        provided_cluster_col=[cell_cluster_col],
+        provided_cluster_col=cell_cluster_col,
         valid_cluster_cols=['cell_som_cluster', 'cell_meta_cluster']
     )
 
@@ -95,9 +98,10 @@ def compute_cell_cluster_channel_avg(fovs, channels, base_dir,
         pandas.DataFrame:
             Each cell cluster mapped to the average expression for each marker
     """
+    weighted_cell_channel_name_path: str = os.path.join(base_dir, weighted_cell_channel_name)
 
     # verify the cell table actually exists
-    io_utils.validate_paths(os.path.join(base_dir, weighted_cell_channel_name))
+    io_utils.validate_paths(weighted_cell_channel_name_path)
 
     # verify the cell cluster col specified is valid
     misc_utils.verify_in_list(
@@ -106,7 +110,7 @@ def compute_cell_cluster_channel_avg(fovs, channels, base_dir,
     )
 
     # read the weighted cell channel table in
-    cell_table = pd.read_csv(os.path.join(base_dir, weighted_cell_channel_name))
+    cell_table = pd.read_csv(weighted_cell_channel_name_path)
 
     # subset on only the fovs the user has specified
     cell_table = cell_table[cell_table['fov'].isin(fovs)]
@@ -182,16 +186,16 @@ def compute_p2c_weighted_channel_avg(pixel_channel_avg, channels, cell_counts,
     # if no fovs provided make sure they're all iterated over
     if fovs is None:
         fovs = list(cell_counts['fov'].unique())
-
-    # verify that the fovs provided are valid
-    misc_utils.verify_in_list(
-        provided_fovs=fovs,
-        dataset_fovs=cell_counts['fov'].unique()
-    )
+    else:
+        # verify that the fovs provided are valid
+        misc_utils.verify_in_list(
+            provided_fovs=fovs,
+            dataset_fovs=cell_counts['fov'].unique()
+        )
 
     # verify the pixel_cluster_col provided is valid
     misc_utils.verify_in_list(
-        provided_cluster_col=[pixel_cluster_col],
+        provided_cluster_col=pixel_cluster_col,
         valid_cluster_cols=['pixel_som_cluster', 'pixel_meta_cluster_rename']
     )
 
@@ -211,7 +215,7 @@ def compute_p2c_weighted_channel_avg(pixel_channel_avg, channels, cell_counts,
     # sort the pixel_channel_avg table by pixel_cluster_col in ascending cluster order
     # NOTE: to handle numeric cluster names types, we need to cast the pixel_cluster_col values
     # to str to ensure the same sorting is used
-    if pixel_channel_avg[pixel_cluster_col].dtype == int:
+    if np.issubdtype(pixel_channel_avg[pixel_cluster_col].dtype, np.integer):
         pixel_channel_avg[pixel_cluster_col] = pixel_channel_avg[pixel_cluster_col].astype(str)
 
     pixel_channel_avg_sorted = pixel_channel_avg.sort_values(by=pixel_cluster_col)
@@ -448,7 +452,7 @@ def train_cell_som(fovs, channels, base_dir, pixel_data_dir, cell_table_path,
 
     # verify the cluster_col provided is valid
     misc_utils.verify_in_list(
-        provided_cluster_col=[pixel_cluster_col],
+        provided_cluster_col=pixel_cluster_col,
         valid_cluster_cols=['pixel_som_cluster', 'pixel_meta_cluster_rename']
     )
 

diff --git a/ark/phenotyping/cell_cluster_utils_test.py b/ark/phenotyping/cell_cluster_utils_test.py
@@ -78,15 +78,15 @@ def test_compute_cell_cluster_count_avg():
 
     with tempfile.TemporaryDirectory() as temp_dir:
         # error check: bad pixel_cluster_col_prefix specified
-        with pytest.raises(ValueError):
+        with pytest.raises(FileNotFoundError):
             cell_cluster_utils.compute_cell_cluster_count_avg(
                 'clustered_path', 'bad_cluster_col_prefix', 'cell_cluster_col', False
             )
 
         # error check: bad cell_cluster_col specified
         with pytest.raises(ValueError):
             cell_cluster_utils.compute_cell_cluster_count_avg(
-                'clustered_path', 'pixel_meta_cluster', 'bad_cluster_col', False
+                temp_dir, 'pixel_meta_cluster', 'bad_cluster_col', False
             )
 
         cluster_col_arr = [pixel_som_clusters, pixel_meta_clusters]

diff --git a/ark/phenotyping/post_cluster_utils.py b/ark/phenotyping/post_cluster_utils.py
@@ -1,4 +1,5 @@
 import os
+from typing import List
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -22,9 +23,8 @@ def plot_hist_thresholds(cell_table, populations, marker, pop_col='cell_meta_clu
     """
     all_populations = cell_table[pop_col].unique()
 
-    # input validation
-    if type(populations) != list:
-        raise ValueError("populations argument must be a list of populations to plot")
+    # Make populations a list if it is a string
+    populations: List[str] = misc_utils.make_iterable(populations, ignore_str=True)
 
     # check that provided populations are present in dataframe
     for pop in populations:
@@ -40,14 +40,15 @@ def plot_hist_thresholds(cell_table, populations, marker, pop_col='cell_meta_clu
 
     # plot each pop histogram
     pop_num = len(populations)
-    fig, ax = plt.subplots(pop_num, 1, figsize=[6.4, 2.2 * pop_num])
-    for i in range(pop_num):
-        plot_vals = cell_table.loc[cell_table[pop_col] == populations[i], marker].values
-        ax[i].hist(plot_vals, 50, density=True, facecolor='g', alpha=0.75, range=(0, x_max))
-        ax[i].set_title("Distribution of {} in {}".format(marker, populations[i]))
-
-        if threshold is not None:
-            ax[i].axvline(x=threshold)
+    fig, axes = plt.subplots(pop_num, 1, figsize=[6.4, 2.2 * pop_num], squeeze=False)
+    for ax, pop in zip(axes.flat, populations):
+        plot_vals = cell_table.loc[cell_table[pop_col] == pop, marker].values
+        ax.hist(plot_vals, 50, density=True, facecolor='g', alpha=0.75, range=(0, x_max))
+        ax.set_title("Distribution of {} in {}".format(marker, pop))
+
+        if threshold:
+            ax.axvline(x=threshold)
+
     plt.tight_layout()
 
 

diff --git a/ark/phenotyping/post_cluster_utils_test.py b/ark/phenotyping/post_cluster_utils_test.py
@@ -15,10 +15,10 @@ def test_plot_hist_thresholds():
 
     cell_table = pd.DataFrame({'cell_meta_cluster': pops, 'marker_1': marker_1})
 
-    # populations argument must be a list
-    with pytest.raises(ValueError, match='must be a list'):
-        post_cluster_utils.plot_hist_thresholds(cell_table=cell_table, populations='pop1',
-                                                marker='marker_1')
+    # populations argument must be a list, but `make_iterable` should convert a `str`
+    # argument to `List[str]`
+    post_cluster_utils.plot_hist_thresholds(cell_table=cell_table, populations='pop1',
+                                            marker='marker_1')
 
     # populations argument must contain entries from cell_table
     with pytest.raises(ValueError, match='Invalid population'):

diff --git a/ark/utils/example_dataset.py b/ark/utils/example_dataset.py
@@ -5,6 +5,8 @@
 
 import datasets
 
+from ark.utils.misc_utils import verify_in_list
+
 
 class ExampleDataset():
     def __init__(self, dataset: str, overwrite_existing: bool = True, cache_dir: str = None,
@@ -163,10 +165,11 @@ def get_example_dataset(dataset: str, save_dir: Union[str, pathlib.Path],
                       "pairwise_spatial_enrichment"]
 
     # Check the appropriate dataset name
-    if dataset not in valid_datasets:
+    try:
+        verify_in_list(dataset=dataset, valid_datasets=valid_datasets)
+    except ValueError:
         ValueError(f"The dataset <{dataset}> is not one of the valid datasets available. \
                     The following are available: { {*valid_datasets} }")
-
     example_dataset = ExampleDataset(dataset=dataset, overwrite_existing=overwrite_existing,
                                      cache_dir=None,
                                      revision="main")

diff --git a/ark/utils/io_utils.py b/ark/utils/io_utils.py
@@ -6,9 +6,11 @@
 
 from ark.settings import EXTENSION_TYPES
 
+from ark.utils import misc_utils
+
 
 def validate_paths(paths, data_prefix=False):
-    """Verifys that paths exist and don't leave Docker's scope
+    """Verifies that paths exist and don't leave Docker's scope
 
     Args:
         paths (str or list):
@@ -22,8 +24,7 @@ def validate_paths(paths, data_prefix=False):
     """
 
     # if given a single path, convert to list
-    if not isinstance(paths, list):
-        paths = [paths]
+    paths = misc_utils.make_iterable(paths, ignore_str=True)
 
     for path in paths:
         # check data prefix

diff --git a/ark/utils/metacluster_remap_gui/file_reader_test.py b/ark/utils/metacluster_remap_gui/file_reader_test.py
@@ -29,7 +29,7 @@ def test_can_read_csvs_prefix_trim(simple_full_cluster_data):
 
 def test_requires_valid_path(simple_full_cluster_data):
     with tempfile.TemporaryDirectory() as temp_dir:
-        simple_full_cluster_data.to_csv('sample.csv', index=False)
+        simple_full_cluster_data.to_csv(temp_dir + '/sample.csv', index=False)
 
         with pytest.raises(FileNotFoundError):
             metaclusterdata_from_files(os.path.join(temp_dir, 'bad_sample.csv'))

diff --git a/ark/utils/plot_utils.py b/ark/utils/plot_utils.py
@@ -263,13 +263,13 @@ def create_overlay(fov, segmentation_dir, data_dir,
         fov (str):
             The name of the fov to overlay
         segmentation_dir (str):
-            The path to the directory containing the segmentatation data
+            The path to the directory containing the segmentation data
         data_dir (str):
             The path to the directory containing the nuclear and whole cell image data
         img_overlay_chans (list):
             List of channels the user will overlay
         seg_overlay_comp (str):
-            The segmentted compartment the user will overlay
+            The segmented compartment the user will overlay
         alternate_segmentation (numpy.ndarray):
             2D numpy array of labeled cell objects
     Returns:

diff --git a/templates/4_Post_Clustering.ipynb b/templates/4_Post_Clustering.ipynb
@@ -495,7 +495,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.15 (default, Nov 11 2022, 16:55:28) \n[Clang 14.0.0 (clang-1400.0.29.202)]"
+   "version": "3.8.15"
   },
   "vscode": {
    "interpreter": {