Skip to content

Commit

Permalink
Merge branch 'main' into docker_start_add
Browse files Browse the repository at this point in the history
  • Loading branch information
ngreenwald authored Oct 7, 2022
2 parents d6c789d + d65584e commit 2cc0be8
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 72 deletions.
29 changes: 23 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,35 @@ RUN apt-get install -y cmake
# install base r requirements
RUN apt-get install -y r-cran-data.table r-cran-doparallel r-cran-foreach r-cran-biocmanager r-cran-devtools

# Install arrow from rspm
# terminate Docker build if data.table, devtools, doParallel, or foreach fail to import
RUN R -e "library(data.table)"
RUN R -e "library(devtools)"
RUN R -e "library(doParallel)"
RUN R -e "library(foreach)"

# install arrow from rspm
RUN R -e "options(BioC_mirror = 'https://packagemanager.rstudio.com/all/__linux__/bullseye/latest', HTTPUserAgent = sprintf(\"R/%s R (%s)\", getRversion(), paste(getRversion(), R.version[\"platform\"], R.version[\"arch\"], R.version[\"os\"])))"
RUN R -e "install.packages('arrow', repos = 'https://packagemanager.rstudio.com/all/__linux__/bullseye/latest')"
RUN R -e "install.packages('arrow')"

# terminate Docker build if arrow fails to import
RUN R -e "library(arrow)"

#install flowsom requirements
# install flowsom requirements
RUN apt-get install -y r-cran-igraph r-bioc-biocgenerics r-bioc-consensusclusterplus r-cran-dplyr r-cran-ggforce r-cran-ggplot2 r-cran-ggpubr r-cran-ggrepel r-cran-magrittr r-cran-pheatmap r-cran-rlang r-cran-rtsne r-cran-tidyr r-cran-xml r-cran-scattermore
#install flowsom dependency requirements (eye-roll)

# terminate Docker build if ConsensusClusterPlus fails to import
RUN R -e "library(ConsensusClusterPlus)"

# install flowsom dependency requirements (eye-roll)
RUN apt-get install -y r-cran-rcppparallel r-bioc-biobase r-cran-matrixstats r-cran-png r-cran-jpeg r-cran-interp r-cran-mass r-bioc-graph r-bioc-rbgl r-cran-scales r-cran-digest r-cran-bh r-cran-rcpparmadillo r-cran-jsonlite r-cran-base64enc r-cran-plyr r-bioc-zlibbioc r-cran-hexbin r-cran-gridextra r-cran-yaml r-bioc-rhdf5lib r-cran-corpcor r-cran-runit r-cran-tibble r-cran-xml2 r-cran-tweenr r-cran-gtable r-cran-polyclip r-cran-tidyselect r-cran-withr r-cran-lifecycle r-cran-rcppeigen

#RUN R -e "library(BiocManager); BiocManager::install('FlowSOM')"
# install flowsom
# RUN R -e "library(BiocManager); BiocManager::install('FlowSOM')"
RUN R -e "library(devtools); devtools::install_github('angelolab/FlowSOM', upgrade = FALSE, upgrade_dependencies = FALSE)"

# terminate Docker build if FlowSOM fails to import
RUN R -e "library(FlowSOM)"

# Install ark-analysis
# copy over the requirements.txt, install dependencies, and README
COPY setup.py pyproject.toml requirements.txt README.md start_jupyter.sh /opt/ark-analysis/
Expand All @@ -55,4 +72,4 @@ RUN cd /opt/ark-analysis && python -m pip install .
WORKDIR /opt/ark-analysis

# jupyter lab
CMD bash start_jupyter.sh
CMD bash start_jupyter.sh
28 changes: 9 additions & 19 deletions ark/analysis/spatial_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
from ark.utils import io_utils, load_utils, misc_utils, spatial_analysis_utils


def batch_channel_spatial_enrichment(label_dir, marker_thresholds, all_data, batch_size=5,
suffix='_feature_0', xr_channel_name='segmentation_label',
**kwargs):
def generate_channel_spatial_enrichment_stats(label_dir, marker_thresholds, all_data,
suffix='_feature_0',
xr_channel_name='segmentation_label', **kwargs):
"""Wrapper function for batching calls to `calculate_channel_spatial_enrichment` over fovs
Args:
Expand All @@ -21,8 +21,6 @@ def batch_channel_spatial_enrichment(label_dir, marker_thresholds, all_data, bat
threshold values for positive marker expression
all_data (pandas.DataFrame):
data including fovs, cell labels, and cell expression matrix for all markers
batch_size (int):
fov count to load into memory at a time
suffix (str):
suffix for tif file names
xr_channel_name (str):
Expand All @@ -49,15 +47,12 @@ def batch_channel_spatial_enrichment(label_dir, marker_thresholds, all_data, bat
all_label_names = \
[all_label_names[i] for i, fov in enumerate(label_fovs) if fov in included_fovs]

batching_strategy = \
[all_label_names[i:i + batch_size] for i in range(0, len(all_label_names), batch_size)]

# create containers for batched return values
values = []
stats_datasets = []

for batch_names in tqdm(batching_strategy, desc="Batch Completion", unit="batch"):
label_maps = load_utils.load_imgs_from_dir(label_dir, files=batch_names,
for label_name in tqdm(all_label_names, desc="Batch Completion", unit="batch"):
label_maps = load_utils.load_imgs_from_dir(label_dir, files=[label_name],
xr_channel_names=[xr_channel_name],
trim_suffix=suffix)

Expand Down Expand Up @@ -232,17 +227,15 @@ def calculate_channel_spatial_enrichment(dist_matrices_dict, marker_thresholds,
return values, stats


def batch_cluster_spatial_enrichment(label_dir, all_data, batch_size=5, suffix='_feature_0',
xr_channel_name='segmentation_label', **kwargs):
def generate_cluster_spatial_enrichment_stats(label_dir, all_data, suffix='_feature_0',
xr_channel_name='segmentation_label', **kwargs):
""" Wrapper function for batching calls to `calculate_cluster_spatial_enrichment` over fovs
Args:
label_dir (str | Pathlike):
directory containing labeled tiffs
all_data (pandas.DataFrame):
data including fovs, cell labels, and cell expression matrix for all markers
batch_size (int):
fov count to load into memory at a time
suffix (str):
suffix for tif file names
xr_channel_name (str):
Expand All @@ -269,15 +262,12 @@ def batch_cluster_spatial_enrichment(label_dir, all_data, batch_size=5, suffix='
all_label_names = \
[all_label_names[i] for i, fov in enumerate(label_fovs) if fov in included_fovs]

batching_strategy = \
[all_label_names[i:i + batch_size] for i in range(0, len(all_label_names), batch_size)]

# create containers for batched return values
values = []
stats_datasets = []

for batch_names in tqdm(batching_strategy, desc="Batch Completion", unit="batch"):
label_maps = load_utils.load_imgs_from_dir(label_dir, files=batch_names,
for label_name in tqdm(all_label_names, desc="Batch Completion", unit="batch"):
label_maps = load_utils.load_imgs_from_dir(label_dir, files=[label_name],
xr_channel_names=[xr_channel_name],
trim_suffix=suffix)

Expand Down
70 changes: 25 additions & 45 deletions ark/analysis/spatial_analysis_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,9 @@
))


def test_batch_channel_spatial_enrichment():

# since the functionality if channel spatial enrichment is tested later,
# only the batching needs to be tested
def test_generate_channel_spatial_enrichment_stats():
# since the functionality of channel spatial enrichment is tested later,
# only the number of elements returned and the included_fovs argument needs testing
marker_thresholds = test_utils._make_threshold_mat(in_utils=False)

with tempfile.TemporaryDirectory() as label_dir:
Expand All @@ -52,42 +51,31 @@ def test_batch_channel_spatial_enrichment():
all_data = test_utils.spoof_cell_table_from_labels(label_maps)

vals_pos, stats_pos = \
spatial_analysis.calculate_channel_spatial_enrichment(
dist_mats, marker_thresholds, all_data, excluded_channels=EXCLUDE_CHANNELS,
bootstrap_num=100, dist_lim=100)

vals_pos_batch, stats_pos_batch = \
spatial_analysis.batch_channel_spatial_enrichment(
spatial_analysis.generate_channel_spatial_enrichment_stats(
label_dir, marker_thresholds, all_data, excluded_channels=EXCLUDE_CHANNELS,
bootstrap_num=100, dist_lim=100, batch_size=5)

vals_pos_batch_2, stats_pos_batch_2 = \
spatial_analysis.batch_channel_spatial_enrichment(
label_dir, marker_thresholds, all_data, excluded_channels=EXCLUDE_CHANNELS,
bootstrap_num=100, dist_lim=100, batch_size=1
bootstrap_num=100, dist_lim=100
)

np.testing.assert_equal(vals_pos[0][0], vals_pos_batch[0][0])
np.testing.assert_equal(vals_pos[1][0], vals_pos_batch[1][0])

# batch function should match for multi batch process
np.testing.assert_equal(vals_pos[0][0], vals_pos_batch_2[0][0])
np.testing.assert_equal(vals_pos[1][0], vals_pos_batch_2[1][0])
# both fov8 and fov9 should be returned
assert len(vals_pos) == 2

vals_pos_fov8, stats_pos_fov8 = \
spatial_analysis.batch_channel_spatial_enrichment(
spatial_analysis.generate_channel_spatial_enrichment_stats(
label_dir, marker_thresholds, all_data, excluded_channels=EXCLUDE_CHANNELS,
bootstrap_num=100, dist_lim=100, batch_size=5, included_fovs=["fov8"]
bootstrap_num=100, dist_lim=100, included_fovs=["fov8"]
)

# the fov8 values in vals_pos_fov8 should be the same as in vals_pos
np.testing.assert_equal(vals_pos_fov8[0][0], vals_pos[0][0])

# only fov8 should be returned
assert len(vals_pos_fov8) == 1


def test_batch_cluster_spatial_enrichment():
def test_generate_cluster_spatial_enrichment_stats():

# since the functionality of channel spatial enrichment is tested later,
# only the batching needs to be tested
# only the number of elements returned and the included_fovs argument needs testing
with tempfile.TemporaryDirectory() as label_dir:
test_utils._write_labels(label_dir, ["fov8", "fov9"], ["segmentation_label"], (10, 10),
'', True, np.uint8, suffix='_feature_0')
Expand All @@ -100,30 +88,22 @@ def test_batch_cluster_spatial_enrichment():
all_data = test_utils.spoof_cell_table_from_labels(label_maps)

vals_pos, stats_pos = \
spatial_analysis.calculate_cluster_spatial_enrichment(
all_data, dist_mats, bootstrap_num=100, dist_lim=100)

vals_pos_batch, stats_pos_batch = \
spatial_analysis.batch_cluster_spatial_enrichment(
label_dir, all_data, bootstrap_num=100, dist_lim=100, batch_size=5)

vals_pos_batch_2, stats_pos_batch_2 = \
spatial_analysis.batch_cluster_spatial_enrichment(
label_dir, all_data, bootstrap_num=100, dist_lim=100, batch_size=1)

np.testing.assert_equal(vals_pos[0][0], vals_pos_batch[0][0])
np.testing.assert_equal(vals_pos[1][0], vals_pos_batch[1][0])
spatial_analysis.generate_cluster_spatial_enrichment_stats(
label_dir, all_data, bootstrap_num=100, dist_lim=100
)

# batch function should match for multi batch process
np.testing.assert_equal(vals_pos[0][0], vals_pos_batch_2[0][0])
np.testing.assert_equal(vals_pos[1][0], vals_pos_batch_2[1][0])
# both fov8 and fov9 should be returned
assert len(vals_pos) == 2

vals_pos_fov8, stats_pos_fov8 = \
spatial_analysis.batch_cluster_spatial_enrichment(
label_dir, all_data, bootstrap_num=100, dist_lim=100, batch_size=5,
included_fovs=["fov8"])
spatial_analysis.generate_cluster_spatial_enrichment_stats(
label_dir, all_data, bootstrap_num=100, dist_lim=100, included_fovs=["fov8"]
)

# the fov8 values in vals_pos_fov8 should be the same as in vals_pos
np.testing.assert_equal(vals_pos_fov8[0][0], vals_pos[0][0])

# only fov8 should be returned
assert len(vals_pos_fov8) == 1


Expand Down
4 changes: 2 additions & 2 deletions templates/example_pairwise_spatial_enrichment.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@
"metadata": {},
"outputs": [],
"source": [
"values_channel, stats_channel = spatial_analysis.batch_channel_spatial_enrichment(\n",
"values_channel, stats_channel = spatial_analysis.generate_channel_spatial_enrichment_stats(\n",
" deepcell_output, marker_thresholds, all_data, excluded_channels=excluded_channels,\n",
" bootstrap_num=5)"
]
Expand Down Expand Up @@ -212,7 +212,7 @@
"metadata": {},
"outputs": [],
"source": [
"values_cluster, stats_cluster = spatial_analysis.batch_cluster_spatial_enrichment(\n",
"values_cluster, stats_cluster = spatial_analysis.generate_cluster_spatial_enrichment_stats(\n",
" deepcell_output, all_data, bootstrap_num=5)"
]
},
Expand Down

0 comments on commit 2cc0be8

Please sign in to comment.