Skip to content

Commit

Permalink
Merge branch 'main' into docker_start_add
Browse files Browse the repository at this point in the history
  • Loading branch information
ngreenwald authored Oct 7, 2022
2 parents d6c789d + d65584e commit 2cc0be8
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 72 deletions.
29 changes: 23 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,35 @@ RUN apt-get install -y cmake
# install base r requirements
RUN apt-get install -y r-cran-data.table r-cran-doparallel r-cran-foreach r-cran-biocmanager r-cran-devtools

# Install arrow from rspm
# terminate Docker build if data.table, devtools, doParallel, or foreach fail to import
RUN R -e "library(data.table)"
RUN R -e "library(devtools)"
RUN R -e "library(doParallel)"
RUN R -e "library(foreach)"

# install arrow from rspm
RUN R -e "options(BioC_mirror = 'https://packagemanager.rstudio.com/all/__linux__/bullseye/latest', HTTPUserAgent = sprintf(\"R/%s R (%s)\", getRversion(), paste(getRversion(), R.version[\"platform\"], R.version[\"arch\"], R.version[\"os\"])))"
RUN R -e "install.packages('arrow', repos = 'https://packagemanager.rstudio.com/all/__linux__/bullseye/latest')"
RUN R -e "install.packages('arrow')"

# terminate Docker build if arrow fails to import
RUN R -e "library(arrow)"

#install flowsom requirements
# install flowsom requirements
RUN apt-get install -y r-cran-igraph r-bioc-biocgenerics r-bioc-consensusclusterplus r-cran-dplyr r-cran-ggforce r-cran-ggplot2 r-cran-ggpubr r-cran-ggrepel r-cran-magrittr r-cran-pheatmap r-cran-rlang r-cran-rtsne r-cran-tidyr r-cran-xml r-cran-scattermore
#install flowsom dependency requirements (eye-roll)

# terminate Docker build if ConsensusClusterPlus fails to import
RUN R -e "library(ConsensusClusterPlus)"

# install flowsom dependency requirements (eye-roll)
RUN apt-get install -y r-cran-rcppparallel r-bioc-biobase r-cran-matrixstats r-cran-png r-cran-jpeg r-cran-interp r-cran-mass r-bioc-graph r-bioc-rbgl r-cran-scales r-cran-digest r-cran-bh r-cran-rcpparmadillo r-cran-jsonlite r-cran-base64enc r-cran-plyr r-bioc-zlibbioc r-cran-hexbin r-cran-gridextra r-cran-yaml r-bioc-rhdf5lib r-cran-corpcor r-cran-runit r-cran-tibble r-cran-xml2 r-cran-tweenr r-cran-gtable r-cran-polyclip r-cran-tidyselect r-cran-withr r-cran-lifecycle r-cran-rcppeigen

#RUN R -e "library(BiocManager); BiocManager::install('FlowSOM')"
# install flowsom
# RUN R -e "library(BiocManager); BiocManager::install('FlowSOM')"
RUN R -e "library(devtools); devtools::install_github('angelolab/FlowSOM', upgrade = FALSE, upgrade_dependencies = FALSE)"

# terminate Docker build if FlowSOM fails to import
RUN R -e "library(FlowSOM)"

# Install ark-analysis
# copy over the requirements.txt, install dependencies, and README
COPY setup.py pyproject.toml requirements.txt README.md start_jupyter.sh /opt/ark-analysis/
Expand All @@ -55,4 +72,4 @@ RUN cd /opt/ark-analysis && python -m pip install .
WORKDIR /opt/ark-analysis

# jupyter lab
CMD bash start_jupyter.sh
CMD bash start_jupyter.sh
28 changes: 9 additions & 19 deletions ark/analysis/spatial_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
from ark.utils import io_utils, load_utils, misc_utils, spatial_analysis_utils


def batch_channel_spatial_enrichment(label_dir, marker_thresholds, all_data, batch_size=5,
suffix='_feature_0', xr_channel_name='segmentation_label',
**kwargs):
def generate_channel_spatial_enrichment_stats(label_dir, marker_thresholds, all_data,
suffix='_feature_0',
xr_channel_name='segmentation_label', **kwargs):
"""Wrapper function for batching calls to `calculate_channel_spatial_enrichment` over fovs
Args:
Expand All @@ -21,8 +21,6 @@ def batch_channel_spatial_enrichment(label_dir, marker_thresholds, all_data, bat
threshold values for positive marker expression
all_data (pandas.DataFrame):
data including fovs, cell labels, and cell expression matrix for all markers
batch_size (int):
fov count to load into memory at a time
suffix (str):
suffix for tif file names
xr_channel_name (str):
Expand All @@ -49,15 +47,12 @@ def batch_channel_spatial_enrichment(label_dir, marker_thresholds, all_data, bat
all_label_names = \
[all_label_names[i] for i, fov in enumerate(label_fovs) if fov in included_fovs]

batching_strategy = \
[all_label_names[i:i + batch_size] for i in range(0, len(all_label_names), batch_size)]

# create containers for batched return values
values = []
stats_datasets = []

for batch_names in tqdm(batching_strategy, desc="Batch Completion", unit="batch"):
label_maps = load_utils.load_imgs_from_dir(label_dir, files=batch_names,
for label_name in tqdm(all_label_names, desc="Batch Completion", unit="batch"):
label_maps = load_utils.load_imgs_from_dir(label_dir, files=[label_name],
xr_channel_names=[xr_channel_name],
trim_suffix=suffix)

Expand Down Expand Up @@ -232,17 +227,15 @@ def calculate_channel_spatial_enrichment(dist_matrices_dict, marker_thresholds,
return values, stats


def batch_cluster_spatial_enrichment(label_dir, all_data, batch_size=5, suffix='_feature_0',
xr_channel_name='segmentation_label', **kwargs):
def generate_cluster_spatial_enrichment_stats(label_dir, all_data, suffix='_feature_0',
xr_channel_name='segmentation_label', **kwargs):
""" Wrapper function for batching calls to `calculate_cluster_spatial_enrichment` over fovs
Args:
label_dir (str | Pathlike):
directory containing labeled tiffs
all_data (pandas.DataFrame):
data including fovs, cell labels, and cell expression matrix for all markers
batch_size (int):
fov count to load into memory at a time
suffix (str):
suffix for tif file names
xr_channel_name (str):
Expand All @@ -269,15 +262,12 @@ def batch_cluster_spatial_enrichment(label_dir, all_data, batch_size=5, suffix='
all_label_names = \
[all_label_names[i] for i, fov in enumerate(label_fovs) if fov in included_fovs]

batching_strategy = \
[all_label_names[i:i + batch_size] for i in range(0, len(all_label_names), batch_size)]

# create containers for batched return values
values = []
stats_datasets = []

for batch_names in tqdm(batching_strategy, desc="Batch Completion", unit="batch"):
label_maps = load_utils.load_imgs_from_dir(label_dir, files=batch_names,
for label_name in tqdm(all_label_names, desc="Batch Completion", unit="batch"):
label_maps = load_utils.load_imgs_from_dir(label_dir, files=[label_name],
xr_channel_names=[xr_channel_name],
trim_suffix=suffix)

Expand Down
70 changes: 25 additions & 45 deletions ark/analysis/spatial_analysis_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,9 @@
))


def test_batch_channel_spatial_enrichment():

# since the functionality if channel spatial enrichment is tested later,
# only the batching needs to be tested
def test_generate_channel_spatial_enrichment_stats():
# since the functionality of channel spatial enrichment is tested later,
# only the number of elements returned and the included_fovs argument needs testing
marker_thresholds = test_utils._make_threshold_mat(in_utils=False)

with tempfile.TemporaryDirectory() as label_dir:
Expand All @@ -52,42 +51,31 @@ def test_batch_channel_spatial_enrichment():
all_data = test_utils.spoof_cell_table_from_labels(label_maps)

vals_pos, stats_pos = \
spatial_analysis.calculate_channel_spatial_enrichment(
dist_mats, marker_thresholds, all_data, excluded_channels=EXCLUDE_CHANNELS,
bootstrap_num=100, dist_lim=100)

vals_pos_batch, stats_pos_batch = \
spatial_analysis.batch_channel_spatial_enrichment(
spatial_analysis.generate_channel_spatial_enrichment_stats(
label_dir, marker_thresholds, all_data, excluded_channels=EXCLUDE_CHANNELS,
bootstrap_num=100, dist_lim=100, batch_size=5)

vals_pos_batch_2, stats_pos_batch_2 = \
spatial_analysis.batch_channel_spatial_enrichment(
label_dir, marker_thresholds, all_data, excluded_channels=EXCLUDE_CHANNELS,
bootstrap_num=100, dist_lim=100, batch_size=1
bootstrap_num=100, dist_lim=100
)

np.testing.assert_equal(vals_pos[0][0], vals_pos_batch[0][0])
np.testing.assert_equal(vals_pos[1][0], vals_pos_batch[1][0])

# batch function should match for multi batch process
np.testing.assert_equal(vals_pos[0][0], vals_pos_batch_2[0][0])
np.testing.assert_equal(vals_pos[1][0], vals_pos_batch_2[1][0])
# both fov8 and fov9 should be returned
assert len(vals_pos) == 2

vals_pos_fov8, stats_pos_fov8 = \
spatial_analysis.batch_channel_spatial_enrichment(
spatial_analysis.generate_channel_spatial_enrichment_stats(
label_dir, marker_thresholds, all_data, excluded_channels=EXCLUDE_CHANNELS,
bootstrap_num=100, dist_lim=100, batch_size=5, included_fovs=["fov8"]
bootstrap_num=100, dist_lim=100, included_fovs=["fov8"]
)

# the fov8 values in vals_pos_fov8 should be the same as in vals_pos
np.testing.assert_equal(vals_pos_fov8[0][0], vals_pos[0][0])

# only fov8 should be returned
assert len(vals_pos_fov8) == 1


def test_batch_cluster_spatial_enrichment():
def test_generate_cluster_spatial_enrichment_stats():

# since the functionality of channel spatial enrichment is tested later,
# only the batching needs to be tested
# only the number of elements returned and the included_fovs argument needs testing
with tempfile.TemporaryDirectory() as label_dir:
test_utils._write_labels(label_dir, ["fov8", "fov9"], ["segmentation_label"], (10, 10),
'', True, np.uint8, suffix='_feature_0')
Expand All @@ -100,30 +88,22 @@ def test_batch_cluster_spatial_enrichment():
all_data = test_utils.spoof_cell_table_from_labels(label_maps)

vals_pos, stats_pos = \
spatial_analysis.calculate_cluster_spatial_enrichment(
all_data, dist_mats, bootstrap_num=100, dist_lim=100)

vals_pos_batch, stats_pos_batch = \
spatial_analysis.batch_cluster_spatial_enrichment(
label_dir, all_data, bootstrap_num=100, dist_lim=100, batch_size=5)

vals_pos_batch_2, stats_pos_batch_2 = \
spatial_analysis.batch_cluster_spatial_enrichment(
label_dir, all_data, bootstrap_num=100, dist_lim=100, batch_size=1)

np.testing.assert_equal(vals_pos[0][0], vals_pos_batch[0][0])
np.testing.assert_equal(vals_pos[1][0], vals_pos_batch[1][0])
spatial_analysis.generate_cluster_spatial_enrichment_stats(
label_dir, all_data, bootstrap_num=100, dist_lim=100
)

# batch function should match for multi batch process
np.testing.assert_equal(vals_pos[0][0], vals_pos_batch_2[0][0])
np.testing.assert_equal(vals_pos[1][0], vals_pos_batch_2[1][0])
# both fov8 and fov9 should be returned
assert len(vals_pos) == 2

vals_pos_fov8, stats_pos_fov8 = \
spatial_analysis.batch_cluster_spatial_enrichment(
label_dir, all_data, bootstrap_num=100, dist_lim=100, batch_size=5,
included_fovs=["fov8"])
spatial_analysis.generate_cluster_spatial_enrichment_stats(
label_dir, all_data, bootstrap_num=100, dist_lim=100, included_fovs=["fov8"]
)

# the fov8 values in vals_pos_fov8 should be the same as in vals_pos
np.testing.assert_equal(vals_pos_fov8[0][0], vals_pos[0][0])

# only fov8 should be returned
assert len(vals_pos_fov8) == 1


Expand Down
4 changes: 2 additions & 2 deletions templates/example_pairwise_spatial_enrichment.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@
"metadata": {},
"outputs": [],
"source": [
"values_channel, stats_channel = spatial_analysis.batch_channel_spatial_enrichment(\n",
"values_channel, stats_channel = spatial_analysis.generate_channel_spatial_enrichment_stats(\n",
" deepcell_output, marker_thresholds, all_data, excluded_channels=excluded_channels,\n",
" bootstrap_num=5)"
]
Expand Down Expand Up @@ -212,7 +212,7 @@
"metadata": {},
"outputs": [],
"source": [
"values_cluster, stats_cluster = spatial_analysis.batch_cluster_spatial_enrichment(\n",
"values_cluster, stats_cluster = spatial_analysis.generate_cluster_spatial_enrichment_stats(\n",
" deepcell_output, all_data, bootstrap_num=5)"
]
},
Expand Down

0 comments on commit 2cc0be8

Please sign in to comment.