Skip to content

Commit

Permalink
Merge branch 'main' into optimize_df_merging
Browse files Browse the repository at this point in the history
  • Loading branch information
camisowers committed Oct 10, 2022
2 parents 69e15c6 + 7b64d1f commit 9d96d6c
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 82 deletions.
29 changes: 23 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,35 @@ RUN apt-get install -y cmake
# install base r requirements
RUN apt-get install -y r-cran-data.table r-cran-doparallel r-cran-foreach r-cran-biocmanager r-cran-devtools

# Install arrow from rspm
# terminate Docker build if data.table, devtools, doParallel, or foreach fail to import
RUN R -e "library(data.table)"
RUN R -e "library(devtools)"
RUN R -e "library(doParallel)"
RUN R -e "library(foreach)"

# install arrow from rspm
RUN R -e "options(BioC_mirror = 'https://packagemanager.rstudio.com/all/__linux__/bullseye/latest', HTTPUserAgent = sprintf(\"R/%s R (%s)\", getRversion(), paste(getRversion(), R.version[\"platform\"], R.version[\"arch\"], R.version[\"os\"])))"
RUN R -e "install.packages('arrow', repos = 'https://packagemanager.rstudio.com/all/__linux__/bullseye/latest')"
RUN R -e "install.packages('arrow')"

# terminate Docker build if arrow fails to import
RUN R -e "library(arrow)"

#install flowsom requirements
# install flowsom requirements
RUN apt-get install -y r-cran-igraph r-bioc-biocgenerics r-bioc-consensusclusterplus r-cran-dplyr r-cran-ggforce r-cran-ggplot2 r-cran-ggpubr r-cran-ggrepel r-cran-magrittr r-cran-pheatmap r-cran-rlang r-cran-rtsne r-cran-tidyr r-cran-xml r-cran-scattermore
#install flowsom dependency requirements (eye-roll)

# terminate Docker build if ConsensusClusterPlus fails to import
RUN R -e "library(ConsensusClusterPlus)"

# install flowsom dependency requirements (eye-roll)
RUN apt-get install -y r-cran-rcppparallel r-bioc-biobase r-cran-matrixstats r-cran-png r-cran-jpeg r-cran-interp r-cran-mass r-bioc-graph r-bioc-rbgl r-cran-scales r-cran-digest r-cran-bh r-cran-rcpparmadillo r-cran-jsonlite r-cran-base64enc r-cran-plyr r-bioc-zlibbioc r-cran-hexbin r-cran-gridextra r-cran-yaml r-bioc-rhdf5lib r-cran-corpcor r-cran-runit r-cran-tibble r-cran-xml2 r-cran-tweenr r-cran-gtable r-cran-polyclip r-cran-tidyselect r-cran-withr r-cran-lifecycle r-cran-rcppeigen

#RUN R -e "library(BiocManager); BiocManager::install('FlowSOM')"
# install flowsom
# RUN R -e "library(BiocManager); BiocManager::install('FlowSOM')"
RUN R -e "library(devtools); devtools::install_github('angelolab/FlowSOM', upgrade = FALSE, upgrade_dependencies = FALSE)"

# terminate Docker build if FlowSOM fails to import
RUN R -e "library(FlowSOM)"

# Install ark-analysis
# copy over the requirements.txt, install dependencies, and README
COPY setup.py pyproject.toml requirements.txt README.md start_jupyter.sh /opt/ark-analysis/
Expand All @@ -55,4 +72,4 @@ RUN cd /opt/ark-analysis && python -m pip install .
WORKDIR /opt/ark-analysis

# jupyter lab
CMD bash start_jupyter.sh
CMD bash start_jupyter.sh
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
[![Build Status](https://travis-ci.com/angelolab/ark-analysis.svg?branch=main)](https://travis-ci.com/angelolab/ark-analysis)
[![Coverage Status](https://coveralls.io/repos/github/angelolab/ark-analysis/badge.svg?branch=main)](https://coveralls.io/github/angelolab/ark-analysis?branch=main)
![Docker Image Version (latest by date)](https://img.shields.io/docker/v/angelolab/ark-analysis?arch=amd64&color=%23469ae5&label=Docker%20Version&sort=date)
[![Read the Docs](https://readthedocs.org/projects/ark-analysis/badge/?version=latest)](https://ark-analysis.readthedocs.io/en/latest/)

# ark-analysis

Expand Down
28 changes: 9 additions & 19 deletions ark/analysis/spatial_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
from ark.utils import io_utils, load_utils, misc_utils, spatial_analysis_utils


def batch_channel_spatial_enrichment(label_dir, marker_thresholds, all_data, batch_size=5,
suffix='_feature_0', xr_channel_name='segmentation_label',
**kwargs):
def generate_channel_spatial_enrichment_stats(label_dir, marker_thresholds, all_data,
suffix='_feature_0',
xr_channel_name='segmentation_label', **kwargs):
"""Wrapper function for batching calls to `calculate_channel_spatial_enrichment` over fovs
Args:
Expand All @@ -21,8 +21,6 @@ def batch_channel_spatial_enrichment(label_dir, marker_thresholds, all_data, bat
threshold values for positive marker expression
all_data (pandas.DataFrame):
data including fovs, cell labels, and cell expression matrix for all markers
batch_size (int):
fov count to load into memory at a time
suffix (str):
suffix for tif file names
xr_channel_name (str):
Expand All @@ -49,15 +47,12 @@ def batch_channel_spatial_enrichment(label_dir, marker_thresholds, all_data, bat
all_label_names = \
[all_label_names[i] for i, fov in enumerate(label_fovs) if fov in included_fovs]

batching_strategy = \
[all_label_names[i:i + batch_size] for i in range(0, len(all_label_names), batch_size)]

# create containers for batched return values
values = []
stats_datasets = []

for batch_names in tqdm(batching_strategy, desc="Batch Completion", unit="batch"):
label_maps = load_utils.load_imgs_from_dir(label_dir, files=batch_names,
for label_name in tqdm(all_label_names, desc="Batch Completion", unit="batch"):
label_maps = load_utils.load_imgs_from_dir(label_dir, files=[label_name],
xr_channel_names=[xr_channel_name],
trim_suffix=suffix)

Expand Down Expand Up @@ -232,17 +227,15 @@ def calculate_channel_spatial_enrichment(dist_matrices_dict, marker_thresholds,
return values, stats


def batch_cluster_spatial_enrichment(label_dir, all_data, batch_size=5, suffix='_feature_0',
xr_channel_name='segmentation_label', **kwargs):
def generate_cluster_spatial_enrichment_stats(label_dir, all_data, suffix='_feature_0',
xr_channel_name='segmentation_label', **kwargs):
""" Wrapper function for batching calls to `calculate_cluster_spatial_enrichment` over fovs
Args:
label_dir (str | Pathlike):
directory containing labeled tiffs
all_data (pandas.DataFrame):
data including fovs, cell labels, and cell expression matrix for all markers
batch_size (int):
fov count to load into memory at a time
suffix (str):
suffix for tif file names
xr_channel_name (str):
Expand All @@ -269,15 +262,12 @@ def batch_cluster_spatial_enrichment(label_dir, all_data, batch_size=5, suffix='
all_label_names = \
[all_label_names[i] for i, fov in enumerate(label_fovs) if fov in included_fovs]

batching_strategy = \
[all_label_names[i:i + batch_size] for i in range(0, len(all_label_names), batch_size)]

# create containers for batched return values
values = []
stats_datasets = []

for batch_names in tqdm(batching_strategy, desc="Batch Completion", unit="batch"):
label_maps = load_utils.load_imgs_from_dir(label_dir, files=batch_names,
for label_name in tqdm(all_label_names, desc="Batch Completion", unit="batch"):
label_maps = load_utils.load_imgs_from_dir(label_dir, files=[label_name],
xr_channel_names=[xr_channel_name],
trim_suffix=suffix)

Expand Down
70 changes: 25 additions & 45 deletions ark/analysis/spatial_analysis_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,9 @@
))


def test_batch_channel_spatial_enrichment():

# since the functionality if channel spatial enrichment is tested later,
# only the batching needs to be tested
def test_generate_channel_spatial_enrichment_stats():
# since the functionality of channel spatial enrichment is tested later,
# only the number of elements returned and the included_fovs argument needs testing
marker_thresholds = test_utils._make_threshold_mat(in_utils=False)

with tempfile.TemporaryDirectory() as label_dir:
Expand All @@ -52,42 +51,31 @@ def test_batch_channel_spatial_enrichment():
all_data = test_utils.spoof_cell_table_from_labels(label_maps)

vals_pos, stats_pos = \
spatial_analysis.calculate_channel_spatial_enrichment(
dist_mats, marker_thresholds, all_data, excluded_channels=EXCLUDE_CHANNELS,
bootstrap_num=100, dist_lim=100)

vals_pos_batch, stats_pos_batch = \
spatial_analysis.batch_channel_spatial_enrichment(
spatial_analysis.generate_channel_spatial_enrichment_stats(
label_dir, marker_thresholds, all_data, excluded_channels=EXCLUDE_CHANNELS,
bootstrap_num=100, dist_lim=100, batch_size=5)

vals_pos_batch_2, stats_pos_batch_2 = \
spatial_analysis.batch_channel_spatial_enrichment(
label_dir, marker_thresholds, all_data, excluded_channels=EXCLUDE_CHANNELS,
bootstrap_num=100, dist_lim=100, batch_size=1
bootstrap_num=100, dist_lim=100
)

np.testing.assert_equal(vals_pos[0][0], vals_pos_batch[0][0])
np.testing.assert_equal(vals_pos[1][0], vals_pos_batch[1][0])

# batch function should match for multi batch process
np.testing.assert_equal(vals_pos[0][0], vals_pos_batch_2[0][0])
np.testing.assert_equal(vals_pos[1][0], vals_pos_batch_2[1][0])
# both fov8 and fov9 should be returned
assert len(vals_pos) == 2

vals_pos_fov8, stats_pos_fov8 = \
spatial_analysis.batch_channel_spatial_enrichment(
spatial_analysis.generate_channel_spatial_enrichment_stats(
label_dir, marker_thresholds, all_data, excluded_channels=EXCLUDE_CHANNELS,
bootstrap_num=100, dist_lim=100, batch_size=5, included_fovs=["fov8"]
bootstrap_num=100, dist_lim=100, included_fovs=["fov8"]
)

# the fov8 values in vals_pos_fov8 should be the same as in vals_pos
np.testing.assert_equal(vals_pos_fov8[0][0], vals_pos[0][0])

# only fov8 should be returned
assert len(vals_pos_fov8) == 1


def test_batch_cluster_spatial_enrichment():
def test_generate_cluster_spatial_enrichment_stats():

# since the functionality of cluster spatial enrichment is tested later,
# only the batching needs to be tested
# only the number of elements returned and the included_fovs argument needs testing
with tempfile.TemporaryDirectory() as label_dir:
test_utils._write_labels(label_dir, ["fov8", "fov9"], ["segmentation_label"], (10, 10),
'', True, np.uint8, suffix='_feature_0')
Expand All @@ -100,30 +88,22 @@ def test_batch_cluster_spatial_enrichment():
all_data = test_utils.spoof_cell_table_from_labels(label_maps)

vals_pos, stats_pos = \
spatial_analysis.calculate_cluster_spatial_enrichment(
all_data, dist_mats, bootstrap_num=100, dist_lim=100)

vals_pos_batch, stats_pos_batch = \
spatial_analysis.batch_cluster_spatial_enrichment(
label_dir, all_data, bootstrap_num=100, dist_lim=100, batch_size=5)

vals_pos_batch_2, stats_pos_batch_2 = \
spatial_analysis.batch_cluster_spatial_enrichment(
label_dir, all_data, bootstrap_num=100, dist_lim=100, batch_size=1)

np.testing.assert_equal(vals_pos[0][0], vals_pos_batch[0][0])
np.testing.assert_equal(vals_pos[1][0], vals_pos_batch[1][0])
spatial_analysis.generate_cluster_spatial_enrichment_stats(
label_dir, all_data, bootstrap_num=100, dist_lim=100
)

# batch function should match for multi batch process
np.testing.assert_equal(vals_pos[0][0], vals_pos_batch_2[0][0])
np.testing.assert_equal(vals_pos[1][0], vals_pos_batch_2[1][0])
# both fov8 and fov9 should be returned
assert len(vals_pos) == 2

vals_pos_fov8, stats_pos_fov8 = \
spatial_analysis.batch_cluster_spatial_enrichment(
label_dir, all_data, bootstrap_num=100, dist_lim=100, batch_size=5,
included_fovs=["fov8"])
spatial_analysis.generate_cluster_spatial_enrichment_stats(
label_dir, all_data, bootstrap_num=100, dist_lim=100, included_fovs=["fov8"]
)

# the fov8 values in vals_pos_fov8 should be the same as in vals_pos
np.testing.assert_equal(vals_pos_fov8[0][0], vals_pos[0][0])

# only fov8 should be returned
assert len(vals_pos_fov8) == 1


Expand Down
6 changes: 1 addition & 5 deletions docs/_rtd/development.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,11 +195,7 @@ next_release_vX.Y.Z
1. Bump the `VERSION` Variable in `setup.py` to `X.Y.Z`. View the [draft release notes](https://github.com/angelolab/ark-analysis/releases) to read the current bugfixes, enhancements and more.
1. If, in the release notes draft there are PRs that are not categorized, label them appropriately (usually based on the label of their respective Issue).
2. Make sure that all tests pass for `Ark` on Travis-CI.
3. In the `ark-analysis/start_docker.sh` script, change the image tag from
```sh
docker run -it "${run_params[@]}" angelolab/ark-analysis:vA.B.C
-> docker run -it "${run_params[@]}" angelolab/ark-analysis:vX.Y.Z
```
3. In the `ark-analysis/start_docker.sh` script, change the `VERSION` variable from `vA.B.C` to `vX.Y.Z`
4. Request a review and merge the `Ark` branch.
5. Next head to the most recent Drafted Release Notes:
1. Double check that the tag is the appropriate version name.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

CYTHON_MACROS = [('CYTHON_TRACE', '1')] if CYTHON_DEBUG else None

VERSION = '0.4.2'
VERSION = '0.4.3'

PKG_FOLDER = path.abspath(path.join(__file__, pardir))

Expand Down
17 changes: 13 additions & 4 deletions start_docker.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#!/usr/bin/env bash

# define the version number, this needs to be updated every new Docker release
VERSION='v0.4.3'

# check for template developer flag
JUPYTER_DIR='scripts'
update=0
Expand Down Expand Up @@ -30,21 +33,23 @@ do
esac
done

# find lowest open port available
PORT=8888

# update the notebooks in the scripts folder if flag set
if [ $update -ne 0 ]
then
bash update_notebooks.sh -u
else
bash update_notebooks.sh
fi

# find lowest open port available
PORT=8888

until [[ $(docker container ls | grep 0.0.0.0:$PORT | wc -l) -eq 0 ]]
do
((PORT=$PORT+1))
done

# define the run parameters
run_params=(
-p $PORT:$PORT
-e JUPYTER_PORT=$PORT
Expand All @@ -68,4 +73,8 @@ run_params=(
)
[[ ! -z "$external" ]] && run_params+=(-v "$external:/data/external")

docker run -it "${run_params[@]}" angelolab/ark-analysis:v0.4.2
# remove the old Docker container if one exists, as it may contain different external volumes
docker rm -f $VERSION > /dev/null 2>&1 || true

# create the Docker container
docker run -it "${run_params[@]}" --name $VERSION angelolab/ark-analysis:$VERSION
4 changes: 2 additions & 2 deletions templates/example_pairwise_spatial_enrichment.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@
"metadata": {},
"outputs": [],
"source": [
"values_channel, stats_channel = spatial_analysis.batch_channel_spatial_enrichment(\n",
"values_channel, stats_channel = spatial_analysis.generate_channel_spatial_enrichment_stats(\n",
" deepcell_output, marker_thresholds, all_data, excluded_channels=excluded_channels,\n",
" bootstrap_num=5)"
]
Expand Down Expand Up @@ -212,7 +212,7 @@
"metadata": {},
"outputs": [],
"source": [
"values_cluster, stats_cluster = spatial_analysis.batch_cluster_spatial_enrichment(\n",
"values_cluster, stats_cluster = spatial_analysis.generate_cluster_spatial_enrichment_stats(\n",
" deepcell_output, all_data, bootstrap_num=5)"
]
},
Expand Down

0 comments on commit 9d96d6c

Please sign in to comment.