Skip to content

Commit

Permalink
Issue #421 Bystro Python API Dockerfile & Ancestry API/CLI & Reduce A…
Browse files Browse the repository at this point in the history
…ncestry Memory Usage (#449)

* Adds docker file for Bystro's python library
* Creates ancestry api and cli code for calculating ancestry scores
* Removes unneeded dependencies from Cargo.toml, to speed up builds
* Improves Makefile by introducing the ability to make production builds
and install from wheel.
* Reduce ancestry memory usage by reading in sample chunks
* Cache ancestry scores to local disk to reduce S3 fetching

To test what is here:

```
docker pull akotlar/bystro-api
docker run -v /path/to/local/data:/data  akotlar/bystro-api ancestry score --in /data/trio.trim.vep.vcf.gz --assembly hg19 
```


[trio.trim.vep.vcf.gz](https://github.com/bystrogenomics/bystro/files/14730266/trio.trim.vep.vcf.gz)

The api function is a port of ancestry/listener.py handler_fn.
  • Loading branch information
akotlar committed Mar 25, 2024
1 parent 2744351 commit a6e3972
Show file tree
Hide file tree
Showing 20 changed files with 870 additions and 2,293 deletions.
10 changes: 8 additions & 2 deletions .github/workflows/conda-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ jobs:
auto-activate-base: true
activate-environment: true
python-version: ${{ matrix.python-version }}
- name: Build Bystro using Maturin
- name: Install Bystro using Maturin
run: |
. .initialize_conda_env.sh
make build
make install
echo "PATH=$CONDA/bin:$PATH" >> $GITHUB_ENV
shell: bash
- name: Verify Bystro wheel exists
run: |
Expand All @@ -30,3 +31,8 @@ jobs:
exit 1
fi
echo "Bystro wheel found! Build succeeded."
- name: Verify bystro-api is installed
run: |
set -e
bystro-api --help
echo "bystro-api installed successfully"
60 changes: 60 additions & 0 deletions Dockerfile.python
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# ---- Build Golang Binaries ----
FROM golang:1.21.3 AS go-builder

# Install Go binaries into a fixed directory so the runtime stage can copy
# them from a known path.
ENV GOBIN=/app/bin

# Install pinned versions of the external Go tools
RUN go install github.com/akotlar/bystro-stats@1.0.0
RUN go install github.com/bystrogenomics/bystro-vcf@2.1.1
RUN go install github.com/akotlar/bystro-snp@1.0.0
RUN go install github.com/mikefarah/yq@2.4.1

COPY ./go /app/bystro-go-tools

# Build the in-repo opensearch command; the binary lands in GOBIN
RUN cd /app/bystro-go-tools && go install bystro/cmd/opensearch

# ---- Python runtime image ----
# Official Python image built on Debian bookworm
FROM python:3.11.8-bookworm

# Copy the compiled Go binaries from the builder stage
COPY --from=go-builder /app/bin/ /app/bin/

# Add app/bin to PATH
ENV PATH="/app/bin:${PATH}"

# Install common dependencies and utilities, then the Rust toolchain via rustup.
# The apt package lists are removed in the same layer to keep the image small.
# Sourcing $HOME/.cargo/env here would have no lasting effect (environment
# changes do not persist past a RUN step); the ENV PATH line below makes
# cargo available to later steps.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    git \
    wget curl \
    openssl libcurl4-openssl-dev libssl-dev \
    tar lz4 pigz tabix unzip \
    patch \
    awscli \
    pkg-config \
    && rm -rf /var/lib/apt/lists/* \
    && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

# Make sure Cargo is in the PATH
ENV PATH="/root/.cargo/bin:${PATH}"
# NOTE(review): nothing in this stage visibly installs Go under /usr/local/go;
# confirm whether this PATH entry is still needed.
ENV PATH="/usr/local/go/bin:${PATH}"

# Set up the workspace
WORKDIR /workspace

# Copy only the Python package sources (not the whole monorepo)
COPY ./python ./python

# Install Python runtime and development dependencies
RUN pip install --upgrade pip && pip install -r python/requirements.txt -r python/requirements-dev.txt

COPY ./Makefile ./

# Build the wheel and pip-install it via the Makefile's install-python target
RUN make install-python

# Run the bystro-api CLI by default; with no arguments it prints its help text
ENTRYPOINT ["bystro-api"]

CMD ["--help"]
19 changes: 15 additions & 4 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,24 @@ source "$HOME/.cargo/env"
source .initialize_conda_env.sh;
```

to install the Python package dependencies. Then, run:

The easiest way to get started:
```sh
# Installs the Bystro Python library and cli, and starts the Ray cluster
# alternatively you could have run `make install && make ray-start-local`
# or just `make install` if you only wished to install Bystro
make run-local
```
# Build the Python package for local use
make build
- This will create a local Ray server, which is needed for some Bystro operations
- To stop Ray: `ray stop`


If you are developing/contributing to the Bystro library, for a faster build use:
```sh
make develop
```

to install the Bystro Python library.
To start a local beanstalkd listener, use either `make serve-local` or `make serve-dev`, depending on whether you are in a production or development environment.

Follow the instructions below to install the Bystro annotator:

Expand Down
42 changes: 32 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,14 +1,36 @@
# Assumes you have run ". .initialize_conda_env.sh"; since each make command runs in a separate subshell we need this to happen first
ray-start-local:
ray stop && ray start --head --disable-usage-stats

build:
cd python && maturin build --release && cd ../
ray-stop-local:
ray stop

develop:
cd go/cmd/opensearch && go build && cd ../../../
cd python && maturin develop && cd ../
build-python:
(cd python && maturin build --release)

# Ray must be started with make serve-dev
# without ray start, make serve-dev will succeed, but the handlers that rely on Ray will fail to start
serve-dev: develop
ray stop && ray start --head
build-python-dev:
(cd python && maturin develop)

install-python: build-python
pip install python/target/wheels/*.whl

install-go:
(cd ./go && go install bystro/cmd/opensearch)

install: install-python install-go

uninstall:
pip uninstall -y bystro
(cd ./go && go clean -i bystro/cmd/opensearch)

develop: install-go build-python-dev ray-start-local

run-local: install ray-start-local

pm2:
pm2 delete all 2> /dev/null || true && pm2 start startup.yml

# Currently assumes that Perl package has been separately installed
serve-local: ray-stop-local run-local pm2

# Currently assumes that Perl package has been separately installed
serve-dev: ray-stop-local develop pm2
Loading

0 comments on commit a6e3972

Please sign in to comment.