Skip to content

Commit

Permalink
Merge pull request #91 from genxnetwork/develop
Browse files Browse the repository at this point in the history
GRAPE v1.7 Release
  • Loading branch information
Jahysama committed Jan 13, 2023
2 parents 599fcd6 + ba5e16c commit cce7e42
Show file tree
Hide file tree
Showing 34 changed files with 920 additions and 204 deletions.
27 changes: 27 additions & 0 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Runs the Python test suite on the self-hosted runner.
#
# Triggers:
#   * pull requests targeting master (only when first opened or reopened)
#   * manual runs via the "Run workflow" button (workflow_dispatch)
name: Run Python Tests

on:
  pull_request:
    branches: [master]
    types: [opened, reopened]

  workflow_dispatch:

jobs:
  test:
    # Self-hosted runner
    runs-on: self-hosted
    timeout-minutes: 1000
    # Pull requests: run only for non-fork PRs whose source branch is
    # `develop`.
    # Manual runs: `github.head_ref` is only populated for pull_request
    # events, so without the explicit `workflow_dispatch` clause the job
    # would always be skipped when triggered by hand.
    if: >-
      github.event_name == 'workflow_dispatch' ||
      (!github.event.pull_request.head.repo.fork &&
      github.head_ref == 'develop')
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v3
        with:
          # Quoted so the version is always read as a string.
          python-version: '3.9.13'
      # Install the test dependencies.
      - run: |
          python -m pip install --upgrade pip
          pip install -r test/requirements.txt
      # Run the test suite.
      - run: |
          pytest test/test.py
6 changes: 3 additions & 3 deletions .github/workflows/verify_files.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@ name: verify-files
on:
schedule:
- cron: "0 0 * * *"

workflow_dispatch:

jobs:
verify-files:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2

- name: Setup python
run: "python -m pip install pyyaml"

Expand Down
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
# Changelog

## [v1.7] - 2023-01-12

### Added

- All conda environments are now built in the Dockerfile, so Snakemake no longer needs to create them from `.yaml` files for every workflow run.
- Multiple tests were added, covering all GRAPE flows. Test cases are stored at `grape/test-cases`.

### Changed

- The phased Affymetrix chip is now stored within the bundle to speed up the simulation flow. Because of this, the `intersect` rule in the `pedsim` simulation workflow was moved to the `reference` downloading workflow.

### Fixed

- Fixed `ibis` detecting empty IBD segments causing pipeline teardown.

## [v1.6] - 2022-05-20

### Added
Expand Down
8 changes: 5 additions & 3 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ reference:
url: https://ftp-trace.ncbi.nih.gov/hapmap/recombination/2011-01_phaseII_B37/genetic_map_HapMapII_GRCh37.tar.gz
filesize: 37730100
md5: 1bc10a34d985e68e1f38ceb137b87929
phased_ref:
file: 1000genome/phased/chr{chrom}.phased.vcf.gz
vcfRef:
file: 1000genome/bcf/1000genome_chr{chrom}.bcf
url: https://dataset1000genomes.blob.core.windows.net/dataset/release/20130502/supporting/bcf_files/ALL.chr$chrom.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.bcf
Expand Down Expand Up @@ -96,9 +98,9 @@ reference:
md5: 39e6e8620d616362875f2538eae2f279
bundle:
file: ref.tar.gz
url: https://bioinformatics.file.core.windows.net/bundles/ref.tar.gz
filesize: 18582485368
md5: 67278f83139f375e22bd56544d523fa3
url: https://bioinformatics.file.core.windows.net/bundles/ref_v2.tar.gz
filesize: 19411359855
md5: 300fa3e768b677958a2b8e270115c6d9
bundle_min:
file: ref_min.tar.gz
url: https://bioinformatics.file.core.windows.net/bundles/ref_min.tar.gz
Expand Down
6 changes: 3 additions & 3 deletions containers/snakemake/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@ ENV DEBIAN_FRONTEND noninteractive
RUN apt-get clean && apt-get update && apt-get install -y ca-certificates libseccomp-dev squashfs-tools \
&& rm -rf /tmp/*

ADD envs/snakemake.yaml envs/snakemake.yaml
ADD envs envs

ENV PATH /opt/conda/bin:${PATH}
ENV LANG C.UTF-8
ENV SHELL /bin/bash

RUN apt-get install -y wget bzip2 gnupg2 git libgomp1 && \
RUN apt-get update && apt-get install -y wget bzip2 gnupg2 git libgomp1 && \
wget -nv https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda && \
rm Miniconda3-latest-Linux-x86_64.sh && \
conda install -c conda-forge mamba && \
mamba env create -f envs/snakemake.yaml && \
for e in envs/*; do mamba env create -f $e ; done && \
conda clean --all -y

# Install Minimac3
Expand Down
4 changes: 3 additions & 1 deletion envs/evaluation.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ dependencies:
- seaborn==0.10.1
- matplotlib==3.3.1
- pandas==1.1.1
- pydot==1.4.2
- pydot==1.4.2
- mmh3==3.0.0
- numpy==1.20
3 changes: 2 additions & 1 deletion envs/interpolation.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
name: interpolation
channels:
- bioconda
- conda-forge
Expand All @@ -7,4 +8,4 @@ dependencies:
- samtools==1.9
- openssl==1.0.2u
- pandas==1.1.1
- numpy==1.19.1
- numpy==1.19.1
1 change: 1 addition & 0 deletions envs/postprocess.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
name: postprocess
channels:
- bioconda
- conda-forge
Expand Down
6 changes: 5 additions & 1 deletion envs/snakemake.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ channels:
- defaults
dependencies:
- python>=3.5
- snakemake==7.15.2
- matplotlib==3.3.1
- pandas==1.1.1
- numpy==1.19.1
Expand All @@ -15,3 +14,8 @@ dependencies:
- scikit-bio==0.5.6
- docutils==0.16
- mmh3==3.0.0
- pip:
- "--editable=git+https://github.com/Jahysama/snakemake.git#egg=snakemake"
# A fork of snakemake is used here because conda envs do not work inside Python scripts.
# Please check out https://github.com/snakemake/snakemake/pull/1812 for more info.

3 changes: 2 additions & 1 deletion envs/vcf_to_ped.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
name: vcf_to_ped2
name: vcf_to_ped
channels:
- conda-forge
- defaults
dependencies:
- scikit-allel==1.3.2
- numpy==1.20
2 changes: 1 addition & 1 deletion envs/weight-mask.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ name: weight-mask
channels:
- conda-forge
dependencies:
- numpy
- numpy==1.20
- scikit-learn
- matplotlib
2 changes: 1 addition & 1 deletion launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ def get_parser_args():
raise ValueError('If --impute is present, then --phase must also be present')

if args.command != 'reference' and args.use_bundle:
raise ValueError('--bundle option only available for reference downloading')
raise ValueError('--use-bundle option only available for reference downloading')

if args.num_batches > args.cores:
raise ValueError('Number of batches is bigger than number cores, please change --num-batches value to be lower or equal --cores')
Expand Down
10 changes: 5 additions & 5 deletions rules/filter.smk
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ rule vcf_stats:
params:
samples='vcf/{batch}_merged_lifted.vcf.samples'
conda:
'../envs/bcftools.yaml'
'bcftools'
shell:
"""
bcftools query --list-samples {input.vcf} > {params.samples}
Expand All @@ -29,7 +29,7 @@ rule select_bad_samples:
alt_hom_samples = config['alt_hom_samples'],
het_samples = config['het_samples']
conda:
'../envs/evaluation.yaml'
'evaluation'
script:
'../scripts/select_bad_samples.py'

Expand All @@ -43,7 +43,7 @@ rule plink_filter:
bim = temp('plink/{batch}_merged_filter.bim'),
fam = temp('plink/{batch}_merged_filter.fam')
conda:
'../envs/plink.yaml'
'plink'
params:
input = '{batch}_merged',
out = '{batch}_merged_filter',
Expand Down Expand Up @@ -90,7 +90,7 @@ rule plink_clean_up:
input = 'plink/{batch}_merged_filter',
out = 'plink/{batch}_merged_mapped'
conda:
'../envs/plink.yaml'
'plink'
log:
'logs/plink/{batch}_plink_clean_up.log'
benchmark:
Expand Down Expand Up @@ -128,7 +128,7 @@ rule prepare_vcf:
input = 'plink/{batch}_merged_mapped',
vcf = 'vcf/{batch}_merged_mapped_sorted.vcf.gz'
conda:
'../envs/bcf_plink.yaml'
'bcf_plink'
log:
plink='logs/plink/{batch}_prepare_vcf.log',
vcf='logs/vcf/{batch}_prepare_vcf.log'
Expand Down
8 changes: 4 additions & 4 deletions rules/imputation.smk
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ rule imputation_filter:
# TODO: because "The option is currently used only for the compression of the output stream"
# threads: workflow.cores
conda:
'../envs/bcftools.yaml'
'bcftools'
log:
'logs/impute/{batch}_imputation_filter-{chrom}.log'
benchmark:
Expand Down Expand Up @@ -63,7 +63,7 @@ rule merge_imputation_filter:
mode=config['mode'],
merged_imputed = 'background/' + card + '_merged_imputed.vcf.gz'
conda:
'../envs/bcftools.yaml'
'bcftools'
log:
'logs/vcf/' + card + '_merge_imputation_filter.log'
benchmark:
Expand Down Expand Up @@ -103,7 +103,7 @@ rule convert_imputed_to_plink:
params:
out = 'plink/{batch}_merged_imputed'
conda:
'../envs/plink.yaml'
'plink'
log:
'logs/plink/{batch}_convert_imputed_to_plink.log'
benchmark:
Expand All @@ -126,7 +126,7 @@ rule merge_convert_imputed_to_plink:
background = 'background/{batch}_merged_imputed',
out = 'plink/client/{batch}_merged_imputed'
conda:
'../envs/plink.yaml'
'plink'
log:
'logs/plink/{batch}_convert_imputed_to_plink.log'
benchmark:
Expand Down
2 changes: 1 addition & 1 deletion rules/phasing.smk
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ rule merge_phased:
list='vcf/{batch}_phased.merge.list',
mode=config['mode']
conda:
'../envs/bcftools.yaml'
'bcftools'
log:
'logs/vcf/{batch}_merged_phased.log'
benchmark:
Expand Down
Loading

0 comments on commit cce7e42

Please sign in to comment.