From 1294ed36f2e2f23f4a4824b1032aef31931200a3 Mon Sep 17 00:00:00 2001 From: Andre Rendeiro Date: Tue, 10 Sep 2019 17:05:37 +0200 Subject: [PATCH] Update bedtools version to 2.27.1 --- .travis.yml | 3 +-- Dockerfile | 8 +++++--- docs/source/intro.rst | 9 +++------ ngs_toolkit/__init__.py | 17 +++++++++++++++++ ngs_toolkit/atacseq.py | 4 ++-- 5 files changed, 28 insertions(+), 13 deletions(-) diff --git a/.travis.yml b/.travis.yml index 48336df..6a908de 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,8 +18,7 @@ services: - xvfb before_install: - - wget http://ftp.debian.org/debian/pool/main/b/bedtools/bedtools_2.21.0-1_amd64.deb - - sudo dpkg -i bedtools_2.21.0-1_amd64.deb + - sudo apt-get -y install bedtools=2.27.1+dfsg-4 - sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 - sudo add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/' - sudo apt-get update diff --git a/Dockerfile b/Dockerfile index 9af1313..909948a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,9 +35,11 @@ RUN apt-get update \ && rm -rf /var/lib/apt/lists/* # Install specific bedtools version -RUN wget http://ftp.debian.org/debian/pool/main/b/bedtools/bedtools_2.21.0-1_amd64.deb \ - && dpkg -i bedtools_2.21.0-1_amd64.deb \ - && rm bedtools_2.21.0-1_amd64.deb +RUN apt-get update \ + && apt-get install -t unstable -y --no-install-recommends \ + bedtools=2.27.1+dfsg-4 \ + && rm -rf /tmp/downloaded_packages/ /tmp/*.rds \ + && rm -rf /var/lib/apt/lists/* RUN apt-get update \ && apt-get install -t unstable -y --no-install-recommends \ diff --git a/docs/source/intro.rst b/docs/source/intro.rst index e6de8fb..920a637 100644 --- a/docs/source/intro.rst +++ b/docs/source/intro.rst @@ -44,7 +44,7 @@ Install non-Python dependencies: .. 
code-block:: bash - conda install -y bedtools==2.20.1 + conda install -y bedtools==2.27.1 conda install -y ucsc-twobittofa conda install -y bioconductor-deseq2 conda install -y bioconductor-cqn @@ -62,18 +62,15 @@ And then install the ``ngs-toolkit`` library with pip (available only through Py ``ngs_toolkit`` makes use of some non-Python dependencies. + - `bedtools `_: version should be at least 2.27.1 + The following are highly recommended only for some data or analysis types: - - `bedtools `_: required for some ATAC/ChIP-seq functions. It is underlying the Python interface library to bedtools (pybedtools) which can be installed without bedtools. - `R `_ and some bioconductor libraries (optional): - - `cqn `_ (optional): used for GC-content aware normalization of NGS data. - `DESeq2 `_ (optional): used for differential testing of genes/regulatory elements. - `Kent tools `_ (optional): the 'twoBitToFa' binary from UCSC's Kent bioinformatics toolkit is used to convert between the 2bit and FASTA formats. -.. note:: - ``bedtools`` version should be below 2.24.0 (2.20.1 is used for testing) - Testing ============================= diff --git a/ngs_toolkit/__init__.py b/ngs_toolkit/__init__.py index cd6bbcb..8a6d332 100644 --- a/ngs_toolkit/__init__.py +++ b/ngs_toolkit/__init__.py @@ -197,9 +197,26 @@ def setup_timestamping(): exclude_functions=["from_dataframe"]) +def check_bedtools_version(): + import pybedtools + + # not existing + v = ".".join([str(x) for x in pybedtools.helpers.settings.bedtools_version]) + msg = "Bedtools does not seem to be installed or is not in $PATH." + if v == '': + raise Exception(msg) + + # too low version + msg = "Bedtools version '{}' is smaller than 2.27.".format(v) + msg += " Please upgrade to newer version." 
+ if not pybedtools.helpers.settings._v_2_27_plus: + raise Exception(msg) + + # setup _LOGGER = setup_logger() _CONFIG = setup_config() +check_bedtools_version() setup_graphic_preferences() setup_timestamping() diff --git a/ngs_toolkit/atacseq.py b/ngs_toolkit/atacseq.py index bac7ed5..3e9bbc9 100644 --- a/ngs_toolkit/atacseq.py +++ b/ngs_toolkit/atacseq.py @@ -731,14 +731,14 @@ def measure_coverage( output_dir, s.name + ".{}_coverage.sh".format(peak_set_name) ) cmd = ( - "date\nbedtools coverage -counts -abam {bam} -b {bed} > {out}\ndate" + "date\nbedtools coverage -counts -a {bed} -b {bam} > {out}\ndate" .format(bam=s.aligned_filtered_bam, bed=sites.fn, out=output_file)) submit_job( cmd, job_file, jobname=job_name, logfile=log_file, - cores=1, mem=8000, time="04:00:00") + cores=1, mem=8000, time="04:00:00", **kwargs) def collect_coverage( self,