Skip to content

Commit

Permalink
Merge branch 'master' into ct-update-docs-for-hosted-dbs
Browse files Browse the repository at this point in the history
  • Loading branch information
tomkinsc committed May 26, 2016
2 parents cbe6486 + b3b0112 commit 460c2f4
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 103 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ htmlcov/
nosetests.xml
pytest.xml
coverage.xml
.coverage*

test/input/TestVPhaser2/in.bam.bti

Expand Down
5 changes: 3 additions & 2 deletions metagenomics.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import sys
import util.cmd
import util.file
import util.misc
import tools.kraken
import tools.krona
import tools.diamond
Expand Down Expand Up @@ -427,7 +428,7 @@ def kraken(inBam, db, outReport=None, outReads=None,
tmp_reads = util.file.mkstempfname('.kraken')
opts = {
'--paired': None,
'--threads': numThreads,
'--threads': min(int(numThreads), util.misc.available_cpu_count()),
}
# Could be optimized in 3.5 piping directly to kraken-filter.
kraken_tool.classify(db, [tmp_fastq1, tmp_fastq2], tmp_reads, options=opts)
Expand Down Expand Up @@ -480,7 +481,7 @@ def parser_kraken(parser=argparse.ArgumentParser()):
default=0.05,
type=float,
help='Kraken filter threshold (default %(default)s)')
parser.add_argument('--numThreads', default=1, help='Number of threads to run. (default %(default)s)')
parser.add_argument('--numThreads', type=int, default=1, help='Number of threads to run. (default %(default)s)')
util.cmd.common_args(parser, (('loglevel', None), ('version', None),
('tmp_dir', None)))
util.cmd.attach_main(parser, kraken, split_args=True)
Expand Down
3 changes: 2 additions & 1 deletion test/unit/test_metagenomics.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import tools.picard
import metagenomics
import util.file
import util.misc
from test import TestCaseWithTmp


Expand Down Expand Up @@ -119,7 +120,7 @@ def test_num_threads(self):
self.mock_kraken().classify.assert_called_once_with(
self.db, mock.ANY, mock.ANY, options={
'--paired': None,
'--threads': 11,
'--threads': min(util.misc.available_cpu_count(), 11),
})


Expand Down
40 changes: 33 additions & 7 deletions tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ def _patch(self, path, patch):

@property
def _package_installed(self):
result = util.misc.run_and_print(["conda", "list", "-f", "-c", "-p", self.env_path, "--json", self.package], silent=True, env=self.conda_env)
result = util.misc.run_and_print(["conda", "list", "-f", "-c", "-p", self.env_path, "--json", self.package], loglevel=logging.DEBUG, env=self.conda_env)
if result.returncode == 0:
command_output = result.stdout.decode("UTF-8")
data = json.loads(self._string_from_start_of_json(command_output))
Expand Down Expand Up @@ -337,9 +337,9 @@ def verify_install(self):
def _attempt_install(self):
try:
# check for presence of conda command
util.misc.run_and_print(["conda", "-V"], silent=True, check=True, env=self.conda_env)
util.misc.run_and_print(["conda", "-V"], loglevel=logging.INFO, check=True, env=self.conda_env)
except:
_log.debug("conda NOT installed; using custom tool install")
_log.debug("conda NOT installed")
self._is_attempted = True
self.installed = False
return
Expand Down Expand Up @@ -380,7 +380,7 @@ def get_installed_version(self):
# If we ever use conda to install pip packages as tools, "-c" needs to be removed
run_cmd = ["conda", "list", "-c", "--json", "-f", "-p", self.env_path, self.package]

result = util.misc.run_and_print(run_cmd, silent=True, env=self.conda_env)
result = util.misc.run_and_print(run_cmd, loglevel=logging.INFO, env=self.conda_env)
if result.returncode == 0:
try:
command_output = result.stdout.decode("UTF-8")
Expand All @@ -400,12 +400,34 @@ def get_installed_version(self):
return CondaPackageVersion(installed_version, installed_build_type)
return None

def package_available(self):
    """Check whether the wanted version of this package is listed by ``conda search``.

    Runs ``conda search --json -c <self.channel> <self.package>`` and scans
    the entries reported for ``self.package`` for one whose ``version``
    equals ``self.version.version_spec``.

    Returns:
        True if a matching version is listed.
        False if no matching version is found or the search command
        exits non-zero.
        None if the search output cannot be decoded/parsed; returning
        (rather than raising) lets the caller fall back to the next
        install method.
    """
    # In "conda search", "-c" names the channel to query.
    run_cmd = ["conda", "search", "--json", "-c", self.channel, self.package]

    result = util.misc.run_and_print(run_cmd, loglevel=logging.INFO, env=self.conda_env)
    if result.returncode == 0:
        try:
            command_output = result.stdout.decode("UTF-8")
            data = json.loads(self._string_from_start_of_json(command_output))
        except Exception:
            # Catch Exception rather than everything so Ctrl-C / SystemExit
            # still propagate. Decode leniently for the log message: the
            # strict decode above may be the very call that failed.
            _log.warning(
                "failed to decode JSON output from conda search: %s",
                result.stdout.decode("UTF-8", "replace")
            )
            return None  # return rather than raise so we can fall back to the next install method

        # An "error" key means conda reported a failure inside its JSON payload.
        if data and self.package in data and "error" not in data:
            wanted_version = self.version.version_spec
            for sub_pkg in data[self.package]:
                if sub_pkg.get("version", "") == wanted_version:
                    return True

    _log.info("Conda package for %s is not available on this platform.", self.package)
    return False

def uninstall_package(self):
run_cmd = ["conda", "remove", "-q", "-y", "--json", "-p", self.env_path, self.package]

result = util.misc.run_and_print(
run_cmd,
silent=True,
loglevel=logging.INFO,
env=self.conda_env)

if result.returncode == 0:
Expand All @@ -423,6 +445,10 @@ def uninstall_package(self):
self.verify_install()

def install_package(self):
#if not self.package_available():
# _log.error("Conda package for %s cannot be installed; it is not available on this platform. Related functionality may not be available.", self.package)
# return

# try to create the environment and install the package
run_cmd = ["conda", "create", "-q", "-y", "--json", "-c", self.channel, "-p", self.env_path, self._package_str]

Expand All @@ -433,7 +459,7 @@ def install_package(self):
python_version = "python=" + python_version if python_version else ""
run_cmd.extend([python_version])

result = util.misc.run_and_print(run_cmd, silent=True, env=self.conda_env)
result = util.misc.run_and_print(run_cmd, loglevel=logging.INFO, env=self.conda_env)
try:
command_output = result.stdout.decode("UTF-8")
data = json.loads(self._string_from_start_of_json(command_output))
Expand All @@ -451,7 +477,7 @@ def install_package(self):
"conda", "install", "--json", "-c", self.channel, "-y", "-q", "-p", self.env_path,
self._package_str
],
silent=True,
loglevel=logging.DEBUG,
env=self.conda_env,
)

Expand Down
102 changes: 9 additions & 93 deletions tools/kraken.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,89 +15,21 @@
import util.misc
from builtins import super

URL = 'https://github.com/yesimon/kraken/archive/75154106773b41b1d0e55b3274178134eb14723d.zip'
TOOL_NAME = "kraken"
TOOL_VERSION = '0.10.5-beta'
CONDA_TOOL_VERSION = '0.10.5beta'
KRAKEN_COMMIT_DIR = 'kraken-75154106773b41b1d0e55b3274178134eb14723d'
KRAKEN_DIR = 'kraken-{}'.format(TOOL_VERSION)

JELLYFISH_URL = 'https://github.com/gmarcais/Jellyfish/archive/43fc99e4d44d11f115dc6741ff705cf7e113f251.zip'
JELLYFISH_VERSION = '1.1.11'
JELLYFISH_COMMIT_DIR = 'Jellyfish-43fc99e4d44d11f115dc6741ff705cf7e113f251'
JELLYFISH_DIR = 'jellyfish-{}'.format(JELLYFISH_VERSION)

YAGGO_URL = 'https://github.com/gmarcais/yaggo/releases/download/v1.5.9/yaggo'
YAGGO_VERSION = '1.5.9'
TOOL_NAME = "kraken-all"
TOOL_VERSION = '0.10.6_eaf8fb68'

log = logging.getLogger(__name__)


# Tool wrapper for yaggo. Two install methods are registered, in this order:
# a conda package pinned to YAGGO_VERSION, and a direct download of YAGGO_URL.
# NOTE(review): indentation is lost in this view, so it is not visible whether
# the append() calls run only when no install_methods were passed in -- confirm.
class Yaggo(tools.Tool):

def __init__(self, install_methods=None):
# install_methods: optional caller-supplied list of install methods.
if not install_methods:
install_methods = []
install_methods.append(tools.CondaPackage("yaggo", version=YAGGO_VERSION))
install_methods.append(DownloadAndInstallYaggo(YAGGO_URL, 'yaggo'))
super().__init__(install_methods=install_methods)


# Download-based install method for yaggo.
class DownloadAndInstallYaggo(tools.DownloadPackage):

def __init__(self, *args, **kwargs):
# All arguments are forwarded unchanged to tools.DownloadPackage.
super().__init__(*args, **kwargs)
# Shell command string that runs "yaggo -v" from the directory returned by
# util.file.get_build_path(), discarding stdout and stderr.
# NOTE(review): presumably the base class uses the exit status of this
# command to verify the install -- confirm.
self.verifycmd = '{}/yaggo -v > /dev/null 2>& 1'.format(util.file.get_build_path())

def post_download(self):
# Set mode 0o755 (rwxr-xr-x) on <destination_dir>/yaggo so it is executable.
yaggo_path = os.path.join(self.destination_dir, 'yaggo')
os.chmod(yaggo_path, 0o755)


# Tool wrapper for Jellyfish. Two install methods are registered, in this
# order: a conda package pinned to JELLYFISH_VERSION, and a source download of
# JELLYFISH_URL whose target is JELLYFISH_DIR/bin/jellyfish.
# NOTE(review): indentation is lost in this view, so it is not visible whether
# the append() calls run only when no install_methods were passed in -- confirm.
class Jellyfish(tools.Tool):

def __init__(self, install_methods=None):
# install_methods: optional caller-supplied list of install methods.
if not install_methods:
install_methods = []
install_methods.append(tools.CondaPackage("jellyfish", version=JELLYFISH_VERSION))
install_methods.append(
DownloadAndInstallJellyfish(
JELLYFISH_URL, os.path.join(JELLYFISH_DIR, 'bin', 'jellyfish')
)
)
super().__init__(install_methods=install_methods)


# Download-based install method that builds Jellyfish from source with
# autoreconf / configure / make.
class DownloadAndInstallJellyfish(tools.DownloadPackage):

def post_download(self):
# Install yaggo first and prepend its directory to PATH in a copy of the
# environment; that copy is passed to every build command below.
yaggo_path = Yaggo().install_and_get_path()
env = os.environ.copy()
env['PATH'] = '{}:{}'.format(os.path.dirname(yaggo_path), env['PATH'])
jellyfish_dir = os.path.join(self.destination_dir, JELLYFISH_DIR)

# Move the commit-named directory to the version-named one unless the
# version-named directory already exists.
if not os.path.exists(jellyfish_dir):
shutil.move(os.path.join(self.destination_dir, JELLYFISH_COMMIT_DIR), jellyfish_dir)

# NOTE(review): the configure prefix is <jellyfish_dir>/local, while the
# Jellyfish tool class registers JELLYFISH_DIR/bin/jellyfish as the target
# path -- confirm these two locations agree.
install_dir = os.path.join(jellyfish_dir, 'local')
# Adds -Wno-maybe-uninitialized to AM_CXXFLAGS in Makefile.am
# (presumably a plain text substitution -- confirm replace_in_file semantics).
util.file.replace_in_file(
os.path.join(jellyfish_dir, 'Makefile.am'), 'AM_CXXFLAGS = -g -O3',
'AM_CXXFLAGS = -g -O3 -Wno-maybe-uninitialized'
)
# Each build step runs inside jellyfish_dir with check=True.
util.misc.run_and_print(['autoreconf', '-i'], cwd=jellyfish_dir, env=env, check=True)
util.misc.run_and_print(['./configure', '--prefix={}'.format(install_dir)], cwd=jellyfish_dir, env=env, check=True)
util.misc.run_and_print(['make', 'install'], cwd=jellyfish_dir, env=env, check=True)


@tools.skip_install_test(condition=tools.is_osx())
class Kraken(tools.Tool):

BINS = ['kraken', 'kraken-build', 'kraken-filter', 'kraken-mpa-report', 'kraken-report', 'kraken-translate']

def __init__(self, install_methods=None):
self.subtool_name = self.subtool_name if hasattr(self, "subtool_name") else "kraken"
if not install_methods:
install_methods = []
install_methods.append(tools.CondaPackage(TOOL_NAME, version=CONDA_TOOL_VERSION))
install_methods.append(DownloadAndInstallKraken(URL, os.path.join(KRAKEN_DIR, 'bin', 'kraken')))
install_methods.append(tools.CondaPackage(TOOL_NAME, executable=self.subtool_name, version=TOOL_VERSION))
super().__init__(install_methods=install_methods)

def version(self):
Expand Down Expand Up @@ -174,23 +106,7 @@ def execute(self, command, db, output, args=None, options=None,
print(res.stderr.decode('utf-8'), file=sys.stderr)
return res


# Download-based install method that builds Kraken with its bundled
# install_kraken.sh script and exposes the binaries through symlinks.
class DownloadAndInstallKraken(tools.DownloadPackage):

def post_download(self):
# Install Jellyfish first and prepend its directory to PATH in a copy of
# the environment; that copy is passed to the install script.
jellyfish_path = Jellyfish().install_and_get_path()
env = os.environ.copy()
env['PATH'] = '{}:{}'.format(os.path.dirname(jellyfish_path), env['PATH'])
kraken_dir = os.path.join(self.destination_dir, KRAKEN_DIR)

# Move the commit-named directory to the version-named one unless the
# version-named directory already exists.
if not os.path.exists(kraken_dir):
shutil.move(os.path.join(self.destination_dir, KRAKEN_COMMIT_DIR), kraken_dir)
libexec_dir = os.path.join(kraken_dir, 'libexec')
bin_dir = os.path.join(kraken_dir, 'bin')
# The script is run from kraken_dir with 'libexec' as its only argument.
util.misc.run_and_print(['./install_kraken.sh', 'libexec'], cwd=kraken_dir, env=env, check=True)
util.file.mkdir_p(bin_dir)
# Link every name in Kraken.BINS from bin/ to libexec/, skipping names that
# are already symlinks.
# NOTE(review): the local name `bin` shadows the builtin bin().
for bin_name in Kraken.BINS:
libexec_bin = os.path.join(libexec_dir, bin_name)
bin = os.path.join(bin_dir, bin_name)
if not os.path.islink(bin):
os.symlink(libexec_bin, bin)
# NOTE(review): presumably skips the install test for this tool on OS X -- confirm
# against tools.skip_install_test.
@tools.skip_install_test(condition=tools.is_osx())
class Jellyfish(Kraken):
""" Tool wrapper for Jellyfish (installed by kraken-all metapackage) """
# Read by Kraken.__init__ and passed as the `executable` of the conda package.
subtool_name = 'jellyfish'
26 changes: 26 additions & 0 deletions util/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@
import itertools
import logging
import os
import re
import subprocess
import multiprocessing
import sys

import util.file

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -353,3 +356,26 @@ def get_intervals(self, c=None):
right = right - 1
features = list(self.get_features(c, left, right))
yield (c, left, right, len(features), features)


def available_cpu_count():
    """
    Report how many virtual or physical CPUs this process may use.

    On systems that restrict processes with cpuset(7) (some clusters do),
    the usable count can be lower than the machine total. The allowed-CPU
    bitmask is read from /proc/self/status when that file is readable;
    otherwise the machine total is returned.
    Adapted from http://stackoverflow.com/a/1006301/715090
    """
    try:
        with open('/proc/self/status') as status_file:
            match = re.search(r'(?m)^Cpus_allowed:\s*(.*)$', status_file.read())
        if match is not None:
            # The mask is hexadecimal with comma separators; each set bit
            # is one CPU this process is allowed to run on.
            mask = int(match.group(1).replace(',', ''), 16)
            allowed = bin(mask).count('1')
            if allowed > 0:
                # Never report more CPUs than the machine has.
                return min(allowed, multiprocessing.cpu_count())
    except IOError:
        # /proc/self/status could not be read; fall through to the total.
        pass

    return multiprocessing.cpu_count()

0 comments on commit 460c2f4

Please sign in to comment.