From aaebccdebbe6fdbe65d2320b643632cbd9cdf88a Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Thu, 28 Apr 2016 16:54:00 -0400 Subject: [PATCH 1/8] added optional argument to pass number of threads to blastn during deplete_blastn --- taxon_filter.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/taxon_filter.py b/taxon_filter.py index a318af84b..fed068305 100755 --- a/taxon_filter.py +++ b/taxon_filter.py @@ -669,7 +669,7 @@ def multi_db_deplete_bam(inBam, refDbs, deplete_method, outBam, JVMmemory=None): # ======================== -def blastn_chunked_fasta(fasta, db, chunkSize=1000000): +def blastn_chunked_fasta(fasta, db, chunkSize=1000000, threads=1): """ Helper function: blastn a fasta file, overcoming apparent memory leaks on an input with many query sequences, by splitting it into multiple chunks @@ -689,7 +689,7 @@ def blastn_chunked_fasta(fasta, db, chunkSize=1000000): chunk_hits = mkstempfname('.hits.txt') blastnCmd = [ - blastnPath, '-db', db, '-word_size', '16', '-evalue', '1e-6', '-outfmt', '6', '-max_target_seqs', '2', + blastnPath, '-db', db, '-word_size', '16', '-num_threads', str(threads), '-evalue', '1e-6', '-outfmt', '6', '-max_target_seqs', '2', '-query', chunk_fasta, '-out', chunk_hits ] log.debug(' '.join(blastnCmd)) @@ -733,7 +733,7 @@ def no_blast_hits(blastOutCombined, inFastq, outFastq): outf.write(line1 + line2 + line3 + line4) -def deplete_blastn(inFastq, outFastq, refDbs): +def deplete_blastn(inFastq, outFastq, refDbs, threads): 'Use blastn to remove reads that match at least one of the databases.' # Convert to fasta @@ -744,7 +744,7 @@ def deplete_blastn(inFastq, outFastq, refDbs): blastOutFiles = [] for db in refDbs: log.info("running blastn on %s against %s", inFastq, db) - blastOutFiles += blastn_chunked_fasta(inFasta, db) + blastOutFiles += blastn_chunked_fasta(inFasta, db, threads) # Combine results from different databases blastOutCombined = mkstempfname('.txt') @@ -761,6 +761,7 @@ def parser_deplete_blastn(parser=argparse.ArgumentParser()): parser.add_argument('inFastq', help='Input fastq file.') parser.add_argument('outFastq', help='Output fastq file with matching reads removed.') parser.add_argument('refDbs', nargs='+', help='One or more reference databases for blast.') + parser.add_argument('--threads', type=int, default=4, help='The number of threads to use in running blastn.') util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None))) util.cmd.attach_main(parser, deplete_blastn, split_args=True) return parser @@ -769,7 +770,7 @@ def parser_deplete_blastn(parser=argparse.ArgumentParser()): __commands__.append(('deplete_blastn', parser_deplete_blastn)) -def deplete_blastn_paired(infq1, infq2, outfq1, outfq2, refDbs): +def deplete_blastn_paired(infq1, infq2, outfq1, outfq2, refDbs, threads): 'Use blastn to remove reads that match at least one of the databases.' tmpfq1_a = mkstempfname('.fastq') tmpfq1_b = mkstempfname('.fastq') @@ -781,7 +782,7 @@ def deplete_blastn_paired(infq1, infq2, outfq1, outfq2, refDbs): # (this should significantly speed up the second run of deplete_blastn) read_utils.purge_unmated(tmpfq1_a, infq2, tmpfq1_b, tmpfq2_b) # deplete fq2 - deplete_blastn(tmpfq2_b, tmpfq2_c, refDbs) + deplete_blastn(tmpfq2_b, tmpfq2_c, refDbs, threads) # purge fq1 of read pairs lost in fq2 read_utils.purge_unmated(tmpfq1_b, tmpfq2_c, outfq1, outfq2) @@ -792,6 +793,7 @@ def parser_deplete_blastn_paired(parser=argparse.ArgumentParser()): parser.add_argument('outfq1', help='Output fastq file with matching reads removed.') parser.add_argument('outfq2', help='Output fastq file with matching reads removed.') parser.add_argument('refDbs', nargs='+', help='One or more reference databases for blast.') + parser.add_argument('--threads', type=int, default=4, help='The number of threads to use in running blastn.') util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None))) util.cmd.attach_main(parser, deplete_blastn_paired, split_args=True) return parser @@ -800,7 +802,7 @@ def parser_deplete_blastn_paired(parser=argparse.ArgumentParser()): __commands__.append(('deplete_blastn_paired', parser_deplete_blastn_paired)) -def deplete_blastn_bam(inBam, db, outBam, chunkSize=1000000, JVMmemory=None): +def deplete_blastn_bam(inBam, db, outBam, threads, chunkSize=1000000, JVMmemory=None): 'Use blastn to remove reads that match at least one of the databases.' #blastnPath = tools.blast.BlastnTool().install_and_get_path() @@ -819,7 +821,7 @@ def deplete_blastn_bam(inBam, db, outBam, chunkSize=1000000, JVMmemory=None): os.unlink(fastq1) os.unlink(fastq2) log.info("running blastn on %s pair 1 against %s", inBam, db) - blastOutFiles = blastn_chunked_fasta(fasta, db, chunkSize) + blastOutFiles = blastn_chunked_fasta(fasta, db, chunkSize, threads) with open(blast_hits, 'wt') as outf: for blastOutFile in blastOutFiles: with open(blastOutFile, 'rt') as inf: @@ -864,6 +866,7 @@ def parser_deplete_blastn_bam(parser=argparse.ArgumentParser()): parser.add_argument('inBam', help='Input BAM file.') parser.add_argument('refDbs', nargs='+', help='One or more reference databases for blast.') parser.add_argument('outBam', help='Output BAM file with matching reads removed.') + parser.add_argument('--threads', type=int, default=4, help='The number of threads to use in running blastn.') parser.add_argument("--chunkSize", type=int, default=1000000, help='FASTA chunk size (default: %(default)s)') parser.add_argument( '--JVMmemory', From 755428db3035877887b6a664a13c65f1c839b21b Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Thu, 28 Apr 2016 17:30:26 -0400 Subject: [PATCH 2/8] specify threads in additional locations --- taxon_filter.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/taxon_filter.py b/taxon_filter.py index fed068305..7105a470c 100755 --- a/taxon_filter.py +++ b/taxon_filter.py @@ -68,6 +68,7 @@ def parser_deplete_human(parser=argparse.ArgumentParser()): help='One reference database for last (required if --taxfiltBam is specified).', default=None ) + parser.add_argument('--threads', type=int, default=4, help='The number of threads to use in running blastn.') parser.add_argument( '--JVMmemory', default=tools.picard.FilterSamReadsTool.jvmMemDefault, @@ -91,10 +92,11 @@ def main_deplete_human(args): args.bmtaggerDbs, deplete_bmtagger_bam, args.bmtaggerBam, + threads=args.threads, JVMmemory=args.JVMmemory ) read_utils.rmdup_mvicuna_bam(args.bmtaggerBam, args.rmdupBam, JVMmemory=args.JVMmemory) - multi_db_deplete_bam(args.rmdupBam, args.blastDbs, deplete_blastn_bam, args.blastnBam, JVMmemory=args.JVMmemory) + multi_db_deplete_bam(args.rmdupBam, args.blastDbs, deplete_blastn_bam, args.blastnBam, threads=args.threads, JVMmemory=args.JVMmemory) if args.taxfiltBam and args.lastDb: filter_lastal_bam(args.blastnBam, args.lastDb, args.taxfiltBam, JVMmemory=args.JVMmemory) return 0 @@ -636,6 +638,7 @@ def parser_deplete_bam_bmtagger(parser=argparse.ArgumentParser()): and db.srprism.idx, db.srprism.map, etc. by srprism mkindex.''' ) parser.add_argument('outBam', help='Output BAM file.') + parser.add_argument('--threads', type=int, default=4, help='The number of threads to use in running blastn.') parser.add_argument( '--JVMmemory', default=tools.picard.FilterSamReadsTool.jvmMemDefault, @@ -648,17 +651,17 @@ def parser_deplete_bam_bmtagger(parser=argparse.ArgumentParser()): def main_deplete_bam_bmtagger(args): '''Use bmtagger to deplete input reads against several databases.''' - multi_db_deplete_bam(args.inBam, args.refDbs, deplete_bmtagger_bam, args.outBam, JVMmemory=args.JVMmemory) + multi_db_deplete_bam(args.inBam, args.refDbs, deplete_bmtagger_bam, args.outBam, threads=args.threads, JVMmemory=args.JVMmemory) __commands__.append(('deplete_bam_bmtagger', parser_deplete_bam_bmtagger)) -def multi_db_deplete_bam(inBam, refDbs, deplete_method, outBam, JVMmemory=None): +def multi_db_deplete_bam(inBam, refDbs, deplete_method, outBam, threads=1, JVMmemory=None): tmpBamIn = inBam for db in refDbs: tmpBamOut = mkstempfname('.bam') - deplete_method(tmpBamIn, db, tmpBamOut, JVMmemory=JVMmemory) + deplete_method(tmpBamIn, db, tmpBamOut, threads=threads, JVMmemory=JVMmemory) if tmpBamIn != inBam: os.unlink(tmpBamIn) tmpBamIn = tmpBamOut @@ -881,10 +884,10 @@ def parser_deplete_blastn_bam(parser=argparse.ArgumentParser()): def main_deplete_blastn_bam(args): '''Use blastn to remove reads that match at least one of the specified databases.''' - def wrapper(inBam, db, outBam, JVMmemory=None): - return deplete_blastn_bam(inBam, db, outBam, chunkSize=args.chunkSize, JVMmemory=JVMmemory) + def wrapper(inBam, db, outBam, threads, JVMmemory=None): + return deplete_blastn_bam(inBam, db, outBam, threads=args.threads, chunkSize=args.chunkSize, JVMmemory=JVMmemory) - multi_db_deplete_bam(args.inBam, args.refDbs, wrapper, args.outBam, JVMmemory=args.JVMmemory) + multi_db_deplete_bam(args.inBam, args.refDbs, wrapper, args.outBam, threads=args.threads, JVMmemory=args.JVMmemory) return 0 From f29d3f81795d25a6aebbfeb085931adf3ef8e068 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Fri, 29 Apr 2016 19:11:37 -0400 Subject: [PATCH 3/8] added threads param to hs_deplete rule to support new optional threads arg of blastn-based depletion methods --- pipes/rules/hs_deplete.rules | 5 +++-- taxon_filter.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pipes/rules/hs_deplete.rules b/pipes/rules/hs_deplete.rules index 4690ee560..6d3245f55 100644 --- a/pipes/rules/hs_deplete.rules +++ b/pipes/rules/hs_deplete.rules @@ -33,12 +33,13 @@ rule depletion: bmtaggerDbs = expand("{dbdir}/{db}", dbdir=config["bmtagger_db_dir"], db=config["bmtagger_dbs_remove"]), blastDbs = expand("{dbdir}/{db}", dbdir=config["blast_db_dir"], db=config["blast_db_remove"]), revert_bam = config["tmp_dir"] +'/'+config["subdirs"]["depletion"]+'/{sample}.raw.bam', - logid="{sample}" + logid="{sample}", + threads=int(config.get("number_of_threads", 1)) run: makedirs(expand("{dir}/{subdir}", dir=[config["data_dir"],config["tmp_dir"]], subdir=config["subdirs"]["depletion"])) - shell("{config[bin_dir]}/taxon_filter.py deplete_human {input} {params.revert_bam} {output} --bmtaggerDbs {params.bmtaggerDbs} --blastDbs {params.blastDbs} --JVMmemory 15g") + shell("{config[bin_dir]}/taxon_filter.py deplete_human {input} {params.revert_bam} {output} --bmtaggerDbs {params.bmtaggerDbs} --blastDbs {params.blastDbs} --threads {params.threads} --JVMmemory 15g") os.unlink(params.revert_bam) rule filter_to_taxon: diff --git a/taxon_filter.py b/taxon_filter.py index 7105a470c..6ffd74de9 100755 --- a/taxon_filter.py +++ b/taxon_filter.py @@ -885,7 +885,7 @@ def main_deplete_blastn_bam(args): '''Use blastn to remove reads that match at least one of the specified databases.''' def wrapper(inBam, db, outBam, threads, JVMmemory=None): - return deplete_blastn_bam(inBam, db, outBam, threads=args.threads, chunkSize=args.chunkSize, JVMmemory=JVMmemory) + return deplete_blastn_bam(inBam, db, outBam, threads=threads, chunkSize=args.chunkSize, JVMmemory=JVMmemory) multi_db_deplete_bam(args.inBam, args.refDbs, wrapper, args.outBam, threads=args.threads, JVMmemory=args.JVMmemory) return 0 From 486e0b16f90beb80c7e852148117005e9570eaa9 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Mon, 2 May 2016 09:47:20 -0400 Subject: [PATCH 4/8] add missing 'threads' kwargs to deplete_bmtagger_bam --- taxon_filter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/taxon_filter.py b/taxon_filter.py index 6ffd74de9..e91611175 100755 --- a/taxon_filter.py +++ b/taxon_filter.py @@ -398,7 +398,7 @@ def parser_filter_lastal(parser=argparse.ArgumentParser()): # ============================ -def deplete_bmtagger_bam(inBam, db, outBam, JVMmemory=None): +def deplete_bmtagger_bam(inBam, db, outBam, threads=None, JVMmemory=None): """ Use bmtagger to partition the input reads into ones that match at least one of the databases and ones that don't match any of the databases. From 7e097cca5dc0f4bd3b02db1e3838c760c100b907 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Mon, 2 May 2016 10:13:11 -0400 Subject: [PATCH 5/8] add check=True to run_and_print() calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the ‘check’ kwarg (default False) was added to misc.py:run_and_print(), which checks the return code of the called command and raises a CalledProcessError if returncode != 0. This commit adds check=True to instances of run_and_print where run_and_print previously replaced invocations of subprocess.check_call() (but not invocations of subprocess.call()) --- assembly.py | 2 +- read_utils.py | 4 ++-- taxon_filter.py | 6 +++--- test/unit/test_taxon_filter.py | 4 ++-- tools/__init__.py | 14 ++++++++------ tools/blast.py | 2 +- tools/bmtagger.py | 2 +- tools/diamond.py | 2 +- tools/gatk.py | 2 +- tools/kraken.py | 8 ++++---- tools/mummer.py | 6 +++--- tools/muscle.py | 2 +- tools/mvicuna.py | 2 +- tools/novoalign.py | 2 +- tools/picard.py | 2 +- tools/snpeff.py | 2 +- tools/trimmomatic.py | 2 +- tools/trinity.py | 2 +- 18 files changed, 34 insertions(+), 32 deletions(-) diff --git a/assembly.py b/assembly.py index dedda2446..6f71fedaf 100755 --- a/assembly.py +++ b/assembly.py @@ -93,7 +93,7 @@ def trim_rmdup_subsamp_reads(inBam, clipDb, outBam, n_reads=100000): '-out', subsampfq[0], subsampfq[1],] - util.misc.run_and_print(cmd) + util.misc.run_and_print(cmd, check=True) os.unlink(purgefq[0]) os.unlink(purgefq[1]) diff --git a/read_utils.py b/read_utils.py index f8f6d74fe..7c5566550 100755 --- a/read_utils.py +++ b/read_utils.py @@ -44,7 +44,7 @@ def purge_unmated(inFastq1, inFastq2, outFastq1, outFastq2, regex=r'^@(\S+)/[1|2 mergeShuffledFastqSeqsPath = os.path.join(util.file.get_scripts_path(), 'mergeShuffledFastqSeqs.pl') cmdline = [mergeShuffledFastqSeqsPath, '-t', '-r', regex, '-f1', inFastq1, '-f2', inFastq2, '-o', tempOutput] log.debug(' '.join(cmdline)) - util.misc.run_and_print(cmdline) + util.misc.run_and_print(cmdline, check=True) shutil.move(tempOutput + '.1.fastq', outFastq1) shutil.move(tempOutput + '.2.fastq', outFastq2) return 0 @@ -823,7 +823,7 @@ def rmdup_prinseq_fastq(inFastq, outFastq): inFastq, '-out_bad', 'null', '-line_width', '0', '-out_good', outFastq[:-6] ] log.debug(' '.join(cmd)) - util.misc.run_and_print(cmd) + util.misc.run_and_print(cmd, check=True) def parser_rmdup_prinseq_fastq(parser=argparse.ArgumentParser()): diff --git a/taxon_filter.py b/taxon_filter.py index 6ffd74de9..26ec09d3a 100755 --- a/taxon_filter.py +++ b/taxon_filter.py @@ -137,7 +137,7 @@ def trimmomatic(inFastq1, inFastq2, pairedOutFastq1, pairedOutFastq2, clipFasta) ) log.debug(' '.join(javaCmd)) - util.misc.run_and_print(javaCmd) + util.misc.run_and_print(javaCmd, check=True) os.unlink(tmpUnpaired1) os.unlink(tmpUnpaired2) @@ -377,7 +377,7 @@ def filter_lastal( '-line_width', '0', '-out_good', outFastq[:-6] ] log.debug(' '.join(prinseqCmd)) - util.misc.run_and_print(prinseqCmd) + util.misc.run_and_print(prinseqCmd, check=True) os.unlink(filteredFastq) @@ -506,7 +506,7 @@ def partition_bmtagger(inFastq1, inFastq2, databases, outMatch=None, outNoMatch= curReads2, '-o', matchesFile ] log.debug(' '.join(cmdline)) - util.misc.run_and_print(cmdline) + util.misc.run_and_print(cmdline, check=True) prevReads1, prevReads2 = curReads1, curReads2 if count < len(databases) - 1: curReads1, curReads2 = mkstempfname(), mkstempfname() diff --git a/test/unit/test_taxon_filter.py b/test/unit/test_taxon_filter.py index 27d64b019..e6f3e1f8c 100644 --- a/test/unit/test_taxon_filter.py +++ b/test/unit/test_taxon_filter.py @@ -146,7 +146,7 @@ def test_deplete_blastn(self): refDb = os.path.join(tempDir, dbname) os.symlink(os.path.join(myInputDir, dbname), refDb) refDbs.append(refDb) - util.misc.run_and_print([makeblastdbPath, '-dbtype', 'nucl', '-in', refDb]) + util.misc.run_and_print([makeblastdbPath, '-dbtype', 'nucl', '-in', refDb], check=True) # Run deplete_blastn outFile = os.path.join(tempDir, 'out.fastq') @@ -172,7 +172,7 @@ def test_deplete_blastn_bam(self): refDb = os.path.join(tempDir, dbname) os.symlink(os.path.join(myInputDir, dbname), refDb) refDbs.append(refDb) - util.misc.run_and_print([makeblastdbPath, '-dbtype', 'nucl', '-in', refDb]) + util.misc.run_and_print([makeblastdbPath, '-dbtype', 'nucl', '-in', refDb], check=True) # convert the input fastq's to a bam inFastq1 = os.path.join(myInputDir, "in1.fastq") diff --git a/tools/__init__.py b/tools/__init__.py index 68f589044..265c7c5b6 100644 --- a/tools/__init__.py +++ b/tools/__init__.py @@ -248,7 +248,7 @@ def executable_path(self): @property def _package_installed(self): - result = util.misc.run_and_print(["conda", "list", "-f", "-c", "-p", self.env_path, "--json", self.package], silent=True, env=self.conda_env) + result = util.misc.run_and_print(["conda", "list", "-f", "-c", "-p", self.env_path, "--json", self.package], silent=True, check=True, env=self.conda_env) if result.returncode == 0: command_output = result.stdout.decode("UTF-8") data = json.loads(self._string_from_start_of_json(command_output)) @@ -278,7 +278,7 @@ def verify_install(self): def _attempt_install(self): try: # check for presence of conda command - util.misc.run_and_print(["conda", "-V"], silent=True, env=self.conda_env) + util.misc.run_and_print(["conda", "-V"], silent=True, check=True, env=self.conda_env) except: _log.debug("conda NOT installed; using custom tool install") self._is_attempted = True @@ -289,10 +289,10 @@ def _attempt_install(self): # conda-build is not needed for pre-built binaries from conda channels # though we may will need it in the future for custom local builds # try: - # util.misc.run_and_print(["conda", "build", "-V"], silent=True, env=self.conda_env) + # util.misc.run_and_print(["conda", "build", "-V"], silent=True, check=True, env=self.conda_env) # except: # _log.warning("conda-build must be installed; installing...") - # util.misc.run_and_print(["conda", "install", "-y", "conda-build"]) + # util.misc.run_and_print(["conda", "install", "-y", "conda-build"], check=True) # if the package is already installed, we need to check if the version is correct if self.verify_install(): @@ -320,7 +320,7 @@ def get_installed_version(self): run_cmd = ["conda", "list", "-c", "--json", "-f", "-p", self.env_path, self.package] - result = util.misc.run_and_print(run_cmd, silent=True, env=self.conda_env) + result = util.misc.run_and_print(run_cmd, silent=True, check=True, env=self.conda_env) if result.returncode == 0: try: command_output = result.stdout.decode("UTF-8") @@ -347,6 +347,7 @@ def uninstall_package(self): result = util.misc.run_and_print( run_cmd, silent=True, + check=True, env=self.conda_env) if result.returncode == 0: @@ -374,7 +375,7 @@ def install_package(self): python_version = "python=" + python_version if python_version else "" run_cmd.extend([python_version]) - result = util.misc.run_and_print(run_cmd, silent=True, env=self.conda_env) + result = util.misc.run_and_print(run_cmd, silent=True, check=True, env=self.conda_env) try: command_output = result.stdout.decode("UTF-8") data = json.loads(self._string_from_start_of_json(command_output)) @@ -393,6 +394,7 @@ def install_package(self): self._package_str ], silent=True, + check=True, env=self.conda_env, ) diff --git a/tools/blast.py b/tools/blast.py index 65d4b5532..8ac0ac8a1 100644 --- a/tools/blast.py +++ b/tools/blast.py @@ -55,7 +55,7 @@ def __init__(self, install_methods=None): super(BlastTools, self).__init__(install_methods=install_methods) def execute(self, *args): - util.misc.run_and_print(self.exec_path, args) + util.misc.run_and_print(self.exec_path, args, check=True) class BlastnTool(BlastTools): diff --git a/tools/bmtagger.py b/tools/bmtagger.py index 8f45fd91d..74f0b3219 100644 --- a/tools/bmtagger.py +++ b/tools/bmtagger.py @@ -35,7 +35,7 @@ def __init__(self, install_methods=None): tools.Tool.__init__(self, install_methods=install_methods) def execute(self, *args): - util.misc.run_and_print(self.exec_path, args) + util.misc.run_and_print(self.exec_path, args, check=True) class BmtaggerShTool(BmtaggerTools): diff --git a/tools/diamond.py b/tools/diamond.py index 086d9d37b..a1fe8b1b4 100644 --- a/tools/diamond.py +++ b/tools/diamond.py @@ -117,4 +117,4 @@ def post_download(self): env['CC'] = 'gcc-4.9' env['CXX'] = 'g++-4.9' #util.misc.run_and_print(['cmake', '..'], env=env, cwd=build_dir) - util.misc.run_and_print(['make'], env=env, cwd=build_dir) + util.misc.run_and_print(['make'], env=env, cwd=build_dir, check=True) diff --git a/tools/gatk.py b/tools/gatk.py index 79fd8d290..b4e716f5b 100644 --- a/tools/gatk.py +++ b/tools/gatk.py @@ -51,7 +51,7 @@ def execute(self, command, gatkOptions=None, JVMmemory=None): # pylint: disab '-T', command ] + list(map(str, gatkOptions)) _log.debug(' '.join(tool_cmd)) - util.misc.run_and_print(tool_cmd) + util.misc.run_and_print(tool_cmd, check=True) @staticmethod def dict_to_gatk_opts(options): diff --git a/tools/kraken.py b/tools/kraken.py index 25a41a148..4c6fd9b20 100644 --- a/tools/kraken.py +++ b/tools/kraken.py @@ -84,9 +84,9 @@ def post_download(self): os.path.join(jellyfish_dir, 'Makefile.am'), 'AM_CXXFLAGS = -g -O3', 'AM_CXXFLAGS = -g -O3 -Wno-maybe-uninitialized' ) - util.misc.run_and_print(['autoreconf', '-i'], cwd=jellyfish_dir, env=env) - util.misc.run_and_print(['./configure', '--prefix={}'.format(install_dir)], cwd=jellyfish_dir, env=env) - util.misc.run_and_print(['make', 'install'], cwd=jellyfish_dir, env=env) + util.misc.run_and_print(['autoreconf', '-i'], cwd=jellyfish_dir, env=env, check=True) + util.misc.run_and_print(['./configure', '--prefix={}'.format(install_dir)], cwd=jellyfish_dir, env=env, check=True) + util.misc.run_and_print(['make', 'install'], cwd=jellyfish_dir, env=env, check=True) class Kraken(tools.Tool): @@ -186,7 +186,7 @@ def post_download(self): shutil.move(os.path.join(self.destination_dir, KRAKEN_COMMIT_DIR), kraken_dir) libexec_dir = os.path.join(kraken_dir, 'libexec') bin_dir = os.path.join(kraken_dir, 'bin') - util.misc.run_and_print(['./install_kraken.sh', 'libexec'], cwd=kraken_dir, env=env) + util.misc.run_and_print(['./install_kraken.sh', 'libexec'], cwd=kraken_dir, env=env, check=True) util.file.mkdir_p(bin_dir) for bin_name in Kraken.BINS: libexec_bin = os.path.join(libexec_dir, bin_name) diff --git a/tools/mummer.py b/tools/mummer.py index a50c9af51..799983492 100644 --- a/tools/mummer.py +++ b/tools/mummer.py @@ -48,7 +48,7 @@ def execute(self, refFasta, qryFastas): toolCmd = [os.path.join(self.install_and_get_path(), 'mummer'), refFasta] + qryFastas log.debug(' '.join(toolCmd)) - util.misc.run_and_print(toolCmd) + util.misc.run_and_print(toolCmd, check=True) def nucmer(self, refFasta, qryFasta, outDelta, extend=None, breaklen=None, maxgap=None, minmatch=None, mincluster=None): @@ -72,7 +72,7 @@ def nucmer(self, refFasta, qryFasta, outDelta, extend=None, breaklen=None, toolCmd.extend(['--mincluster', str(mincluster)]) toolCmd.extend([refFasta, qryFasta]) log.debug(' '.join(toolCmd)) - util.misc.run_and_print(toolCmd) + util.misc.run_and_print(toolCmd, check=True) def promer(self, refFasta, qryFasta, outDelta, extend=None, breaklen=None, maxgap=None, minmatch=None, mincluster=None): @@ -96,7 +96,7 @@ def promer(self, refFasta, qryFasta, outDelta, extend=None, breaklen=None, toolCmd.extend(['--mincluster', str(mincluster)]) toolCmd.extend([refFasta, qryFasta]) log.debug(' '.join(toolCmd)) - util.misc.run_and_print(toolCmd) + util.misc.run_and_print(toolCmd, check=True) def delta_filter(self, inDelta, outDelta): toolCmd = [os.path.join(self.install_and_get_path(), 'delta-filter'), diff --git a/tools/muscle.py b/tools/muscle.py index f8f8bbd11..54fc953d6 100644 --- a/tools/muscle.py +++ b/tools/muscle.py @@ -68,7 +68,7 @@ def execute( tool_cmd.extend(('-log', logFile)) _log.debug(' '.join(tool_cmd)) - util.misc.run_and_print(tool_cmd) + util.misc.run_and_print(tool_cmd, check=True) # pylint: enable=W0221 diff --git a/tools/mvicuna.py b/tools/mvicuna.py index 69b84a232..d64102645 100644 --- a/tools/mvicuna.py +++ b/tools/mvicuna.py @@ -60,7 +60,7 @@ def rmdup(self, inPair, outPair, outUnpaired=None): outUnpaired, '-drm_op', ','.join(tmp1OutPair), '-tasks', 'DupRm' ] _log.debug(' '.join(cmdline)) - util.misc.run_and_print(cmdline) + util.misc.run_and_print(cmdline, check=True) for tmpfname, outfname in zip(tmp2OutPair, outPair): shutil.copyfile(tmpfname, outfname) diff --git a/tools/novoalign.py b/tools/novoalign.py index 29f33f5b2..2ef53b667 100644 --- a/tools/novoalign.py +++ b/tools/novoalign.py @@ -203,7 +203,7 @@ def index_fasta(self, refFasta, k=None, s=None): cmd.extend(['-s', str(s)]) cmd.extend([outfname, refFasta]) _log.debug(' '.join(cmd)) - util.misc.run_and_print(cmd) + util.misc.run_and_print(cmd, check=True) try: mode = os.stat(outfname).st_mode & ~stat.S_IXUSR & ~stat.S_IXGRP & ~stat.S_IXOTH os.chmod(outfname, mode) diff --git a/tools/picard.py b/tools/picard.py index 7e49c0e2a..ada392f21 100644 --- a/tools/picard.py +++ b/tools/picard.py @@ -57,7 +57,7 @@ def execute(self, command, picardOptions=None, JVMmemory=None): # pylint: dis self.install_and_get_path(), '-Xmx' + JVMmemory, '-Djava.io.tmpdir=' + tempfile.tempdir, command ] + picardOptions _log.debug(' '.join(tool_cmd)) - util.misc.run_and_print(tool_cmd) + util.misc.run_and_print(tool_cmd, check=True) @staticmethod def dict_to_picard_opts(options): diff --git a/tools/snpeff.py b/tools/snpeff.py index deca80017..e3cd8d4a7 100644 --- a/tools/snpeff.py +++ b/tools/snpeff.py @@ -59,7 +59,7 @@ def execute(self, command, args, JVMmemory=None, stdin=None, stdout=None): # ] + args _log.debug(' '.join(tool_cmd)) - return util.misc.run_and_print(tool_cmd, stdin=stdin, buffered=True, silent=True) + return util.misc.run_and_print(tool_cmd, stdin=stdin, buffered=True, silent=True, check=True) def has_genome(self, genome): if not self.known_dbs: diff --git a/tools/trimmomatic.py b/tools/trimmomatic.py index 111c1b6f4..431b9879f 100644 --- a/tools/trimmomatic.py +++ b/tools/trimmomatic.py @@ -26,4 +26,4 @@ def __init__(self, install_methods=None): tools.Tool.__init__(self, install_methods=install_methods) def execute(self, *args): - util.misc.run_and_print(self.exec_path, args) + util.misc.run_and_print(self.exec_path, args, check=True) diff --git a/tools/trinity.py b/tools/trinity.py index 8e30f93ed..b8f20bfe0 100644 --- a/tools/trinity.py +++ b/tools/trinity.py @@ -56,7 +56,7 @@ def execute(self, '--output', outdir ] log.debug(' '.join(cmd)) - util.misc.run_and_print(cmd) + util.misc.run_and_print(cmd, check=True) shutil.copyfile(os.path.join(outdir, 'Trinity.fasta'), outFasta) shutil.rmtree(outdir, ignore_errors=True) From 04ef65960ea8aed23b9cafc49641ac64314a8fac Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Mon, 2 May 2016 11:28:33 -0400 Subject: [PATCH 6/8] merge output and error for run() CalledProcessError() At least on Py2, signature is CalledProcessError(self, returncode, cmd, output=None) --- util/misc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/misc.py b/util/misc.py index 04ddf1088..a2239042a 100644 --- a/util/misc.py +++ b/util/misc.py @@ -162,7 +162,7 @@ def run(args, stdin=None, stdout=None, stderr=None, shell=False, error = '' if check and returncode != 0: raise subprocess.CalledProcessError( - returncode, b' '.join(args), output, error) + returncode, b' '.join(args), str(output)+str(error)) return CompletedProcess(args, returncode, output, error) finally: if stdout_pipe: From 554d0d145c9ff26cafb902c0e345009ddd371f4c Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Mon, 2 May 2016 14:08:05 -0400 Subject: [PATCH 7/8] fix second call issue in util/misc.py:run() --- test/unit/test_assembly.py | 4 ++-- test/unit/test_tools_picard.py | 2 +- util/misc.py | 11 ++++++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/test/unit/test_assembly.py b/test/unit/test_assembly.py index c6c4f36ab..cce4eb3af 100644 --- a/test/unit/test_assembly.py +++ b/test/unit/test_assembly.py @@ -236,7 +236,7 @@ def test_ebov_refine1(self): imputeFasta = util.file.mkstempfname('.imputed.fasta') refine1Fasta = util.file.mkstempfname('.refine1.fasta') shutil.copy(inFasta, imputeFasta) - tools.picard.CreateSequenceDictionaryTool().execute(imputeFasta) + tools.picard.CreateSequenceDictionaryTool().execute(imputeFasta, overwrite=True) tools.novoalign.NovoalignTool().index_fasta(imputeFasta) assembly.refine_assembly( imputeFasta, @@ -256,7 +256,7 @@ def test_ebov_refine2(self): refine1Fasta = util.file.mkstempfname('.refine1.fasta') refine2Fasta = util.file.mkstempfname('.refine2.fasta') shutil.copy(inFasta, refine1Fasta) - tools.picard.CreateSequenceDictionaryTool().execute(refine1Fasta) + tools.picard.CreateSequenceDictionaryTool().execute(refine1Fasta, overwrite=True) tools.novoalign.NovoalignTool().index_fasta(refine1Fasta) assembly.refine_assembly( refine1Fasta, diff --git a/test/unit/test_tools_picard.py b/test/unit/test_tools_picard.py index 74cc0c65b..8e2aabf31 100644 --- a/test/unit/test_tools_picard.py +++ b/test/unit/test_tools_picard.py @@ -26,7 +26,7 @@ def test_fasta_index(self): shutil.copyfile(orig_ref, inRef) outDict = inRef[:-len(ext)] + '.dict' - picard_index.execute(inRef) + picard_index.execute(inRef, overwrite=True) # the dict files will not be exactly the same, just the first 3 cols with open(outDict, 'rt') as inf: diff --git a/util/misc.py b/util/misc.py index a2239042a..f903c0559 100644 --- a/util/misc.py +++ b/util/misc.py @@ -127,7 +127,7 @@ def run(args, stdin=None, stdout=None, stderr=None, shell=False, output = subprocess.check_output( args, stdin=stdin, stderr=stderr, shell=shell, env=env, cwd=cwd) - returncode = 0 + return CompletedProcess(args, 0, output, b'') # Py3.4 doesn't have stderr attribute except subprocess.CalledProcessError as e: if check: @@ -161,8 +161,13 @@ def run(args, stdin=None, stdout=None, stderr=None, shell=False, else: error = '' if check and returncode != 0: - raise subprocess.CalledProcessError( - returncode, b' '.join(args), str(output)+str(error)) + print(output.decode("utf-8")) + try: + raise subprocess.CalledProcessError( + returncode, args, output, error) + except TypeError: # py2 CalledProcessError does not accept error + raise subprocess.CalledProcessError( + returncode, args, output.decode("utf-8")) return CompletedProcess(args, returncode, output, error) finally: if stdout_pipe: From 4920e70fc8e051ab3e051286a9c3e669b48ccda4 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Mon, 2 May 2016 14:11:05 -0400 Subject: [PATCH 8/8] flush buffer to stdout in run_and_print --- util/misc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/util/misc.py b/util/misc.py index f903c0559..aa896d648 100644 --- a/util/misc.py +++ b/util/misc.py @@ -206,11 +206,13 @@ def run_and_print(args, stdout=None, stderr=None, for c in iter(process.stdout.read, b""): output.append(c) print(c.decode('utf-8'), end="") # print for py3 / p2 from __future__ + sys.stdout.flush() # flush buffer to stdout # in case there are still chars in the pipe buffer if not silent: for c in iter(lambda: process.stdout.read(1), b""): print(c, end="") + sys.stdout.flush() # flush buffer to stdout if check and process.returncode != 0: raise subprocess.CalledProcessError(process.returncode, args,