
Commit

Merge branch 'master' into ct-patch-uger-jobscript
tomkinsc committed Oct 29, 2018
2 parents 8fa2cb3 + 8205443 commit 3beab72
Showing 6 changed files with 38 additions and 9 deletions.
13 changes: 8 additions & 5 deletions assembly.py
@@ -356,7 +356,7 @@ def assemble_spades(
kmer_sizes=(55,65),
n_reads=10000000,
outReads=None,
mask_errors=False,
always_succeed=False,
max_kmer_sizes=1,
mem_limit_gb=8,
threads=None,
@@ -379,7 +379,7 @@ def assemble_spades(
contigs_untrusted=contigs_untrusted, contigs_trusted=contigs_trusted,
contigs_out=out_fasta, filter_contigs=filter_contigs,
min_contig_len=min_contig_len,
kmer_sizes=kmer_sizes, mask_errors=mask_errors, max_kmer_sizes=max_kmer_sizes,
kmer_sizes=kmer_sizes, always_succeed=always_succeed, max_kmer_sizes=max_kmer_sizes,
spades_opts=spades_opts, mem_limit_gb=mem_limit_gb,
threads=threads)
except subprocess.CalledProcessError as e:
@@ -403,6 +403,9 @@ def parser_assemble_spades(parser=argparse.ArgumentParser()):
parser.add_argument('--outReads', default=None, help='Save the trimmomatic/prinseq/subsamp reads to a BAM file')
parser.add_argument('--filterContigs', dest='filter_contigs', default=False, action='store_true',
help='only output contigs SPAdes is sure of (drop lesser-quality contigs from output)')
parser.add_argument('--alwaysSucceed', dest='always_succeed', default=False, action='store_true',
help='if assembly fails for any reason, output an empty contigs file, rather than failing with '
'an error code')
parser.add_argument('--minContigLen', dest='min_contig_len', type=int, default=0,
help='only output contigs longer than this many bp')
parser.add_argument('--spadesOpts', dest='spades_opts', default='', help='(advanced) Extra flags to pass to the SPAdes assembler')
@@ -415,15 +418,15 @@ def parser_assemble_spades(parser=argparse.ArgumentParser()):
__commands__.append(('assemble_spades', parser_assemble_spades))

def gapfill_gap2seq(in_scaffold, in_bam, out_scaffold, threads=None, mem_limit_gb=4, time_soft_limit_minutes=60.0,
random_seed=0, gap2seq_opts='', mask_errors=False):
random_seed=0, gap2seq_opts='', always_succeed=False):
''' This step runs the Gap2Seq tool to close gaps between contigs in a scaffold.
'''
try:
tools.gap2seq.Gap2SeqTool().gapfill(in_scaffold, in_bam, out_scaffold, gap2seq_opts=gap2seq_opts, threads=threads,
mem_limit_gb=mem_limit_gb, time_soft_limit_minutes=time_soft_limit_minutes,
random_seed=random_seed)
except Exception as e:
if not mask_errors:
if not always_succeed:
raise
log.warning('Gap-filling failed (%s); ignoring error, emulating a successful run in which no gaps were filled.', e)
shutil.copyfile(in_scaffold, out_scaffold)
@@ -436,7 +439,7 @@ def parser_gapfill_gap2seq(parser=argparse.ArgumentParser(description='Close gap
parser.add_argument('--memLimitGb', dest='mem_limit_gb', default=4.0, help='Max memory to use, in gigabytes (default: %(default)s)')
parser.add_argument('--timeSoftLimitMinutes', dest='time_soft_limit_minutes', default=60.0,
help='Stop trying to close more gaps after this many minutes (default: %(default)s); this is a soft/advisory limit')
parser.add_argument('--maskErrors', dest='mask_errors', default=False, action='store_true',
parser.add_argument('--maskErrors', dest='always_succeed', default=False, action='store_true',
help='In case of any error, just copy in_scaffold to out_scaffold, emulating a successful run that simply could not '
'fill any gaps.')
parser.add_argument('--gap2seqOpts', dest='gap2seq_opts', default='', help='(advanced) Extra command-line options to pass to Gap2Seq')
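
The rename from mask_errors to always_succeed keeps the behavior: on failure the Gap2Seq wrapper passes the input scaffold through unchanged, while the SPAdes wrapper substitutes an empty contigs file. A minimal standalone sketch of the pass-through variant is below; the helper name run_step_or_passthrough is hypothetical and not part of this repo.

# Illustrative sketch, not part of this commit.
import logging
import shutil

log = logging.getLogger(__name__)

def run_step_or_passthrough(step, in_file, out_file, always_succeed=False):
    """Run step(in_file, out_file); if it fails and always_succeed is set, emulate a no-op success."""
    try:
        step(in_file, out_file)
    except Exception as e:
        if not always_succeed:
            raise
        # Pretend the step ran but changed nothing: pass the input through unmodified.
        log.warning('step failed (%s); copying input through unchanged', e)
        shutil.copyfile(in_file, out_file)
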
2 changes: 2 additions & 0 deletions pipes/Broad_UGER/cluster-submitter.py
@@ -43,6 +43,8 @@ def hard_blacklist_node(node):
if mem:
# on UGER, memory requests are per-core (according to BITS as of Sept. 6, 2016)
mem_per_core = round((int(mem)*1024)/int(threads), 2)
# increase memory requested to reflect new JVM-UGER changes requiring greater memory headroom
mem_per_core = round(mem_per_core * 1.1, 2)
if blacklisted_nodes:
# Pass h= as the hostname parameter; it accepts a regex, so
# invert the match to blacklist hostnames
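
For a concrete sense of the new headroom factor: UGER memory requests are per core, so the submitter divides the job's total memory across its threads and then pads the result by roughly 10%. A worked example with assumed numbers (8 GB over 4 threads; the values are illustrative, not from any repo config):

# Illustrative arithmetic only; mem and threads are assumed values.
mem = 8       # total memory requested for the job, in GB
threads = 4   # requested cores

mem_per_core = round((int(mem) * 1024) / int(threads), 2)  # 2048.0 MB per core
mem_per_core = round(mem_per_core * 1.1, 2)                # 2252.8 MB after ~10% JVM headroom
print(mem_per_core)  # -> 2252.8
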
3 changes: 2 additions & 1 deletion pipes/WDL/workflows/demux_plus.wdl
@@ -30,7 +30,8 @@ workflow demux_plus {
input:
assembler = "spades",
reads_unmapped_bam = deplete.cleaned_bam,
trim_clip_db = trim_clip_db
trim_clip_db = trim_clip_db,
always_succeed = true
}
}

5 changes: 5 additions & 0 deletions pipes/WDL/workflows/tasks/tasks_assembly.wdl
@@ -7,6 +7,7 @@ task assemble {
Int? spades_min_contig_len=0

String? assembler="trinity" # trinity, spades, or trinity-spades
Boolean? always_succeed=false

String cleaned_assembler = select_first([assembler, ""]) # workaround for https://gatkforums.broadinstitute.org/wdl/discussion/10462/string-type-in-output-section
# do this in two steps in case the input doesn't actually have "taxfilt" in the name
@@ -26,6 +27,7 @@ task assemble {
${trim_clip_db} \
${sample_name}.assembly1-trinity.fasta \
${'--n_reads=' + trinity_n_reads} \
${true='--alwaysSucceed' false="" always_succeed} \
--JVMmemory "$mem_in_mb"m \
--outReads=${sample_name}.subsamp.bam \
--loglevel=DEBUG
@@ -36,6 +38,7 @@ task assemble {
${trim_clip_db} \
${sample_name}.assembly1-spades.fasta \
${'--nReads=' + spades_n_reads} \
${true="--alwaysSucceed" false="" always_succeed} \
${'--minContigLen=' + spades_min_contig_len} \
--memLimitGb $mem_in_gb \
--outReads=${sample_name}.subsamp.bam \
@@ -49,13 +52,15 @@ task assemble {
${'--n_reads=' + trinity_n_reads} \
--JVMmemory "$mem_in_mb"m \
--outReads=${sample_name}.subsamp.bam \
${true='--always_succeed' false='' always_succeed} \
--loglevel=DEBUG
assembly.py assemble_spades \
${reads_unmapped_bam} \
${trim_clip_db} \
${sample_name}.assembly1-spades.fasta \
--contigsUntrusted=${sample_name}.assembly1-trinity.fasta \
${'--nReads=' + spades_n_reads} \
${true='--alwaysSucceed' false='' always_succeed} \
${'--minContigLen=' + spades_min_contig_len} \
--memLimitGb $mem_in_gb \
--loglevel=DEBUG
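
In the WDL blocks above, ${true='--alwaysSucceed' false='' always_succeed} is WDL's conditional placeholder: it renders the first string when the Boolean input is true and the second (here, nothing) when it is false. Roughly the following in Python, where the file names are placeholders rather than paths used by the workflow:

# Rough Python equivalent of the WDL placeholder; file names are placeholders.
always_succeed = True
cmd = ['assembly.py', 'assemble_spades',
       'reads_unmapped.bam', 'trim_clip_db.fasta', 'sample.assembly1-spades.fasta']
if always_succeed:
    cmd.append('--alwaysSucceed')
print(' '.join(cmd))
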
9 changes: 9 additions & 0 deletions test/unit/test_assembly.py
@@ -183,6 +183,15 @@ def test_empty_input_succeed(self):
assembly.assemble_spades(in_bam=inBam, clip_db=clipDb, out_fasta=outFasta)
self.assertEqual(os.path.getsize(outFasta), 0)

def test_always_succeed(self):
inDir = util.file.get_test_input_path(self)
inBam = os.path.join(inDir, '..', 'almost-empty.bam')
clipDb = os.path.join(inDir, 'clipDb.fasta')
with util.file.tempfname('.fasta') as outFasta:
assembly.assemble_spades(in_bam=inBam, clip_db=clipDb, out_fasta=outFasta, spades_opts='--bad-option',
always_succeed=True)
self.assertEqual(os.path.getsize(outFasta), 0)

class TestTrimRmdupSubsamp(TestCaseWithTmp):
''' Test the trim_rmdup_subsamp command '''

15 changes: 12 additions & 3 deletions tools/spades.py
@@ -42,7 +42,7 @@ def execute(self, args): # pylint: disable=W0221
subprocess.check_call(tool_cmd)

def assemble(self, reads_fwd, reads_bwd, contigs_out, reads_unpaired=None, contigs_trusted=None,
contigs_untrusted=None, kmer_sizes=(55,65), mask_errors=False, max_kmer_sizes=1,
contigs_untrusted=None, kmer_sizes=(55,65), always_succeed=False, max_kmer_sizes=1,
filter_contigs=False, min_contig_len=0, mem_limit_gb=8, threads=None, spades_opts=''):
'''Assemble contigs from RNA-seq reads and (optionally) pre-existing contigs.
@@ -56,7 +56,7 @@ def assemble(self, reads_fwd, reads_bwd, contigs_out, reads_unpaired=None, conti
Params:
kmer_sizes: if given, use these kmer sizes and combine the resulting contigs. kmer size of 0 or None means use size auto-selected
by SPAdes based on read length.
mask_errors: if True, if spades fails with an error for a kmer size, pretend it just produced no contigs for that kmer size
always_succeed: if True, if spades fails with an error for a kmer size, pretend it just produced no contigs for that kmer size
max_kmer_sizes: if this many kmer sizes succeed, do not try further ones
filter_contigs: if True, outputs only "long and reliable transcripts with rather high expression" (per SPAdes docs)
min_contig_len: drop contigs shorter than this many bp
@@ -101,12 +101,21 @@ def assemble(self, reads_fwd, reads_bwd, contigs_out, reads_unpaired=None, conti
try:
self.execute(args=args)
except Exception as e:
if mask_errors:
if always_succeed:
log.warning('SPAdes failed for k={}: {}'.format(kmer_size, e))
util.file.make_empty(transcripts_fname)
else:
raise

# work around the bug that spades may succeed yet not create the transcripts.fasta file
if not os.path.isfile(transcripts_fname):
msg = 'SPAdes failed to make transcripts.fasta for k={}'.format(kmer_size)
if always_succeed:
log.warning(msg)
util.file.make_empty(transcripts_fname)
else:
raise RuntimeError(msg)

if min_contig_len:
transcripts = Bio.SeqIO.parse(transcripts_fname, 'fasta')
transcripts_sans_short = [r for r in transcripts if len(r.seq) >= min_contig_len]
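
The new block at the end of this hunk covers a second failure mode: SPAdes exiting cleanly without writing transcripts.fasta. A minimal standalone illustration of that check follows; the function name and arguments are simplified stand-ins, not the wrapper's actual code.

# Illustrative sketch, not part of this commit.
import logging
import os

log = logging.getLogger(__name__)

def ensure_transcripts_exist(transcripts_fname, kmer_size, always_succeed=False):
    """If SPAdes exited 0 but wrote no transcripts file, either fail loudly or fake an empty result."""
    if os.path.isfile(transcripts_fname):
        return
    msg = 'SPAdes failed to make transcripts.fasta for k={}'.format(kmer_size)
    if not always_succeed:
        raise RuntimeError(msg)
    log.warning(msg)
    open(transcripts_fname, 'w').close()  # stand-in for util.file.make_empty
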
