Skip to content

Commit

Permalink
Merge 9c5d75e into 028a72d
Browse files Browse the repository at this point in the history
  • Loading branch information
tomkinsc committed Jun 7, 2019
2 parents 028a72d + 9c5d75e commit b8af188
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 3 deletions.
7 changes: 7 additions & 0 deletions errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env python

class QCError(RuntimeError):
'''Indicates a failure at a QC step.'''

def __init__(self, reason):
super(QCError, self).__init__(reason)
18 changes: 16 additions & 2 deletions pipes/WDL/workflows/tasks/tasks_taxon_filter.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,11 @@ task deplete_taxa {
# level or greater for the virus of interest)
# ======================================================================
task filter_to_taxon {
File reads_unmapped_bam
File lastal_db_fasta
File reads_unmapped_bam
File lastal_db_fasta
Boolean? error_on_reads_in_neg_control = false
Int? negative_control_reads_threshold = 0
String? neg_control_prefixes_space_separated = "neg water NTC"

# do this in two steps in case the input doesn't actually have "cleaned" in the name
String bam_basename = basename(basename(reads_unmapped_bam, ".bam"), ".cleaned")
Expand All @@ -95,10 +98,21 @@ task filter_to_taxon {
# find 90% memory
mem_in_mb=`/opt/viral-ngs/source/docker/calc_mem.py mb 90`

if [[ "${error_on_reads_in_neg_control}" == "true" ]]; then
ERROR_ON_NEG_CONTROL_ARGS="--errorOnReadsInNegControl"
if [[ -n "${negative_control_reads_threshold}" ]]; then
ERROR_ON_NEG_CONTROL_ARGS="$ERROR_ON_NEG_CONTROL_ARGS ${'--negativeControlReadsThreshold=' + negative_control_reads_threshold}"
fi
if [[ -n "${neg_control_prefixes_space_separated}" ]]; then
ERROR_ON_NEG_CONTROL_ARGS="$ERROR_ON_NEG_CONTROL_ARGS ${'--negControlPrefixes=' + neg_control_prefixes_space_separated}"
fi
fi

taxon_filter.py filter_lastal_bam \
${reads_unmapped_bam} \
${lastal_db_fasta} \
${bam_basename}.taxfilt.bam \
$ERROR_ON_NEG_CONTROL_ARGS \
--JVMmemory="$mem_in_mb"m \
--loglevel=DEBUG

Expand Down
37 changes: 36 additions & 1 deletion taxon_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,15 @@
import tools.samtools
from util.file import mkstempfname
import read_utils
from errors import QCError

log = logging.getLogger(__name__)


# =======================
# *** deplete_human ***
# =======================


def parser_deplete(parser=argparse.ArgumentParser()):
parser.add_argument('inBam', help='Input BAM file.')
parser.add_argument('revertBam', nargs='?', help='Output BAM: read markup reverted with Picard.')
Expand Down Expand Up @@ -172,11 +173,15 @@ def filter_lastal_bam(
min_length_for_initial_matches=5,
max_length_for_initial_matches=50,
max_initial_matches_per_position=100,
error_on_reads_in_neg_control=False,
neg_control_prefixes=None, #set below: "neg","water","NTC"
negative_control_reads_threshold=0,
JVMmemory=None, threads=None
):
''' Restrict input reads to those that align to the given
reference database using LASTAL.
'''
neg_control_prefixes = neg_control_prefixes or ["neg","water","NTC"]

with util.file.tmp_dir('-lastdb') as tmp_db_dir:
# index db if necessary
Expand All @@ -185,6 +190,8 @@ def filter_lastal_bam(
db = lastdb.build_database(db, os.path.join(tmp_db_dir, 'lastdb'))

with util.file.tempfname('.read_ids.txt') as hitList:
number_of_hits=0

# look for lastal hits in BAM and write to temp file
with open(hitList, 'wt') as outf:
for read_id in tools.last.Lastal().get_hits(
Expand All @@ -195,8 +202,16 @@ def filter_lastal_bam(
max_initial_matches_per_position,
threads=threads
):
number_of_hits+=1
outf.write(read_id + '\n')

if error_on_reads_in_neg_control:
sample_name=os.path.basename(inBam)
if any(sample_name.lower().startswith(prefix.lower()) for prefix in neg_control_prefixes):
if number_of_hits > max(0,negative_control_reads_threshold):
log.warning("Error raised due to reads in negative control; re-run this without '--errorOnReadsInNegControl' if this execution should succeed.")
raise QCError("The sample '{}' appears to be a negative control, but it contains {} reads after filtering to desired taxa.".format(sample_name,number_of_hits))

# filter original BAM file against keep list
tools.picard.FilterSamReadsTool().execute(inBam, False, hitList, outBam, JVMmemory=JVMmemory)

Expand Down Expand Up @@ -233,6 +248,26 @@ def parser_filter_lastal_bam(parser=argparse.ArgumentParser()):
type=int,
default=100
)
parser.add_argument(
'--errorOnReadsInNegControl',
dest="error_on_reads_in_neg_control",
help='If specified, the function will return an error if there are reads after filtering for samples with names containing: (water,neg,ntc) (default: %(default)s)',
action="store_true",
)
parser.add_argument(
'--negativeControlReadsThreshold',
dest="negative_control_reads_threshold",
help='maximum number of reads (single-end) or read pairs (paired-end) to tolerate in samples identified as negative controls (default: %(default)s)',
type=int,
default=0
)
parser.add_argument(
'--negControlPrefixes',
dest="neg_control_prefixes",
default=["neg","water","NTC"],
nargs='*',
help='Bam file name prefixes to interpret as negative controls, space-separated (default: %(default)s)'
)
parser.add_argument(
'--JVMmemory',
default=tools.picard.FilterSamReadsTool.jvmMemDefault,
Expand Down

0 comments on commit b8af188

Please sign in to comment.