Skip to content

Commit

Permalink
index bam before calling samtools idxstats; warn user if input lacks …
Browse files Browse the repository at this point in the history
…reads (#102)

* index bam before calling samtools idxstats; warn user if input lacks reads

in `read_utils.py::minimap2_idxstats()`, warn the user if the input bam file lacks reads, and index the post-alignment post-filtering bam before calling `samtools idxstats` so `minimap2_idxstats()` succeeds if the input bam lacks reads. The minimap2 wrapper, `tools/minimap2.py::Minimap2::align_bam()`, tolerates empty input and indexes bam/cram output, but the post-filtering final bam in minimap2_idxstats was not being indexed, which led to a problematic exit-on-failure condition in a WDL command invoking the function. Also fix import of the InvalidBamHeaderError class used in several tool modules (add it to errors.py, and import the error subclasses where needed).
This also runs minimap2 alignment when a read group lacks reads, since the newer version of minimap2 seems to better tolerate empty RGs. This allows samtools idxstats to yield zeros across the board for all input sequences when no reads are present, rather than only emitting the catchall "*" (which can cause issues downstream where metrics are expected)

* add pandas to deps

* attempt to fix coverage reporting due to breaking changes in coveralls reporter

see:
coverallsapp/coverage-reporter#124
coverallsapp/github-action#205
additional refs:
https://github.com/coverallsapp/github-action?tab=readme-ov-file
https://github.com/coverallsapp/coverage-reporter#supported-coverage-report-formats
  • Loading branch information
tomkinsc committed Jun 10, 2024
1 parent 853bea1 commit d176365
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 14 deletions.
6 changes: 5 additions & 1 deletion errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,8 @@ class QCError(RuntimeError):
'''Indicates a failure at a QC step.'''

def __init__(self, reason):
super(QCError, self).__init__(reason)
super(QCError, self).__init__(reason)

class InvalidBamHeaderError(ValueError):
'''Indicates a malformed or otherwise unusable bam file header'''
pass
7 changes: 7 additions & 0 deletions read_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1388,6 +1388,9 @@ def minimap2_idxstats(inBam, refFasta, outBam=None, outStats=None,
ref_indexed = util.file.mkstempfname('.reference.fasta')
shutil.copyfile(refFasta, ref_indexed)

if samtools.isEmpty(inBam):
log.warning("The input bam file appears to have zero reads: %s", inBam)

mm2.align_bam(inBam, ref_indexed, bam_aligned)

if filterReadsAfterAlignment:
Expand All @@ -1401,6 +1404,10 @@ def minimap2_idxstats(inBam, refFasta, outBam=None, outStats=None,
shutil.move(bam_aligned, bam_filtered)

if outStats is not None:
# index the final bam before calling idxstats
# but only if it is a bam or cram file (sam cannot be indexed)
if (bam_filtered.endswith(".bam") or bam_filtered.endswith(".cram")):
samtools.index(bam_filtered)
samtools.idxstats(bam_filtered, outStats)

if outBam is None:
Expand Down
1 change: 1 addition & 0 deletions tools/bwa.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import tools.picard
import util.file
import util.misc
from errors import *

TOOL_NAME = 'bwa'

Expand Down
7 changes: 6 additions & 1 deletion tools/minimap2.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import tools.picard
import util.file
import util.misc
from errors import *

TOOL_NAME = 'minimap2'

Expand Down Expand Up @@ -80,6 +81,7 @@ def align_bam(self, inBam, refDb, outBam, options=None,
log.warning("No alignment output for RG %s in file %s against %s", rg, inBam, refDb)

if len(align_bams)==0:
log.warning("All read groups in file %s appear to be empty.", inBam)
with util.file.tempfname('.empty.sam') as empty_sam:
samtools.dumpHeader(inBam, empty_sam)
samtools.sort(empty_sam, outBam)
Expand All @@ -92,6 +94,7 @@ def align_bam(self, inBam, refDb, outBam, options=None,
picardOptions=picardOptions,
JVMmemory=JVMmemory
)

for bam in align_bams:
os.unlink(bam)

Expand Down Expand Up @@ -181,8 +184,10 @@ def align_one_rg(self, inBam, refDb, outBam, rgid=None, preset=None, options=Non

# perform actual alignment
if samtools.isEmpty(one_rg_inBam):
log.warning("Input file %s appears to lack reads for RG '%s'", inBam, rgid)
# minimap doesn't like empty inputs, so copy empty bam through
samtools.sort(one_rg_inBam, outBam)
# samtools.sort(one_rg_inBam, outBam)
self.align_cmd(one_rg_inBam, refDb, outBam, options=options, threads=threads)
else:
self.align_cmd(one_rg_inBam, refDb, outBam, options=options, threads=threads)

Expand Down
19 changes: 7 additions & 12 deletions tools/novoalign.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,6 @@
either in $PATH or $NOVOALIGN_PATH.
'''

import tools
import tools.picard
import tools.samtools
import util.file
import util.misc

import logging
import os
import os.path
Expand All @@ -21,11 +15,16 @@
import stat
import sys

import tools
import tools.picard
import tools.samtools
import util.file
import util.misc
from errors import *

_log = logging.getLogger(__name__)

TOOL_NAME = "novoalign"
#TOOL_VERSION = "3.09.04"


class NovoalignTool(tools.Tool):

Expand Down Expand Up @@ -217,7 +216,3 @@ def index_fasta(self, refFasta, k=None, s=None):
os.chmod(outfname, mode)
except (IOError, OSError):
_log.warning('could not chmod "%s", this is likely OK', refFasta)


class InvalidBamHeaderError(ValueError):
pass

0 comments on commit d176365

Please sign in to comment.