From c41c5aa492700e38c1b929b89bf82a41158e6ce0 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Thu, 25 Jan 2018 12:14:23 -0500 Subject: [PATCH] restore SortSamTool call, add boolean to skip samtools-based sort if specified (sort by default, but not in the case where align_mem_one_rg does it via Picard --- tools/bwa.py | 60 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/tools/bwa.py b/tools/bwa.py index e79ce726c..ef88bba9d 100644 --- a/tools/bwa.py +++ b/tools/bwa.py @@ -100,6 +100,8 @@ def align_mem_bam(self, inBam, refDb, outBam, options=None, picardOptions=['SORT_ORDER=coordinate', 'USE_THREADING=true', 'CREATE_INDEX=true'], JVMmemory=JVMmemory ) + if outBam.endswith(".bam") or outBam.endswith(".cram"): + samtools.index(outBam) for bam in align_bams: os.unlink(bam) @@ -168,20 +170,44 @@ def align_mem_one_rg(self, inBam, refDb, outBam, rgid=None, options=None, assert len(readgroup_line) > 0 - # rather than reheader the alignment bam file later so it has the readgroup information - # from the original bam file, we'll pass the RG line to bwa to write out - self.mem(one_rg_inBam, refDb, outBam, options=options+['-R', - readgroup_line.rstrip("\r\n")], - min_score_to_filter=min_score_to_filter, threads=threads, invert_filter=invert_filter) - - # if there was more than one RG in the input, we had to create a temporary file with the one RG specified - # and we can safely delete it this file - # if there was only one RG in the input, we used it directly and should not delete it - if removeInput: - os.unlink(one_rg_inBam) + with util.file.tempfname('.aligned.bam') as tmp_bam_aligned: + # rather than reheader the alignment bam file later so it has the readgroup information + # from the original bam file, we'll pass the RG line to bwa to write out + self.mem(one_rg_inBam, refDb, tmp_bam_aligned, options=options+['-R', + readgroup_line.rstrip("\r\n")], + min_score_to_filter=min_score_to_filter, threads=threads, invert_filter=invert_filter, sort=False) + + # if there was more than one RG in the input, we had to create a temporary file with the one RG specified + # and we can safely delete it this file + # if there was only one RG in the input, we used it directly and should not delete it + if removeInput: + os.unlink(one_rg_inBam) + + # if the aligned bam file contains no reads after filtering + # just create an empty file + if tools.samtools.SamtoolsTool().count(tmp_bam_aligned) == 0: + util.file.touch(outBam) + else: + # samtools reheader seems to segfault on some alignments created by bwa + # so rather than reheader, BWA will write out the RG given to it via '-R' + # reheadered_bam = util.file.mkstempfname('.reheadered.bam') + # tools.samtools.SamtoolsTool().reheader(tmp_bam_aligned, headerFile, reheadered_bam) + # os.unlink(tmp_bam_aligned) + # os.unlink(headerFile) + # os.system("samtools view -h {} > /Users/tomkinsc/Desktop/test_reheader.bam".format(reheadered_bam)) + + # sort + sorter = tools.picard.SortSamTool() + sorter.execute( + tmp_bam_aligned, + outBam, + sort_order='coordinate', + picardOptions=['CREATE_INDEX=true', 'VALIDATION_STRINGENCY=SILENT'], + JVMmemory=JVMmemory + ) def mem(self, inReads, refDb, outAlign, options=None, min_score_to_filter=None, - threads=None, invert_filter=False): + threads=None, invert_filter=False, sort=True): options = [] if not options else options threads = util.misc.sanitize_thread_count(threads) @@ -202,11 +228,13 @@ def mem(self, inReads, refDb, outAlign, options=None, min_score_to_filter=None, else: aln_sam_filtered = aln_sam - samtools.sort(aln_sam_filtered, outAlign, threads=threads) + if sort: + samtools.sort(aln_sam_filtered, outAlign, threads=threads) - # cannot index sam files; only do so if a bam/cram is desired - if outAlign.endswith(".bam") or outAlign.endswith(".cram"): - samtools.index(outAlign) + # cannot index sam files; only do so if a bam/cram is desired + # sorting is required for indexing + if outAlign.endswith(".bam") or outAlign.endswith(".cram"): + samtools.index(outAlign) def filter_sam_on_alignment_score(self, in_sam, out_sam, min_score_to_filter, bwa_options, invert_filter=False):