Skip to content

Commit

Permalink
Merge pull request #197 from broadinstitute/sort_vphaser_input
Browse files Browse the repository at this point in the history
vphaser_one_sample now sorts bam files, if unsorted, prior to processing
  • Loading branch information
tomkinsc committed Jan 19, 2016
2 parents d240b24 + b92ccc3 commit 162d129
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 7 deletions.
31 changes: 24 additions & 7 deletions intrahost.py
Expand Up @@ -109,19 +109,28 @@ def vphaser_one_sample(inBam, inConsFasta, outTab, vphaserNumThreads=None,
sequence/chrom name, and library counts and p-values appended to
the counts for each allele.
'''
samtoolsTool = SamtoolsTool()

if minReadsEach is not None and minReadsEach < 0:
raise Exception('minReadsEach must be at least 0.')

bamToProcess = inBam
sorted_bam_file = inBam
if not bam_is_sorted(inBam):
sorted_bam_file = util.file.mkstempfname('.mapped-sorted.bam')
sorted_bam_file_tmp = util.file.mkstempfname('.mapped-sorted.bam')
samtoolsTool.sort(args=['-T', sorted_bam_file_tmp], inFile=inBam, outFile=sorted_bam_file)

bam_to_process = sorted_bam_file
if removeDoublyMappedReads:
bamToProcess = util.file.mkstempfname('.mapped-withdoublymappedremoved.bam')
samtoolsTool = SamtoolsTool()
samtoolsTool.removeDoublyMappedReads(inBam, bamToProcess)
samtoolsTool.index(bamToProcess)
variantIter = Vphaser2Tool().iterate(bamToProcess, vphaserNumThreads)
bam_to_process = util.file.mkstempfname('.mapped-withdoublymappedremoved.bam')

samtoolsTool.removeDoublyMappedReads(sorted_bam_file, bam_to_process)
samtoolsTool.index(bam_to_process)

variantIter = Vphaser2Tool().iterate(bam_to_process, vphaserNumThreads)
filteredIter = filter_strand_bias(variantIter, minReadsEach, maxBias)

libraryFilteredIter = compute_library_bias(filteredIter, bamToProcess, inConsFasta)
libraryFilteredIter = compute_library_bias(filteredIter, bam_to_process, inConsFasta)
with util.file.open_or_gzopen(outTab, 'wt') as outf:
for row in libraryFilteredIter:
outf.write('\t'.join(row) + '\n')
Expand Down Expand Up @@ -1104,6 +1113,14 @@ def main_iSNP_per_patient(args):
# ===============[ Utility functions ]================


def bam_is_sorted(bam_file_path):
# Should perhaps be in samtools.py once it moves to pysam
samfile = pysam.AlignmentFile(bam_file_path, "rb")
if "HD" in samfile.header and "SO" in samfile.header["HD"]:
return samfile.header["HD"]["SO"] == "unsorted"
else:
raise KeyError("Could not locate the SO field in the SAM/BAM file header.")

def sampleIDMatch(inputString):
"""
Given a sample name in the form of [sample] or [sample]-#,
Expand Down
5 changes: 5 additions & 0 deletions tools/samtools.py
Expand Up @@ -72,6 +72,11 @@ def view(self, args, inFile, outFile, regions=None):

self.execute('view', args + ['-o', outFile, inFile] + regions)

def sort(self, inFile, outFile, args=None):
args = args or []

self.execute('sort', args + ['-o', outFile, inFile])

def merge(self, inFiles, outFile, options=None):
"Merge a list of inFiles to create outFile."
options = options or ['-f']
Expand Down

0 comments on commit 162d129

Please sign in to comment.