Skip to content

Commit

Permalink
change hash function to something faster
Browse files Browse the repository at this point in the history
This avoids using the CRAM sequence and base qualities, which otherwise
do not need to be decoded.

This changes the run-time of one test case from 20.8 seconds to 14.9 seconds.
  • Loading branch information
brentp committed Sep 10, 2018
1 parent 19aa65f commit b43902c
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
3 changes: 2 additions & 1 deletion svtyper/classic.py
Expand Up @@ -123,7 +123,8 @@ def sv_genotype(bam_string,
if b.endswith('.bam'):
bam_list.append(pysam.AlignmentFile(b, mode='rb'))
elif b.endswith('.cram'):
bam_list.append(pysam.AlignmentFile(b, mode='rc', reference_filename=ref_fasta))
bam_list.append(pysam.AlignmentFile(b,
mode='rc',reference_filename=ref_fasta,format_options=["required_fields=7167"]))
else:
sys.stderr.write('Error: %s is not a valid alignment file (*.bam or *.cram)\n' % b)
exit(1)
Expand Down
5 changes: 4 additions & 1 deletion svtyper/parsers.py
Expand Up @@ -717,6 +717,9 @@ def close(self):
# from a single molecule
# ==================================================

def rhash(r):
    """Return a hash identifying read *r* by its name and flag.

    Only ``r.query_name`` and ``r.flag`` contribute to the result; no
    other attribute of the read is accessed.
    """
    identity = (r.query_name, r.flag)
    return hash(identity)

class SamFragment(object):
def __init__(self, read, lib):
self.lib = lib
Expand All @@ -738,7 +741,7 @@ def is_primary(self, read):

def add_read(self, read):
# ensure we don't add the same read twice
read_hash = read.__hash__()
read_hash = rhash(read)
if read_hash in self.read_set:
return
else:
Expand Down

0 comments on commit b43902c

Please sign in to comment.