forked from jdoughertyii/PyVCF
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix for issue #140, add vcf_record_sort_key arg #143
Closed
Closed
Changes from 1 commit
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
da0d7c0
Fix for issue #140, add vcf_record_sort_key arg
datagram a7097fe
Fixed spacing and wrapping in utils.py, removed test for old walk_tog…
be60cd2
Fixed edge case where all inputs are empty, simplified logic
6a204d0
finished fixing edge case where 'other' is None
1067a50
Test data for testing the fix for issue #140
a7da0a5
Added tests for walk_together with more complex inputs
10d9774
Added check for consistant chromosome ordering
File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,51 +2,50 @@ | |
Utilities for VCF files. | ||
""" | ||
|
||
import operator | ||
def walk_together(*readers, **kwargs): | ||
""" Simultaneously iterate two or more VCF readers and return | ||
lists of concurrent records from each | ||
reader, with None if no record present. Caller must check the | ||
inputs are sorted in the same way and use the same reference | ||
otherwise behaviour is undefined. | ||
|
||
Args: | ||
vcf_record_sort_key: function that takes a VCF record and returns a tuple that can be used as the key for comparing and sorting VCF records across all given VCFReaders. The tuple's 1st element should be the contig name. | ||
""" | ||
if 'vcf_record_sort_key' in kwargs: | ||
get_key = kwargs['vcf_record_sort_key'] | ||
else: | ||
get_key = lambda r: (r.CHROM, r.POS) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The default should include REF and ALT. |
||
|
||
nexts = [] | ||
for reader in readers: | ||
try: | ||
nexts.append(reader.next()) | ||
except StopIteration: | ||
nexts.append(None) | ||
|
||
min_k = (None,) # keep track of the previous min key's contig | ||
while True: | ||
kdict = {i: get_key(x) for i,x in enumerate(nexts) if x is not None} | ||
keys_with_prev_contig = [k for k in kdict.values() if k[0] == min_k[0]] | ||
if any(keys_with_prev_contig): | ||
# finish all records from previous contig | ||
min_k = min(keys_with_prev_contig) | ||
else: | ||
# move on to the next contig | ||
min_k = min(kdict.values()) | ||
|
||
min_k_idxs = set([i for i, k in kdict.items() if k == min_k]) | ||
yield [nexts[i] if i in min_k_idxs else None for i in range(len(nexts))] | ||
|
||
def walk_together(*readers, **kwargs): | ||
""" Simultaneously iterate two or more VCF readers and return | ||
lists of concurrent records from each | ||
reader, with None if no record present. Caller must check the | ||
inputs are sorted in the same way and use the same reference | ||
otherwise behaviour is undefined. | ||
""" | ||
# if defined, custom equality functions must take the same arguments | ||
# as operator.eq | ||
if 'eq_func' in kwargs: | ||
eq_func = kwargs['eq_func'] | ||
# by default, we use the equality operator (==), which compares | ||
# equality in CHROM, POS, REF, and ALT | ||
else: | ||
eq_func = operator.eq | ||
|
||
# if one of the VCFs has no records, StopIteration is | ||
# raised immediately, so we need to check for that and | ||
# deal appropriately | ||
nexts = [] | ||
for reader in readers: | ||
try: | ||
nexts.append(reader.next()) | ||
except StopIteration: | ||
nexts.append(None) | ||
|
||
while True: | ||
min_next = min([x for x in nexts if x is not None]) | ||
|
||
yield [x if x is None or eq_func(x, min_next) else None for x in nexts] | ||
|
||
# update nexts that we just yielded | ||
for i, n in enumerate(nexts): | ||
|
||
if n is not None and eq_func(n, min_next): | ||
try: | ||
nexts[i] = readers[i].next() | ||
except StopIteration: | ||
nexts[i] = None | ||
|
||
if all([x is None for x in nexts]): | ||
break | ||
for i in min_k_idxs: | ||
try: | ||
nexts[i] = readers[i].next() | ||
except StopIteration: | ||
nexts[i] = None | ||
|
||
if all([x is None for x in nexts]): | ||
break | ||
|
||
|
||
def trim_common_suffix(*sequences): | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This docstring is over indented.