Skip to content

Commit

Permalink
Merge pull request #264 from emedgene/master
Browse files Browse the repository at this point in the history
  • Loading branch information
martijnvermaat committed Feb 5, 2017
2 parents c15c89d + a2f4a44 commit 0711a91
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 4 deletions.
4 changes: 2 additions & 2 deletions vcf/cparse.pyx
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from model import _Call

cdef _map(func, iterable, bad=('.', '')):
    '''``map``, but make bad values None.

    Apply ``func`` to every item of ``iterable`` and return the results as a
    list. Any item that is found in ``bad`` is not passed to ``func``; its
    slot in the result holds ``None`` instead.

    ``bad`` defaults to ``'.'`` and the empty string. The default is a tuple
    rather than a list so it cannot be mutated between calls.
    '''
    return [None if x in bad else func(x)
            for x in iterable]

INTEGER = 'Integer'
Expand Down
4 changes: 2 additions & 2 deletions vcf/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,9 +354,9 @@ def _parse_metainfo(self):
self.samples = fields[9:]
self._sample_indexes = dict([(x,i) for (i,x) in enumerate(self.samples)])

def _map(self, func, iterable, bad='.'):
def _map(self, func, iterable, bad=['.', '']):
'''``map``, but make bad values None.'''
return [func(x) if x != bad else None
return [func(x) if x not in bad else None
for x in iterable]

def _parse_filter(self, filt_str):
Expand Down
14 changes: 14 additions & 0 deletions vcf/test/bad-info-character.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
##fileformat=VCFv4.1
##INFO=<ID=EMPTY_1,Number=1,Type=Float,Description="A floating point value">
##INFO=<ID=EMPTY_3,Number=3,Type=Float,Description="Floating point values">
##INFO=<ID=EMPTY_N,Number=.,Type=Float,Description="Floating point values">
##INFO=<ID=DOT_1,Number=1,Type=Character,Description="A character value">
##INFO=<ID=DOT_3,Number=3,Type=Character,Description="Character values">
##INFO=<ID=DOT_N,Number=.,Type=Character,Description="Character values">
##INFO=<ID=NOTEMPTY_1,Number=1,Type=Float,Description="A floating point value">
##INFO=<ID=NOTEMPTY_3,Number=3,Type=Float,Description="Floating point values">
##INFO=<ID=NOTEMPTY_N,Number=.,Type=Float,Description="Floating point values">
##INFO=<ID=FLAG,Number=0,Type=Flag,Description="HapMap2 membership">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample
chr1 100 id1 G A . . FLAG;EMPTY_1=;EMPTY_3=;EMPTY_N=;DOT_1=.;DOT_3=.,.,.;DOT_N=.;NOTEMPTY_1=1;NOTEMPTY_3=1,2,3;NOTEMPTY_N=1 GT 0/1
19 changes: 19 additions & 0 deletions vcf/test/test_vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,24 @@ def test_write(self):
self.assertEquals(l.INFO, r.INFO)


class TestBadInfoFields(unittest.TestCase):
    '''INFO values written as ``.`` or left empty are parsed as None.'''

    def test_parse(self):
        '''Check every INFO key of the first record in bad-info-character.vcf.'''
        reader = vcf.Reader(fh('bad-info-character.vcf'))
        record = next(reader)

        # Values given as ``.``.
        self.assertIsNone(record.INFO['DOT_1'])
        self.assertEqual(record.INFO['DOT_3'], [None, None, None])
        self.assertEqual(record.INFO['DOT_N'], [None])

        # Values left empty after the ``=``.
        self.assertIsNone(record.INFO['EMPTY_1'])
        # Perhaps EMPTY_3 should yield [None, None, None], but this is really
        # a corner case of unspecified behaviour.
        self.assertEqual(record.INFO['EMPTY_3'], [None])
        self.assertEqual(record.INFO['EMPTY_N'], [None])

        # Regular values must still be converted as usual.
        self.assertEqual(record.INFO['NOTEMPTY_1'], 1)
        self.assertEqual(record.INFO['NOTEMPTY_3'], [1, 2, 3])
        self.assertEqual(record.INFO['NOTEMPTY_N'], [1])


class TestParseMetaLine(unittest.TestCase):
def test_parse(self):
reader = vcf.Reader(fh('parse-meta-line.vcf'))
Expand Down Expand Up @@ -1724,3 +1742,4 @@ def test_strelka(self):
# Load the tests of each TestCase class below and add them to ``suite``.
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGATKMeta))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestUncalledGenotypes))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestStrelka))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestBadInfoFields))

0 comments on commit 0711a91

Please sign in to comment.