Skip to content
This repository
Browse code

Merge pull request #102 from martijnvermaat/column-headers-separator

Adhere to `strict_whitespace` in parsing column headers
  • Loading branch information...
commit 6280a655b40d7f0192a5f30eaa6559d6d19d0b43 (2 parents: ac099c0 + 0fd74aa)
James Casbon authored
vcf/parser.py (12 changed lines)
@@ -205,6 +205,11 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals
205 205 if sys.version > '3':
206 206 self._reader = codecs.getreader('ascii')(self._reader)
207 207
  208 + if strict_whitespace:
  209 + self._separator = '\t'
  210 + else:
  211 + self._separator = '\t| +'
  212 +
208 213 self.reader = (line.strip() for line in self._reader if line.strip())
209 214
210 215 #: metadata fields from header (string or hash, depending)
@@ -226,11 +231,6 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals
226 231 self._parse_metainfo()
227 232 self._format_cache = {}
228 233
229   - if strict_whitespace:
230   - self._separator = '\t'
231   - else:
232   - self._separator = '\t| +'
233   -
234 234 def __iter__(self):
235 235 return self
236 236
@@ -275,7 +275,7 @@ def _parse_metainfo(self):
275 275
276 276 line = self.reader.next()
277 277
278   - fields = re.split('\t| +', line[1:])
  278 + fields = re.split(self._separator, line[1:])
279 279 self._column_headers = fields[:9]
280 280 self.samples = fields[9:]
281 281 self._sample_indexes = dict([(x,i) for (i,x) in enumerate(self.samples)])
vcf/test/samples-space.vcf (10 changed lines)
... ... @@ -0,0 +1,10 @@
  1 +##fileformat=VCFv4.0
  2 +##FILTER=<ID=q10,Description="Quality below 10">
  3 +##FILTER=<ID=s50,Description="Less than 50% of samples have data">
  4 +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
  5 +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
  6 +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
  7 +##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
  8 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA 00001 NA 00002 NA 00003
  9 +20 14370 rs6054257 G A 29 PASS . GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 ./.
  10 +20 76766 rs6054257 C T 29 PASS . GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 ./.
vcf/test/test_vcf.py (9 changed lines)
@@ -321,6 +321,14 @@ def testWrite(self):
321 321 assert line.startswith('##SAMPLE=<'), "Found dictionary in meta line: {0}".format(line)
322 322
323 323
  324 +class TestSamplesSpace(unittest.TestCase):
  325 + filename = 'samples-space.vcf'
  326 + samples = ['NA 00001', 'NA 00002', 'NA 00003']
  327 + def test_samples(self):
  328 + self.reader = vcf.Reader(fh(self.filename), strict_whitespace=True)
  329 + self.assertEqual(self.reader.samples, self.samples)
  330 +
  331 +
324 332 class TestRecord(unittest.TestCase):
325 333
326 334 def test_num_calls(self):
@@ -875,6 +883,7 @@ def test_trim(self):
875 883 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFilter))
876 884 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kg))
877 885 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kgSites))
  886 +suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSamplesSpace))
878 887 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRecord))
879 888 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestCall))
880 889 suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRegression))

0 comments on commit 6280a65

Please sign in to comment.
Something went wrong with that request. Please try again.