Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Comparing changes

Choose two branches to see what's changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
...
Checking mergeability… Don't worry, you can still create the pull request.
  • 3 commits
  • 3 files changed
  • 0 commit comments
  • 2 contributors
Commits on Mar 16, 2013
@martijnvermaat martijnvermaat Adhere to `strict_whitespace` in parsing column headers
Fixes parsing of sample names with space characters in `strict_whitespace`
mode. Suggested by Lee Lichtenstein and Manaswi Gupta.
4fb0c86
@martijnvermaat martijnvermaat Forgot to add test file 0fd74aa
James Casbon Merge pull request #102 from martijnvermaat/column-headers-separator
Adhere to `strict_whitespace` in parsing column headers
6280a65
View
12 vcf/parser.py
@@ -205,6 +205,11 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals
if sys.version > '3':
self._reader = codecs.getreader('ascii')(self._reader)
+ if strict_whitespace:
+ self._separator = '\t'
+ else:
+ self._separator = '\t| +'
+
self.reader = (line.strip() for line in self._reader if line.strip())
#: metadata fields from header (string or hash, depending)
@@ -226,11 +231,6 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals
self._parse_metainfo()
self._format_cache = {}
- if strict_whitespace:
- self._separator = '\t'
- else:
- self._separator = '\t| +'
-
def __iter__(self):
return self
@@ -275,7 +275,7 @@ def _parse_metainfo(self):
line = self.reader.next()
- fields = re.split('\t| +', line[1:])
+ fields = re.split(self._separator, line[1:])
self._column_headers = fields[:9]
self.samples = fields[9:]
self._sample_indexes = dict([(x,i) for (i,x) in enumerate(self.samples)])
View
10 vcf/test/samples-space.vcf
@@ -0,0 +1,10 @@
+##fileformat=VCFv4.0
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA 00001	NA 00002	NA 00003
+20	14370	rs6054257	G	A	29	PASS	.	GT:GQ:DP:HQ	0|0:48:1:51,51	1|0:48:8:51,51	./.
+20	76766	rs6054257	C	T	29	PASS	.	GT:GQ:DP:HQ	0|0:48:1:51,51	1|0:48:8:51,51	./.
View
9 vcf/test/test_vcf.py
@@ -321,6 +321,14 @@ def testWrite(self):
assert line.startswith('##SAMPLE=<'), "Found dictionary in meta line: {0}".format(line)
+class TestSamplesSpace(unittest.TestCase):
+ filename = 'samples-space.vcf'
+ samples = ['NA 00001', 'NA 00002', 'NA 00003']
+ def test_samples(self):
+ self.reader = vcf.Reader(fh(self.filename), strict_whitespace=True)
+ self.assertEqual(self.reader.samples, self.samples)
+
+
class TestRecord(unittest.TestCase):
def test_num_calls(self):
@@ -875,6 +883,7 @@ def test_trim(self):
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFilter))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kg))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kgSites))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSamplesSpace))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRecord))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestCall))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRegression))

No commit comments for this range

Something went wrong with that request. Please try again.