Permalink
Browse files

Only write the FORMAT column if it is in the template

Also, don't write any additional tab characters at the end of the record.
  • Loading branch information...
1 parent 10b26fc commit 9d7f44f71b817a83378b459491ae3193c4b5a170 @martijnvermaat martijnvermaat committed Feb 26, 2013
Showing with 30 additions and 5 deletions.
  1. +8 −5 vcf/parser.py
  2. +22 −0 vcf/test/test_vcf.py
View
@@ -220,6 +220,7 @@ def __init__(self, fsock=None, filename=None, compressed=False, prepend_chr=Fals
self.samples = None
self._sample_indexes = None
self._header_lines = []
+ self._column_headers = []
self._tabix = None
self._prepend_chr = prepend_chr
self._parse_metainfo()
@@ -274,7 +275,8 @@ def _parse_metainfo(self):
line = self.reader.next()
- fields = re.split('\t| +', line)
+ fields = re.split('\t| +', line[1:])
+ self._column_headers = fields[:9]
self.samples = fields[9:]
self._sample_indexes = dict([(x,i) for (i,x) in enumerate(self.samples)])
@@ -538,8 +540,6 @@ def fetch(self, chrom, start, end=None):
class Writer(object):
""" VCF Writer """
- fixed_fields = "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT".split()
-
# Reverse keys and values in header field count dictionary
counts = dict((v,k) for k,v in field_counts.iteritems())
@@ -574,13 +574,16 @@ def __init__(self, stream, template, lineterminator="\r\n"):
def _write_header(self):
# TODO: write INFO, etc
- self.writer.writerow(self.fixed_fields + self.template.samples)
+ self.stream.write('#' + '\t'.join(self.template._column_headers
+ + self.template.samples) + '\n')
def write_record(self, record):
""" write a record to the file """
ffs = self._map(str, [record.CHROM, record.POS, record.ID, record.REF]) \
+ [self._format_alt(record.ALT), record.QUAL or '.', self._format_filter(record.FILTER),
- self._format_info(record.INFO), record.FORMAT]
+ self._format_info(record.INFO)]
+ if record.FORMAT:
+ ffs.append(record.FORMAT)
samples = [self._format_sample(record.FORMAT, sample)
for sample in record.samples]
View
@@ -192,6 +192,8 @@ class Test1kg(unittest.TestCase):
def testParse(self):
reader = vcf.Reader(fh('1kg.vcf.gz', 'rb'))
+ assert 'FORMAT' in reader._column_headers
+
self.assertEqual(len(reader.samples), 629)
for _ in reader:
pass
@@ -211,10 +213,30 @@ def test_reader(self):
"""The samples attribute should be the empty list."""
reader = vcf.Reader(fh('1kg.sites.vcf', 'r'))
+ assert 'FORMAT' not in reader._column_headers
+
self.assertEqual(reader.samples, [])
for record in reader:
self.assertEqual(record.samples, [])
+ def test_writer(self):
+ """FORMAT should not be written if not present in the template and no
+ extra tab character should be printed if there are no FORMAT fields."""
+ reader = vcf.Reader(fh('1kg.sites.vcf', 'r'))
+ out = StringIO()
+ writer = vcf.Writer(out, reader, lineterminator='\n')
+
+ for record in reader:
+ writer.write_record(record)
+ out.seek(0)
+ out_str = out.getvalue()
+ for line in out_str.split('\n'):
+ if line.startswith('##'):
+ continue
+ if line.startswith('#CHROM'):
+ assert 'FORMAT' not in line
+ assert not line.endswith('\t')
+
class TestGatkOutputWriter(unittest.TestCase):

0 comments on commit 9d7f44f

Please sign in to comment.