Skip to content


Subversion checkout URL

You can clone with
Download ZIP


Fix writing of Number=A and G INFO/FORMAT fields #53

merged 4 commits into from

2 participants


Currently, the Reader stores field Numbers of A and G as negative integers, and the Writer doesn't convert them back to letters.

To make it easier to reverse the cast to integer, I changed the conversion method. Instead of converting with a hard-coded if/elif chain, I've made a module-level dictionary that is used for both the conversion and the reverse lookup. I'm using if/else because I've read that it's faster than try/except when the check is expected to fail more than half the time, and I put the `not in` branch first because it's the more likely case.

For writing the header fields, I switched to using str.format() (instead of C-style % formatting) and added a method that uses the conversion dictionary to switch the integers back to the correct letters.

In the third commit I removed the _map(str, ...) call (which writes '.' in place of None) that was applied to each header line. The dictionary now handles None to '.' for the Number field. As far as I can tell from the spec, only Number is allowed to have a value of '.' -- if this is incorrect, this commit can be skipped.


Added another commit to write a None alt as '.'

Current logic is str(x) or '.', but str(None) evaluates to the string 'None', which is truthy, so the '.' fallback is never used and a None alt is written as 'None'.

@jamescasbon jamescasbon merged commit 880ce55 into jamescasbon:master
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
This page is out of date. Refresh to see the latest.
Showing with 30 additions and 16 deletions.
  1. +30 −16 vcf/
46 vcf/
@@ -43,6 +43,13 @@
+# Conversion between value in file and Python value
+field_counts = {
+ '.': None, # Unknown number of values
+ 'A': -1, # Equal to the number of alleles in a given record
+ 'G': -2, # Equal to the number of genotypes in a given record
_Info = collections.namedtuple('Info', ['id', 'num', 'type', 'desc'])
_Filter = collections.namedtuple('Filter', ['id', 'desc'])
@@ -162,18 +169,12 @@ def __init__(self):
self.meta_pattern = re.compile(r'''##(?P<key>.+?)=(?P<val>.+)''')
def vcf_field_count(self, num_str):
- if num_str == '.':
- # Unknown number of values
- return None
- elif num_str == 'A':
- # Equal to the number of alleles in a given record
- return -1
- elif num_str == 'G':
- # Equal to the number of genotypes in a given record
- return -2
- else:
+ """Cast vcf header numbers to integer or None"""
+ if num_str not in field_counts:
# Fixed, specified number
return int(num_str)
+ else:
+ return field_counts[num_str]
def read_info(self, info_string):
'''Read a meta-information INFO line.'''
@@ -975,20 +976,26 @@ class Writer(object):
+ # Reverse keys and values in header field count dictionary
+ counts = dict((v,k) for k,v in field_counts.iteritems())
def __init__(self, stream, template):
self.writer = csv.writer(stream, delimiter="\t")
self.template = template
+ two = '##{key}=<ID={0},Description="{1}">\n'
+ four = '##{key}=<ID={0},Number={num},Type={2},Description="{3}">\n'
+ _num = self._fix_field_count
for line in template.metadata.iteritems():
- stream.write('##%s=%s\n' % line)
+ stream.write('##{0}={1}\n'.format(*line))
for line in template.infos.itervalues():
- stream.write('##INFO=<ID=%s,Number=%s,Type=%s,Description="%s">\n' % tuple(self._map(str, line)))
+ stream.write(four.format(key="INFO", *line, num=_num(line.num)))
for line in template.formats.itervalues():
- stream.write('##FORMAT=<ID=%s,Number=%s,Type=%s,Description="%s">\n' % tuple(self._map(str, line)))
+ stream.write(four.format(key="FORMAT", *line, num=_num(line.num)))
for line in template.filters.itervalues():
- stream.write('##FILTER=<ID=%s,Description="%s">\n' % tuple(self._map(str, line)))
+ stream.write(two.format(key="FILTER", *line))
for line in template.alts.itervalues():
- stream.write('##ALT=<ID=%s,Description="%s">\n' % tuple(self._map(str, line)))
+ stream.write(two.format(key="ALT", *line))
@@ -1006,8 +1013,15 @@ def write_record(self, record):
for sample in record.samples]
self.writer.writerow(ffs + samples)
+ def _fix_field_count(self, num_str):
+ """Restore header number to original state"""
+ if num_str not in self.counts:
+ return num_str
+ else:
+ return self.counts[num_str]
def _format_alt(self, alt):
- return ','.join([str(x) or '.' for x in alt])
+ return ','.join(self._map(str, alt))
def _format_info(self, info):
if not info:
Something went wrong with that request. Please try again.