Skip to content

Commit

Permalink
Merge pull request jamescasbon#53 from lennax/lenna
Browse files Browse the repository at this point in the history
Fix writing of Number=A and G INFO/FORMAT fields
  • Loading branch information
James Casbon committed Jun 25, 2012
2 parents e893ee2 + 4b36012 commit cce7abb
Showing 1 changed file with 30 additions and 16 deletions.
46 changes: 30 additions & 16 deletions vcf/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@
'AHAP':'Integer'
}

# Conversion between value in file and Python value
field_counts = {
'.': None, # Unknown number of values
'A': -1, # Equal to the number of alleles in a given record
'G': -2, # Equal to the number of genotypes in a given record
}


_Info = collections.namedtuple('Info', ['id', 'num', 'type', 'desc'])
_Filter = collections.namedtuple('Filter', ['id', 'desc'])
Expand Down Expand Up @@ -182,18 +189,12 @@ def __init__(self):
self.meta_pattern = re.compile(r'''##(?P<key>.+?)=(?P<val>.+)''')

def vcf_field_count(self, num_str):
if num_str == '.':
# Unknown number of values
return None
elif num_str == 'A':
# Equal to the number of alleles in a given record
return -1
elif num_str == 'G':
# Equal to the number of genotypes in a given record
return -2
else:
"""Cast vcf header numbers to integer or None"""
if num_str not in field_counts:
# Fixed, specified number
return int(num_str)
else:
return field_counts[num_str]

def read_info(self, info_string):
'''Read a meta-information INFO line.'''
Expand Down Expand Up @@ -996,20 +997,26 @@ class Writer(object):

fixed_fields = "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT".split()

# Reverse keys and values in header field count dictionary
counts = dict((v,k) for k,v in field_counts.iteritems())

def __init__(self, stream, template):
self.writer = csv.writer(stream, delimiter="\t")
self.template = template

two = '##{key}=<ID={0},Description="{1}">\n'
four = '##{key}=<ID={0},Number={num},Type={2},Description="{3}">\n'
_num = self._fix_field_count
for line in template.metadata.iteritems():
stream.write('##%s=%s\n' % line)
stream.write('##{0}={1}\n'.format(*line))
for line in template.infos.itervalues():
stream.write('##INFO=<ID=%s,Number=%s,Type=%s,Description="%s">\n' % tuple(self._map(str, line)))
stream.write(four.format(key="INFO", *line, num=_num(line.num)))
for line in template.formats.itervalues():
stream.write('##FORMAT=<ID=%s,Number=%s,Type=%s,Description="%s">\n' % tuple(self._map(str, line)))
stream.write(four.format(key="FORMAT", *line, num=_num(line.num)))
for line in template.filters.itervalues():
stream.write('##FILTER=<ID=%s,Description="%s">\n' % tuple(self._map(str, line)))
stream.write(two.format(key="FILTER", *line))
for line in template.alts.itervalues():
stream.write('##ALT=<ID=%s,Description="%s">\n' % tuple(self._map(str, line)))
stream.write(two.format(key="ALT", *line))

self._write_header()

Expand All @@ -1027,8 +1034,15 @@ def write_record(self, record):
for sample in record.samples]
self.writer.writerow(ffs + samples)

def _fix_field_count(self, num_str):
"""Restore header number to original state"""
if num_str not in self.counts:
return num_str
else:
return self.counts[num_str]

def _format_alt(self, alt):
return ','.join([str(x) or '.' for x in alt])
return ','.join(self._map(str, alt))

def _format_info(self, info):
if not info:
Expand Down

0 comments on commit cce7abb

Please sign in to comment.