Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Correctly write meta lines with dictionary value #84

Merged
merged 2 commits into from

2 participants

@martijnvermaat
Collaborator

Write meta lines with a dictionary-like value as

##meta=<field=value,field=value,...>

instead of as the Python dictionary string representation. This is a
fix for jamescasbon#83 and a generalization of jamescasbon#81. A
regression compared to jamescasbon#81 is that the order of fields in
a contig line is no longer defined.

The implementation still feels a bit hackish, and there are probably other unknown cases that are not handled properly, but this should fix a number of them.

martijnvermaat added some commits
@martijnvermaat martijnvermaat Correctly write meta lines with dictionary value
Write meta lines with a dictionary-like value as

    ##meta=<field=value,field=value,...>

instead of as the Python dictionary string representation. This is a
fix for jamescasbon#83 and a generalization of jamescasbon#81. A
regression compared to jamescasbon#81 is that the order of fields in
a `contig` line is no longer defined.
9d43fa9
@martijnvermaat martijnvermaat Preserve order in meta lines with dictionary value 1225561
@martijnvermaat
Collaborator

I think parsing values surrounded by quotes in these kinds of dictionaries (e.g. "Tumor" in the unit test) would ideally yield a plain Python string, without the quotes, and numerical values should also be typed as such. But that's probably opening a whole other can of worms.

It is, however, only because of this missing functionality that we now get a 100% match between input and output (quotes are only written if they were originally present).

@jamescasbon jamescasbon merged commit 9bb2a04 into from
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Jan 10, 2013
  1. @martijnvermaat

    Correctly write meta lines with dictionary value

    martijnvermaat authored
    Write meta lines with a dictionary-like value as
    
        ##meta=<field=value,field=value,...>
    
    instead of as the Python dictionary string representation. This is a
    fix for jamescasbon#83 and a generalization of jamescasbon#81. A
    regression compared to jamescasbon#81 is that the order of fields in
    a `contig` line is no longer defined.
  2. @martijnvermaat
This page is out of date. Refresh to see the latest.
Showing with 28 additions and 5 deletions.
  1. +5 −4 vcf/parser.py
  2. +23 −1 vcf/test/test_vcf.py
View
9 vcf/parser.py
@@ -158,7 +158,7 @@ def read_meta_hash(self, meta_string):
# Removing initial hash marks and final equal sign
key = items[0][2:-1]
hashItems = items[1].split(',')
- val = dict(item.split("=") for item in hashItems)
+ val = OrderedDict(item.split("=") for item in hashItems)
return key, val
def read_meta(self, meta_string):
@@ -549,14 +549,15 @@ def __init__(self, stream, template, lineterminator="\r\n"):
two = '##{key}=<ID={0},Description="{1}">\n'
four = '##{key}=<ID={0},Number={num},Type={2},Description="{3}">\n'
- contig_format = '##contig=<ID={ID},length={length},assembly={assembly}>\n'
_num = self._fix_field_count
for (key, vals) in template.metadata.iteritems():
if key in SINGULAR_METADATA:
vals = [vals]
for val in vals:
- if key == "contig":
- stream.write(contig_format.format(**val))
+ if isinstance(val, dict):
+ values = ','.join('{0}={1}'.format(key, value)
+ for key, value in val.items())
+ stream.write('##{0}=<{1}>\n'.format(key, values))
else:
stream.write('##{0}={1}\n'.format(key, val))
for line in template.infos.itervalues():
View
24 vcf/test/test_vcf.py
@@ -221,7 +221,7 @@ def testWrite(self):
out_str = out.getvalue()
for line in out_str.split("\n"):
if line.startswith("##contig"):
- assert "<ID=" in line, "Found dictionary in contig line: {0}".format(line)
+ assert line.startswith('##contig=<'), "Found dictionary in contig line: {0}".format(line)
print (out_str)
reader2 = vcf.Reader(out)
@@ -257,6 +257,27 @@ def testWrite(self):
self.assertEquals(l.samples, r.samples)
+class TestWriterDictionaryMeta(unittest.TestCase):
+
+ def testWrite(self):
+
+ reader = vcf.Reader(fh('example-4.1-bnd.vcf'))
+ out = StringIO()
+ writer = vcf.Writer(out, reader)
+
+ records = list(reader)
+
+ for record in records:
+ writer.write_record(record)
+ out.seek(0)
+ out_str = out.getvalue()
+ for line in out_str.split("\n"):
+ if line.startswith("##PEDIGREE"):
+ self.assertEquals(line, '##PEDIGREE=<Derived="Tumor",Original="Germline">')
+ if line.startswith("##SAMPLE"):
+ assert line.startswith('##SAMPLE=<'), "Found dictionary in meta line: {0}".format(line)
+
+
class TestRecord(unittest.TestCase):
def test_num_calls(self):
@@ -789,6 +810,7 @@ def test_trim(self):
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestBcfToolsOutput))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGatkOutputWriter))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestBcfToolsOutputWriter))
+suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestWriterDictionaryMeta))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestTabix))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestOpenMethods))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFilter))
Something went wrong with that request. Please try again.