From d9c78ca590fc79e53aad664aa2fc202117a906f9 Mon Sep 17 00:00:00 2001 From: Eric Talevich Date: Mon, 26 Aug 2013 13:15:25 -0700 Subject: [PATCH] NewickIO: parse branch lengths in scientific notation (Issue #220) Included a unit test for it, too. Thanks to mathli for reporting this bug and offering a fix. --- Bio/Phylo/NewickIO.py | 17 +++++++++-------- Tests/test_Phylo.py | 7 +++++++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/Bio/Phylo/NewickIO.py b/Bio/Phylo/NewickIO.py index 38cf2a1b424..698b37a6e38 100644 --- a/Bio/Phylo/NewickIO.py +++ b/Bio/Phylo/NewickIO.py @@ -20,13 +20,13 @@ class NewickError(Exception): """Exception raised when Newick object construction cannot continue.""" pass - - + + tokens = [ (r"\(", 'open parens'), (r"\)", 'close parens'), (r"[^\s\(\)\[\]\'\:\;\,]+", 'unquoted node label'), - (r"\:[0-9]*\.?[0-9]+", 'edge length'), + (r"\:[0-9]*\.?[0-9]+([eE][+-]?[0-9]+)?", 'edge length'), (r"\,", 'comma'), (r"\[(\\.|[^\]])*\]", 'comment'), (r"\'(\\.|[^\'])*\'", 'quoted node label'), @@ -70,11 +70,11 @@ def _parse_confidence(text): # assert 0 <= current_clade.confidence <= 1 except ValueError: return None - - + + def _format_comment(text): return '[%s]' % (text.replace('[', '\\[').replace(']', '\\]')) - + def _get_comment(clade): if hasattr(clade, 'comment') and clade.comment: return _format_comment(str(clade.comment)) @@ -207,12 +207,13 @@ def new_clade(self, parent=None): def process_clade(self, clade): """Final processing of a parsed clade. Removes the node's parent and returns it.""" - if (clade.name and not (self.values_are_confidence or self.comments_are_confidence) + if (clade.name and not (self.values_are_confidence or + self.comments_are_confidence) and clade.confidence is None): clade.confidence = _parse_confidence(clade.name) if not clade.confidence is None: clade.name = None - + if hasattr(clade, 'parent'): parent = clade.parent parent.clades.append(clade) diff --git a/Tests/test_Phylo.py b/Tests/test_Phylo.py index d1512e77955..a766f86f748 100644 --- a/Tests/test_Phylo.py +++ b/Tests/test_Phylo.py @@ -67,6 +67,13 @@ def test_newick_write(self): if c is not None) self.assertEqual(internal_names, set(('E', 'F'))) + def test_newick_read_scinot(self): + """Parse Newick branch lengths in scientific notation.""" + tree = Phylo.read(StringIO("(foo:1e-1,bar:0.1)"), 'newick') + clade_a = tree.clade[0] + self.assertEqual(clade_a.name, 'foo') + self.assertAlmostEqual(clade_a.branch_length, 0.1) + def test_format_branch_length(self): """Custom format string for Newick branch length serialization.""" tree = Phylo.read(StringIO('A:0.1;'), 'newick')