Skip to content

Commit

Permalink
Fix for bug 2788; allow internal node labels on taxonomies in Nexus f…
Browse files Browse the repository at this point in the history
…iles.
  • Loading branch information
chapmanb committed Jul 27, 2009
1 parent 0852c04 commit bba8b8d
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 38 deletions.
78 changes: 43 additions & 35 deletions Bio/Nexus/Trees.py
Expand Up @@ -53,18 +53,18 @@ def __init__(self,tree=None,weight=1.0,rooted=False,name='',data=NodeData,values
self.rooted=rooted
self.name=name
root=Nodes.Node(data())
self.add(root)
self.root=root.id
self.root = self.add(root)
if tree: # use the tree we have
# if Tree is called from outside Nexus parser, we need to get rid of linebreaks, etc
tree=tree.strip().replace('\n','').replace('\r','')
# there's a discrepancy over whether newick allows semicolons at the end
tree=tree.rstrip(';')
self._add_subtree(parent_id=root.id,tree=self._parse(tree)[0])
subtree_info, base_info = self._parse(tree)
root.data = self._add_nodedata(root.data, [[], base_info])
self._add_subtree(parent_id=root.id,tree=subtree_info)

def _parse(self,tree):
"""Parses (a,b,c...)[[[xx]:]yy] into subcomponents and travels down recursively."""

#Remove any leading/trailing white space - want any string starting
#with " (..." should be recognised as a leaf, "(..."
tree = tree.strip()
Expand Down Expand Up @@ -106,51 +106,48 @@ def _parse(self,tree):

def _add_subtree(self, parent_id=None, tree=None):
    """Attach parsed leaves and subtrees below the node parent_id.

    parent_id -- id of the existing node the new nodes are connected to.
    tree      -- iterable of parsed elements. Each element st is indexable:
                 st[0] is either a list (a nested subtree, handled by
                 recursion) or the taxon name of a leaf, and st[1] holds
                 the values consumed by _add_nodedata.

    Raises TreeError if parent_id is None.
    """
    if parent_id is None:
        raise TreeError('Need node_id to connect to.')
    for st in tree:
        # Comment, internal label, support and branch length are all filled
        # in by the shared helper so that leaves and subtrees are treated
        # identically and a label is never mistaken for a support value.
        nd = self._add_nodedata(self.dataclass(), st)
        if isinstance(st[0], list):  # it's a subtree
            sn = Nodes.Node(nd)
            self.add(sn, parent_id)
            self._add_subtree(sn.id, st[0])
        else:  # it's a leaf
            nd.taxon = st[0]
            leaf = Nodes.Node(nd)
            self.add(leaf, parent_id)

def _add_nodedata(self, nd, st):
    """Add data to the node parsed from the comments, taxon and support.

    nd -- node data object to fill; its comment, taxon, support and
          branchlength attributes are assigned as applicable.
    st -- parsed element whose st[1] is the list of values: an optional
          leading label string, then numeric values, then an optional
          trailing comment string starting with NODECOMMENT_START.

    The comment is popped from st[1] in place and a leading label is
    stripped by rebinding st[1], so st is modified by this call.

    Returns nd.
    """
    # A trailing string that opens with NODECOMMENT_START is a node comment.
    if (st[1] and isinstance(st[1][-1], str)
            and st[1][-1].startswith(NODECOMMENT_START)):
        nd.comment = st[1].pop(-1)
    # A leading string is the label of the (internal) node. This is checked
    # independently of the comment: a node can carry both, and a label left
    # in the list would otherwise be stored as a support value below.
    if st[1] and isinstance(st[1][0], str):
        nd.taxon = st[1][0]
        st[1] = st[1][1:]
    if len(st[1]) >= 2:  # if there's two values, support comes first. Is that always so?
        nd.support = st[1][0]
        if st[1][1] is not None:
            nd.branchlength = st[1][1]
    elif len(st[1]) == 1:  # otherwise it could be real branchlengths or support as branchlengths
        if not self.__values_are_support:  # default
            if st[1][0] is not None:
                nd.branchlength = st[1][0]
        else:
            nd.support = st[1][0]
    return nd

def _get_values(self, text):
"""Extracts values (support/branchlength) from xx[:yyy], xx."""

if text=='':
return None
nodecomment = None
if NODECOMMENT_START in text: # if there's a [&....] comment, cut it out
nc_start=text.find(NODECOMMENT_START)
nc_end=text.find(NODECOMMENT_END)
Expand All @@ -159,10 +156,21 @@ def _get_values(self, text):
% (NODECOMMENT_START, NODECOMMENT_END))
nodecomment=text[nc_start:nc_end+1]
text=text[:nc_start]+text[nc_end+1:]
values=[float(t) for t in text.split(':') if t.strip()]

# parse out supports and branchlengths, with internal node taxa info
values = []
taxonomy = None
for part in [t.strip() for t in text.split(":")]:
if part:
try:
values.append(float(part))
except ValueError:
assert taxonomy is None, "Two string taxonomies?"
taxonomy = part
if taxonomy:
values.insert(0, taxonomy)
if nodecomment:
values.append(nodecomment)
else:
values=[float(t) for t in text.split(':') if t.strip()]
return values

def _walk(self,node=None):
Expand Down
2 changes: 2 additions & 0 deletions Tests/Nexus/int_node_labels.nex
@@ -0,0 +1,2 @@
(((((Cephalotaxus:125.000000,(Taxus:100.000000,Torreya:100.000000)TT1:25.000000)Taxaceae:90.000000,((((((((Calocedrus:85.000000,Platycladus:85.000000)CP:5.000000,(Cupressus:85.000000,Juniperus:85.000000)CJ:5.000000)CJCP:5.000000,Chamaecyparis:95.000000)CCJCP:5.000000,(Thuja:7.870000,Thujopsis:7.870000)TT2:92.13)CJCPTT:30.000000,((Cryptomeria:120.000000,Taxodium:120.000000)CT:5.000000,Glyptostrobus:125.000000)CTG:5.000000)CupCallTax:5.830000,((Metasequoia:125.000000,Sequoia:125.000000)MS:5.000000,Sequoiadendron:130.000000)Sequoioid:5.830000)STCC:49.060001,Taiwania:184.889999)Taw+others:15.110000,Cunninghamia:200.000000)nonSci:15.000000)Tax+nonSci:10.000000,Sciadopitys:225.000000):25.000000,(((Abies:106.000000,Keteleeria:106.000000)AK:54.000000,(Pseudolarix:156.000000,Tsuga:156.000000)NTP:4.000000)NTPAK:24.000000,((Larix:87.000000,Pseudotsuga:87.000000)LP:81.000000,(Picea:155.000000,Pinus:155.000000)PPC:13.000000)Pinoideae:16.000000)Pinaceae:66.000000)Coniferales:25.000000,Gink
go:275.000000)gymnosperm:75.000000;
49 changes: 46 additions & 3 deletions Tests/test_Nexus.py
Expand Up @@ -4,12 +4,14 @@
import cStringIO
import sys

from Bio.Nexus import Nexus
from Bio.Nexus import Nexus, Trees


class NexusTest1(unittest.TestCase):
def setUp(self):
self.handle = open("Nexus/test_Nexus_input.nex")
self.testfile_dir = "Nexus"
self.handle = open(os.path.join(self.testfile_dir,
"test_Nexus_input.nex"))

def tearDown(self):
self.handle.close()
Expand Down Expand Up @@ -334,7 +336,48 @@ def test_TreeTest1(self):
Root: 16
""")
self.assertEqual(t3.is_compatible(t2,threshold=0.3), [])


def test_internal_node_labels(self):
    """Handle text labels on internal nodes.

    Checks the flattened (taxon, branchlength, support, comment) tuples for
    a tree with labelled internal nodes and for the same tree without
    labels, then checks that two larger labelled trees parse at all.
    """
    ts1b = "(Cephalotaxus:125.000000,(Taxus:100.000000,Torreya:100.000000)"\
           "TT1:25.000000)Taxaceae:90.000000;"
    tree = Trees.Tree(ts1b)
    # assertEqual rather than a bare assert: it still runs under -O and
    # reports both values on failure.
    self.assertEqual(self._get_flat_nodes(tree),
                     [('Taxaceae', 90.0, None, None),
                      ('Cephalotaxus', 125.0, None, None),
                      ('TT1', 25.0, None, None),
                      ('Taxus', 100.0, None, None),
                      ('Torreya', 100.0, None, None)])

    ts1c = "(Cephalotaxus:125.000000,(Taxus:100.000000,Torreya:100.000000)"\
           "25.000000)90.000000;"
    tree = Trees.Tree(ts1c)
    self.assertEqual(self._get_flat_nodes(tree),
                     [(None, 90.0, None, None),
                      ('Cephalotaxus', 125.0, None, None),
                      (None, 25.0, None, None),
                      ('Taxus', 100.0, None, None),
                      ('Torreya', 100.0, None, None)])

    # The remaining inputs are only required to parse without raising.
    ts2 = "(((t9:0.385832, (t8:0.445135,t4:0.41401)C:0.024032)B:0.041436,"\
          "t6:0.392496)A:0.0291131, t2:0.497673, ((t0:0.301171,"\
          "t7:0.482152)E:0.0268148, ((t5:0.0984167,t3:0.488578)G:0.0349662,"\
          "t1:0.130208)F:0.0318288)D:0.0273876);"
    tree = Trees.Tree(ts2)

    large_ex_handle = open(os.path.join(self.testfile_dir,
                                        "int_node_labels.nex"))
    try:
        # Close the handle even if reading or parsing fails.
        tree = Trees.Tree(large_ex_handle.read())
    finally:
        large_ex_handle.close()

def _get_flat_nodes(self, tree):
    """Return the tree's node data flattened in breadth-first order.

    Starting at tree.root, each visited node contributes one tuple
    (taxon, branchlength, support, comment) read from its data attribute;
    the ids returned by get_succ() are resolved with tree.node() and
    visited after every node already waiting.
    """
    flattened = []
    pending = [tree.node(tree.root)]
    position = 0
    # pending doubles as a FIFO queue: position marks the next node to
    # visit, and successors are appended behind everything already waiting.
    while position < len(pending):
        current = pending[position]
        position += 1
        flattened.append((current.data.taxon,
                          current.data.branchlength,
                          current.data.support,
                          current.data.comment))
        for successor_id in current.get_succ():
            pending.append(tree.node(successor_id))
    return flattened

if __name__ == "__main__":
runner = unittest.TextTestRunner(verbosity = 2)
Expand Down

0 comments on commit bba8b8d

Please sign in to comment.