Skip to content

Commit

Permalink
updates across the board
Browse files Browse the repository at this point in the history
  • Loading branch information
jeetsukumaran committed Nov 2, 2009
1 parent 15a1c39 commit 6d91fbe
Show file tree
Hide file tree
Showing 11 changed files with 134 additions and 36 deletions.
10 changes: 6 additions & 4 deletions dendropy/dataio/nexml.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,8 +461,9 @@ def parse_nodes(self, tree_element, taxon_set):
nodes[node_id].label = nxnode.get('label', None)
taxon_id = nxnode.get('otu', None)
if taxon_id is not None:
taxon = taxon_set.get_taxon(oid=taxon_id)
if not taxon:
try:
taxon = taxon_set.require_taxon(oid=taxon_id)
except KeyError:
raise Exception('Taxon with id "%s" not defined in taxa block "%s"' % (taxon_id, taxon_set.oid))
nodes[node_id].taxon = taxon
self.parse_annotations(annotated=nodes[node_id], nxelement=nxnode)
Expand Down Expand Up @@ -713,8 +714,9 @@ def parse_char_array(self, nxchars, dataset):
row_id = nxrow.get('id', None)
label = nxrow.get('label', None)
taxon_id = nxrow.get('otu', None)
taxon = taxon_set.get_taxon(oid=taxon_id)
if not taxon:
try:
taxon = taxon_set.require_taxon(oid=taxon_id)
except KeyError, e:
raise Exception('Character Block %s (\"%s\"): Taxon with id "%s" not defined in taxa block "%s"' % (char_array.oid, char_array.label, taxon_id, taxon_set.oid))

character_vector = dendropy.CharacterDataVector(oid=row_id, label=label, taxon=taxon)
Expand Down
2 changes: 1 addition & 1 deletion dendropy/dataio/nexus.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ def _parse_taxlabels_statement(self):
token = self.stream_tokenizer.read_next_token()
while token != ';':
label = token
self.current_taxon_set.get_taxon(label=label)
self.current_taxon_set.require_taxon(label=label)
token = self.stream_tokenizer.read_next_token()

###########################################################################
Expand Down
17 changes: 13 additions & 4 deletions dendropy/dataio/nexustokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,23 @@ class RootingInterpretation:
## StrToTaxon

class StrToTaxon(object):

def __init__(self, taxon_set, translate_dict=None):
self.taxon_set = taxon_set
self.translate = translate_dict or {}
def get_taxon(self, label, taxon_required=True):
v = self.translate.get(label)

def get_taxon(self, label):
return self.translate.get(label)

def require_taxon(self, label):
v = self.get_taxon(label)
if v is not None:
return v
t = self.taxon_set.get_taxon(label=label, taxon_required=taxon_required)
t = self.taxon_set.require_taxon(label=label)
if t is not None:
self.translate[label] = t #@this could lead to problems when we support multiple taxon blocks, but now it'll speed thing up
return t

def index(self, t):
return self.taxon_set.index(t)

Expand Down Expand Up @@ -165,7 +171,10 @@ def parse_tree_from_stream(stream_tokenizer, **kwargs):
curr_node = p
else:
is_leaf = curr_node.is_leaf()
t = stt.get_taxon(label=token, taxon_required=is_leaf)
if is_leaf:
t = stt.require_taxon(label=token)
else:
t = stt.get_taxon(label=token)
if t is None:
curr_node.label = token
else:
Expand Down
2 changes: 1 addition & 1 deletion dendropy/dataobject/char.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@ def reindex_subcomponent_taxa(self):
self.taxon_set._is_mutable = True
new_map = CharacterDataMap()
for taxon, seq in self.taxon_seq_map.items():
taxon = self.taxon_set.get_taxon(label=taxon.label)
taxon = self.taxon_set.require_taxon(label=taxon.label)
new_map[taxon] = seq
self.taxon_set._is_mutable = ti_mutable
self.taxon_seq_map = new_map
Expand Down
79 changes: 64 additions & 15 deletions dendropy/dataobject/taxon.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,40 +157,89 @@ def __str__(self):
taxlist.append(str(taxon))
return ' '.join(header) + ' : [' + ', '.join(taxlist) + ']'

def get_taxon(self, label=None, taxon_required=True, oid=None):
def has_taxon(self, **kwargs):
    """
    Returns True if at least one taxon in self matches ANY of the
    criteria supplied by keyword:

        - `taxon` : matches a member that is the very same object
        - `oid`   : matches a member whose `oid` attribute equals it
        - `label` : matches a member whose `label` attribute equals it

    Criteria that are omitted (or passed as None) are ignored.
    Returns False if no member matches. Raises an Exception if none
    of the `taxon`, `oid` or `label` keywords is given.
    """
    # Accept `taxon` on its own as well: the matching loop below has
    # always supported it, so the guard must not reject it.
    if not any(key in kwargs for key in ("taxon", "oid", "label")):
        raise Exception("Need to specify taxon, oid or label.")
    req_taxon = kwargs.get("taxon", None)
    oid = kwargs.get("oid", None)
    label = kwargs.get("label", None)
    for self_taxon in self:
        # Identity, not equality, is used for the `taxon` criterion.
        if req_taxon is not None and req_taxon is self_taxon:
            return True
        if oid is not None and self_taxon.oid == oid:
            return True
        if label is not None and self_taxon.label == label:
            return True
    return False

def has_taxa(self, **kwargs):
    """
    Returns True if ALL of the supplied criteria are satisfied:

        - `taxa`   : every object in this iterable is a member of self
        - `oids`   : for every oid in this iterable, some member of
                     self has that `oid`
        - `labels` : for every label in this iterable, some member of
                     self has that `label`

    Criteria that are omitted are treated as empty (and so trivially
    satisfied). Raises an Exception if none of the `taxa`, `oids` or
    `labels` keywords is given.
    """
    # The guard must test the same (plural) keywords that are read
    # below; testing the singular names rejected every valid call.
    if not any(key in kwargs for key in ("taxa", "oids", "labels")):
        raise Exception("Need to specify taxon, oid or label lists.")
    taxa = set(kwargs.get("taxa", []))
    oids = set(kwargs.get("oids", []))
    labels = set(kwargs.get("labels", []))
    taxon_oids = set(t.oid for t in self)
    taxon_labels = set(t.label for t in self)
    return taxa.issubset(self) \
        and oids.issubset(taxon_oids) \
        and labels.issubset(taxon_labels)

def get_taxon(self, **kwargs):
"""
Retrieves taxon object with given id OR label (if both are
given, the first match found is returned). If taxon does not
exist and update is False, an exception is raised. If taxon
does not exist and update is True, then a new taxon is
created, added, and returned.
exist then None is returned.
"""
update = self._is_mutable
if not oid and not label:
raise Exception("Need to specify DataObject ID or Label.")
if "oid" not in kwargs and "label" not in kwargs:
raise Exception("Need to specify Taxon oid or Label.")
oid = kwargs.get("oid", None)
label = kwargs.get("label", None)
for taxon in self:
if taxon.oid == oid or taxon.label == label:
if (oid is not None and taxon.oid == oid) \
or (label is not None and taxon.label == label):
return taxon
if taxon_required:
if not update:
raise Exception("Taxon not found: %s/%s" % (oid, label))
taxon = Taxon(label=label, oid=oid)
return None

def require_taxon(self, **kwargs):
"""
Retrieves taxon object with given id OR label (if both are
given, the first match found is returned). If taxon does not
exist and the `TaxonSet` is not mutable, an exception is raised.
If taxon does not exist and the `TaxonSet` is mutable, then a
new taxon is created, added, and returned.
"""
taxon = self.get_taxon(**kwargs)
if taxon is not None:
return taxon
elif self._is_mutable:
taxon = Taxon(label=kwargs.get("label", None), oid=kwargs.get("oid", None))
self.add(taxon)
return taxon
return None
else:
raise KeyError("Taxon not in TaxonSet, and cannot be created because TaxonSet is immutable.")

def add_taxon(self, taxon):
    """
    Adds `taxon` to self via `self.add`.

    Raises a KeyError (and adds nothing) when `self._is_mutable` is
    false.
    """
    if self._is_mutable:
        self.add(taxon)
        return
    message = "Taxon %s:'%s' cannot be added to an immutable TaxonSet." % (taxon.oid, taxon.label)
    raise KeyError(message)

def new_taxon(self, label=None, oid=None, error_if_label_exists=False):
"Creates and add a new `Taxon` if not already in the taxon index."
if not self._is_mutable:
raise Exception("Taxon %s:'%s' cannot be added to an immutable taxon index." % (oid, label))
raise KeyError("Taxon %s:'%s' cannot be added to an immutable TaxonSet." % (oid, label))
if error_if_label_exists and self.get_taxon(label=label, taxon_required=False) is not None:
raise Exception("Taxon with label %s:'%s' already definied in the taxon index." % (oid, label))
raise KeyError("Taxon with label %s:'%s' already defined in the TaxonSet." % (oid, label))
taxon = Taxon(label=label, oid=oid)
self.add(taxon)
return taxon
Expand Down
2 changes: 1 addition & 1 deletion dendropy/seqsim.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ def compose_char_map(self, tree, taxon_set=None, include=None, exclude=None):
for state in seq:
cvec.append(dendropy.CharacterDataCell(value=state))
if taxon_set is not None:
taxon = taxon_set.get_taxon(label=leaf.taxon.label)
taxon = taxon_set.require_taxon(label=leaf.taxon.label)
else:
taxon = leaf.taxon
char_map[taxon] = cvec
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,57 @@ class TreeInstantiationTest(unittest.TestCase):

def test_tree_init(self):

newick_str = "((A,B),(C,D));"
nexus_str = """\
#NEXUS
begin taxa;
dimensions ntax=4;
taxlabels =
A
B
C
D
;
end;
begin trees;
translate
1 A,
2 B,
3 C,
4 D;
tree 1 = ((A,B)i1, (C,D)i2)root;
end;
"""

# from file, using keywords
t1 = Tree(istream=StringIO("((A,B):i1,(C,D):i2);"), format="newick", oid="t1")
t1 = Tree(istream=StringIO(newick_str), format="newick", oid="t1")
self.assertTrue(t1.oid == "t1", "'%s'" % t1.oid)
t1.debug_check_tree(_LOG)

# test copying
t2 = Tree(t1)
t2.debug_check_tree(_LOG)
self.compare_trees(t1, t2)

# from file, args
t3 = Tree(StringIO("((A,B),(C,D));"), "newick")
t3 = Tree(StringIO(newick_str), "newick")
t3.debug_check_tree(_LOG)

# from file, mixed
t4 = Tree(StringIO("((A,B),(C,D));"), format="newick")
t4 = Tree(StringIO(newick_str), format="newick")
t4.debug_check_tree(_LOG)

# NEXUS
t5 = Tree(istream=StringIO(nexus_str), format="nexus")
t6 = Tree(StringIO(nexus_str), format="nexus")
t7 = Tree(StringIO(nexus_str), "nexus")
for tx in (t5, t6, t7):
tx.is_mutable = False
self.assertEqual(len(tx.taxon_set), 4)
self.assertTrue(tx.taxon_set.has_taxa(labels=["A", "B", "C", "D"]))
self.assertTrue(tx.taxon_set.has_taxon(label="A"))
self.assertTrue(tx.taxon_set.has_taxa(taxa=self.taxon_set))
t8 = Tree(t5)

def compare_trees(self, t1, t2):
self.assertTrue(t2 is not t1)
Expand Down
6 changes: 3 additions & 3 deletions dendropy/tests/test_taxon.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ def test_taxonobj(self):
self.assertEquals(t.label, "T%d" % (idx+1))
ti.lock()
self.assertRaises(Exception, ti.new_taxon, label="A1")
self.assertRaises(Exception, ti.get_taxon, label="A1", taxon_required=True, oid=None)
self.assertRaises(Exception, ti.require_taxon, label="A1", oid=None)
ti.unlock()
ti.new_taxon("X1")
self.assertEquals(ti.get_taxon("X2", taxon_required=False), None)
ti.get_taxon("X3")
self.assertEquals(ti.get_taxon("X2"), None)
ti.require_taxon("X3")
self.assertEquals(len(ti), 12)

if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion dendropy/tests/test_tree_taxon_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def build_tree_from_labels(self, tax_labels):
tree = treeobj.Tree()
leaf_nodes = []
for tax_label in tax_labels[:4]:
t = tree.taxon_set.get_taxon(label=tax_label, taxon_required=True)
t = tree.taxon_set.require_taxon(label=tax_label)
leaf_nodes.append(treeobj.Node(taxon=t))
self.assertEquals(len(tree.taxon_set), 4)
tree.seed_node.add_child(node=leaf_nodes[0])
Expand Down
4 changes: 2 additions & 2 deletions dendropy/tests/test_treecalcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ def test_pat_distance(self):
tree = dendropy.Tree(str="(((a:1, b:1):1, c:2):1, (d:2, (e:1,f:1):1):1):0;", format="newick")
pdm = treecalc.PatristicDistanceMatrix(tree)
def _chk_distance(pdm, t1, t2, exp_distance):
tax1 = tree.taxon_set.get_taxon(label=t1)
tax2 = tree.taxon_set.get_taxon(label=t2)
tax1 = tree.taxon_set.require_taxon(label=t1)
tax2 = tree.taxon_set.require_taxon(label=t2)
pd = pdm(tax1, tax2)
assert pd == exp_distance, ("%s, %s: Expecting %d, but received %d" % (t1, t2, exp_distance, pd))
_chk_distance(pdm, "a", "b", 2)
Expand Down
2 changes: 1 addition & 1 deletion dendropy/treesim.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ def constrained_kingman(pop_tree,
gene_nodes = []
for gene_count in range(getattr(leaf, num_genes_attr)):
gene_node = dataobject.Node()
gene_node.taxon = gtaxa.get_taxon(label=gene_node_label_func(leaf.taxon.label, gene_count+1))
gene_node.taxon = gtaxa.require_taxon(label=gene_node_label_func(leaf.taxon.label, gene_count+1))
gene_nodes.append(gene_node)
leaf.gene_nodes = gene_nodes

Expand Down

0 comments on commit 6d91fbe

Please sign in to comment.