In [20]:
import baltic as bt
import dendropy
from collections import defaultdict

In [21]:
# Output directory for the tree files written further down in this notebook
out_path = '../data/trees/'

# The same tree is loaded twice; each copy carries a different set of annotations.
genotype_tree_file = '../data/trees/genotype_tree.nexus'
antigenic_tree_file = '../titer_model/full-tree-model-output/dengue_fulltree_tree.json'

# Passed to baltic's JSON loader as `json_translation`
antigenic_json_translation = {'name': 'clade', 'height': 'cTiter'}

# Copy 1: has taxonomic annotations
genotype_tree = bt.loadNexus(genotype_tree_file, absoluteTime=False)

# Copy 2: has antigenic distance annotations
antigenic_tree = bt.loadJSON(antigenic_tree_file,
                             json_translation=antigenic_json_translation)


Tree height: 1.955144
Tree length: 6.071398
annotations present

Numbers of objects in tree: 4792 (2230 nodes and 2562 leaves)



In [22]:
def get_width(k, tree, trait='cTiter'):
    """Count the objects in `tree` that share `k`'s value for `trait`.

    Parameters
    ----------
    k : object with a `traits` dict containing `trait`
    tree : object exposing an iterable `Objects`, each element having a
        `traits` dict containing `trait`
    trait : key looked up in every `traits` dict (default 'cTiter')

    Returns
    -------
    int
        Number of elements of `tree.Objects` whose `traits[trait]` compares
        equal to `k.traits[trait]` (`k` itself is counted if it is in the tree).

    Raises
    ------
    KeyError
        If `k` or any element of `tree.Objects` lacks `trait`.
    """
    target = k.traits[trait]
    return sum(1 for other in tree.Objects if other.traits[trait] == target)

# Annotate every object of the antigenic tree with a float 'width' trait: the number of
# objects in the tree that share its cTiter value.
#
# The counts are tallied once up front instead of calling get_width() per object, which
# rescanned the whole tree for every object (quadratic in the number of objects).
# Grouping is by dict key (hash + equality), which matches get_width's `==` comparison
# for ordinary numeric cTiter values.
titer_counts = defaultdict(int)
for k in antigenic_tree.Objects:
    titer_counts[k.traits['cTiter']] += 1

for k in antigenic_tree.Objects:
    k.traits['width'] = float(titer_counts[k.traits['cTiter']])
    

In [23]:
## Transfer taxonomic traits to the antigenic tree

transfer_traits = ['genotype', 'serotype', 'width']
clade_lookup = { }

## Step 1: from the genotype tree, record whichever transfer_traits each object carries,
## keyed by its integer clade. Objects without a 'clade' trait are skipped.
for k in genotype_tree.Objects:
    if 'clade' in k.traits:
        clade = int(k.traits['clade'])
        clade_lookup[clade] = dict((tr, k.traits[tr]) for tr in transfer_traits if tr in k.traits)

## Step 2: fold each antigenic-tree object's own traits into the stored entry for its clade
## (the antigenic tree's values win on key collisions), then point the object at that merged
## dict so the object and clade_lookup share one dict.
## A clade missing from clade_lookup raises KeyError here.
for k in antigenic_tree.Objects:
    if 'clade' in k.traits:
        clade = int(k.traits['clade'])
        merged = clade_lookup[clade]
        merged.update(k.traits)
        k.traits = merged

In [24]:
# ## Write the annotated antigenic tree to file for safekeeping
# annotated_antigenic_tree_str = antigenic_tree.toString(traits=transfer_traits, nexus=True)
# open(out_path+'annotated_antigenic_tree.nexus', 'w').write(annotated_antigenic_tree_str)

In [25]:
# ### Create a collapsed, annotated antigenic tree file by taking a hacky detour through dendropy

# ### This block only needs to be run once to create the collapsed_antigenic_tree file.
# ### Deal with collapsing antigenically uniform clades in dendropy 
# ### (initially load in baltic to convert from JSON to newick)

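# ### NOTE(review): `uncollapsed_antigenic_tree` is not defined in the cells shown here; presumably
# ### it is the `antigenic_tree` loaded above -- confirm before re-enabling this block.
# antigenic_tree_str = uncollapsed_antigenic_tree.toString()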
# antigenic_tree_newick_file = out_path+'antigenic_tree.newick'
# open(antigenic_tree_newick_file, 'w').write(antigenic_tree_str)

# ### Now edit the newick file to remove the outermost root (branch length 0.00) so that dendropy accepts it
# dendropy_antigenic_tree = dendropy.Tree.get(file=open(antigenic_tree_newick_file, 'r'), schema='newick')

# # collapse internal nodes
# for edge in dendropy_antigenic_tree.postorder_edge_iter():
#     if edge.tail_node is not None and edge.is_internal():
#         if edge.length < 0.01:
#             edge.collapse()
            
# # prune tips with 0 length branches
# # however, need to keep 1 tip per polytomy to properly draw

# for node in dendropy_antigenic_tree.postorder_node_iter():
#     if node.is_leaf() and node.edge_length < 0.01:
#         siblings = node.sibling_nodes()
#         no_length_leaf_siblings_count = 0
#         for sibling in node.sibling_nodes():
#             if sibling.is_leaf() and sibling.edge_length < 0.01:
#                 no_length_leaf_siblings_count += 1
#         if no_length_leaf_siblings_count > 0:
#             dendropy_antigenic_tree.prune_subtree(node)

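# ### NOTE: the next cell reads this file from '../data/trees/collapsed_antigenic_tree.newick',
# ### so move the output there (or write to that path) after running this block.
# collapsed_antigenic_tree_file = './collapsed_antigenic_tree.newick'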
# dendropy_antigenic_tree.write(path=collapsed_antigenic_tree_file, schema='newick')

In [26]:
#### Load the collapsed tree into baltic to attach traits; write the collapsed, annotated tree to file
collapsed_antigenic_tree_file = '../data/trees/collapsed_antigenic_tree.newick'

# Only the first line of the file is used as the newick string.
# The `with` block closes the handle as soon as the line has been read.
with open(collapsed_antigenic_tree_file, 'r') as newick_handle:
    collapsed_antigenic_tree_str = newick_handle.readline().strip()
if not collapsed_antigenic_tree_str:
    raise ValueError('No newick string found on the first line of ' + collapsed_antigenic_tree_file)

# Build the baltic tree from the newick string and prepare it (traverse, sort, lay out)
collapsed_antigenic_tree = bt.tree()
bt.make_tree(collapsed_antigenic_tree_str, collapsed_antigenic_tree)
collapsed_antigenic_tree.traverse_tree()
collapsed_antigenic_tree.sortBranches()
collapsed_antigenic_tree.drawTree()

#### Recover the metadata we stored earlier
# Objects carrying a `numName` are matched to clade_lookup by the integer value of that
# name; matches receive the stored trait dict and are renamed to the integer clade.
# Objects without `numName`, or whose clade is not in clade_lookup, are left untouched.
for k in collapsed_antigenic_tree.Objects:
    if hasattr(k, 'numName'):
        clade = int(k.numName)
        if clade in clade_lookup:
            k.traits = clade_lookup[clade]
            k.name = clade

annotated_collapsed_tree_str = collapsed_antigenic_tree.toString(traits=['genotype', 'serotype', 
                                                                         'clade','cTiter', 'width'], nexus=True)

# out_path may already end in '/', so strip it before joining to avoid a doubled separator.
annotated_collapsed_tree_file = out_path.rstrip('/') + '/annotated_collapsed_antigenic_tree.nexus'
with open(annotated_collapsed_tree_file, 'w') as nexus_handle:
    nexus_handle.write(annotated_collapsed_tree_str)