In [1]:
import __future__
import sys
import json
# Assume that nextstrain/augur directory is next to h3n2_reassortment directory
sys.path.append('../../nextstrain/augur')
from base import io_util

In [2]:
# Note: to restore the saved JSONs in case of error, run the following
# from nextstrain/auspice:
#   cp ~/Desktop/saved_reassortment_jsons/* data/

In [3]:
# Default data directory and the auspice JSON files inside it
data_dir = '../data/auspice_jsons/'
ha_file = '{}flu_seasonal_h3n2_ha_2y_tree.json'.format(data_dir)
na_file = '{}flu_seasonal_h3n2_na_2y_tree.json'.format(data_dir)
meta_json = '{}flu_seasonal_h3n2_ha_2y_meta.json'.format(data_dir)

# Mutations that identify A (HA tree) and B (NA tree) identities
A_mut, B_mut = u'T131K', u'N329S'

In [4]:
# Parse the HA and NA tree JSONs, then rebuild a tree object from each.
# The file only needs to stay open while the JSON is being parsed.
with open(ha_file, 'r') as f:
    dump = json.load(f)
ha_tree = io_util.json_to_tree(dump)

with open(na_file, 'r') as f:
    dump = json.load(f)
na_tree = io_util.json_to_tree(dump)

In [5]:
# Print the NA tree object returned by json_to_tree (quick sanity check of the load)
print(na_tree)

NODE_0001922


In [6]:
# Exploration of the HA tree root: list the attribute/method names it exposes
print(dir(ha_tree))

# Number of direct children of the root, and whether the root itself is a leaf
print(len(ha_tree.clades))
print(ha_tree.is_terminal())

def count_nodes(root):
    '''Count the terminal nodes (leaves) in the subtree rooted at `root`.

    Despite the name, internal nodes are not counted: only nodes whose
    `clades` list is empty contribute to the total.

    Parameters
    ----------
    root : object
        Node exposing a `clades` sequence of child nodes.

    Returns
    -------
    int
        Number of nodes reachable from `root` (including `root` itself)
        that have no children.
    '''
    count = 0
    # Use the list as a stack: pop() from the end is O(1), whereas pop(0)
    # shifts every remaining element. Visit order does not affect the total.
    pending = [root]

    while pending:
        node = pending.pop()

        if node.clades:
            pending.extend(node.clades)
        else:
            count += 1

    return count

# count_nodes only counts childless nodes, so this prints the number of leaves in the HA tree
print(count_nodes(ha_tree))
        


['__bool__', '__class__', '__delattr__', '__dict__', '__doc__', '__format__', '__getattribute__', '__getitem__', '__hash__', '__init__', '__iter__', '__len__', '__module__', '__new__', '__nonzero__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_color', '_filter_search', '_get_color', '_set_color', 'attr', 'branch_length', 'clade', 'clades', 'collapse', 'collapse_all', 'color', 'comment', 'common_ancestor', 'confidence', 'count_terminals', 'depths', 'distance', 'find_any', 'find_clades', 'find_elements', 'get_nonterminals', 'get_path', 'get_terminals', 'is_bifurcating', 'is_monophyletic', 'is_parent_of', 'is_preterminal', 'is_terminal', 'ladderize', 'name', 'numdate', 'prune', 'root', 'serum', 'split', 'strain', 'total_branch_length', 'trace', 'tvalue', 'up', 'width', 'xvalue', 'yvalue']
2
False
1951


In [7]:
def label_na_clade(clade, na_label, B_mutation, na_strains_lookup):
    '''Recursively label every node of the NA tree as NA-329N or NA-329S.

    A node starts from the label inherited from its parent (`na_label`).
    If that label is 'NA-329N' and `B_mutation` is listed among the node's
    own NA amino-acid mutations, the node and everything below it switch
    to 'NA-329S'. The resolved label is written to
    `clade.attr[u'na_mut_label']`.

    Leaves are also added to `na_strains_lookup` (strain name -> resolved
    label) for state lookup by strain name, which is used by
    label_ha_na_co_clades.

    Parameters
    ----------
    clade : tree node with `attr`, `clades`, `is_terminal()`, `strain`
        (on leaves) and optionally `aa_muts`.
    na_label : str
        Label inherited from the parent; 'NA-329N' for the root call.
    B_mutation : str
        Mutation string that marks the switch to 'NA-329S'.
    na_strains_lookup : dict
        Filled in place; nothing is returned.
    '''
    label = na_label
    if na_label == 'NA-329N' and hasattr(clade, 'aa_muts'):
        # .get tolerates nodes whose aa_muts has no NA entry at all
        if B_mutation in clade.aa_muts.get(u'NA', ()):
            label = 'NA-329S'
    clade.attr[u'na_mut_label'] = label
    if clade.is_terminal():
        # Store the resolved label, not the inherited one, so a leaf whose
        # own branch carries the mutation agrees with its na_mut_label.
        na_strains_lookup[clade.strain] = label
    else:
        for child in clade.clades:
            label_na_clade(child, label, B_mutation, na_strains_lookup)

In [8]:
def label_ha_na_co_clades(clade, ha_label, A_mutation, 
                          na_strains, 
                          counts=None,
                          ha_strains=None):
    '''Assign combined HA/NA labels to the leaves of the HA tree recursively.

    The HA state starts from the label inherited from the parent
    (`ha_label`). If that label is 'HA1-131T' and `A_mutation` is listed
    among the node's own HA1 amino-acid mutations, the node and everything
    below it switch to 'HA1-131K'. Each leaf is then paired with the NA
    label found for its strain in `na_strains`, giving
    '<HA label>, <NA label>', or 'unmatched' when the strain is missing
    from `na_strains`. The result is written to
    `clade.attr[u'co_clade_label']`.

    Parameters
    ----------
    clade : tree node with `attr`, `clades`, `is_terminal()`, `strain`
        (on leaves) and optionally `aa_muts`.
    ha_label : str
        Label inherited from the parent; 'HA1-131T' for the root call.
    A_mutation : str
        Mutation string that marks the switch to 'HA1-131K'.
    na_strains : dict
        Strain name -> NA label, as filled by label_na_clade.
    counts : dict, optional
        Running tally of leaves per co-clade label; created when omitted.
        Labels not yet present are added on first use.
    ha_strains : dict, optional
        Strain name -> co-clade label; created when omitted.

    Returns
    -------
    (counts, ha_strains) : tuple of dict
        The count return is mostly for debugging and can be removed later.
    '''
    if ha_strains is None:
        ha_strains = {}
    if counts is None:
        counts = {'HA1-131T, NA-329N': 0, 
                  'HA1-131T, NA-329S': 0, 
                  'HA1-131K, NA-329N': 0, 
                  'HA1-131K, NA-329S': 0, 
                  'unmatched': 0}
    label = ha_label
    if ha_label == 'HA1-131T' and hasattr(clade, 'aa_muts'):
        try:
            if A_mutation in clade.aa_muts[u'HA1']:
                label = 'HA1-131K'
        except (KeyError, TypeError):
            # Best effort: keep the inherited label and show what the node
            # carried instead. Only lookup failures are caught, so real
            # errors and KeyboardInterrupt are no longer swallowed.
            print(clade.aa_muts)
    if clade.is_terminal():
        try:
            co_label = "{}, {}".format(label, na_strains[clade.strain])
        except KeyError:
            # Strain is present in the HA tree but not in the NA tree
            co_label = 'unmatched'
        clade.attr[u'co_clade_label'] = co_label
        ha_strains[clade.strain] = co_label
        # .get lets a caller-supplied tally, or an unexpected NA label,
        # be counted instead of raising KeyError
        counts[co_label] = counts.get(co_label, 0) + 1
    else:
        for child in clade.clades:
            label_ha_na_co_clades(child, label, A_mutation, na_strains, counts, ha_strains)
        
    return counts, ha_strains

In [9]:
import sys
def re_label_na_tree(node, lookup):
    '''Write co-clade labels onto every leaf at or below `node`.

    Each leaf gets `attr[u'co_clade_label']` set to `lookup[leaf.strain]`,
    or to 'unmatched' when the strain is not a key of `lookup`. Internal
    nodes are only traversed, never labelled. Returns None.
    '''
    pending = [node]
    while pending:
        current = pending.pop()
        if not current.is_terminal():
            pending.extend(current.clades)
            continue
        try:
            co_label = lookup[current.strain]
        except KeyError:
            co_label = 'unmatched'
        current.attr[u'co_clade_label'] = co_label

In [10]:
# Label the NA tree from the root, starting in state 'NA-329N'.
# NA_strains is filled in place with one entry per leaf strain.
NA_strains = {}
label_na_clade(na_tree, 'NA-329N', B_mut, NA_strains)

In [11]:
# Label the HA tree leaves with combined HA/NA labels, starting in state 'HA1-131T'.
# C is the per-label leaf tally; ha_and_na_labels maps strain name -> co-clade label.
(C, ha_and_na_labels) = label_ha_na_co_clades(ha_tree, 'HA1-131T', A_mut, NA_strains)

In [12]:
# Show how many HA leaves fell into each co-clade label (including 'unmatched')
print(C)

{'HA1-131T, NA-329N': 887, 'HA1-131K, NA-329S': 299, 'HA1-131K, NA-329N': 171, 'HA1-131T, NA-329S': 460, 'unmatched': 134}


In [13]:
# Copy the co-clade labels onto the NA tree leaves; NA strains missing from the
# HA-derived lookup are labelled 'unmatched'
re_label_na_tree(na_tree, ha_and_na_labels)

In [14]:
# NOTE: updating the meta JSON is currently disabled. The commented-out lines
# below (together with the commented-out write after the HA tree is saved)
# sketch the intended steps: load the meta JSON, then register a new color option.
# modify_meta_json(j):
# with open(meta_json, 'r') as f:
#    meta_dict = json.loads(f.read())
#
#print(meta_dict.keys())
#print(meta_dict['color_options'])
#
# Color-option entry for the co_clade_label attribute. It is only referenced by
# the commented-out assignment below, so as written it has no effect.
x = {u'key': u'co_clade_label',
   u'legendTitle': u'Co-clade',
   u'menuItem': u'co_clade',
   u'type': u'discrete'}

#meta_dict[u'color_options'][u'co_clade_label'] = x

In [15]:
# Convert and serialize the tree BEFORE opening the file. ha_file is also this
# notebook's input, and opening it with 'w' truncates it immediately, so a
# failure in tree_to_json or in JSON encoding would otherwise leave an empty
# or partial file behind.
data = io_util.tree_to_json(ha_tree, ['clade', 'attr', 'serum', 'muts', 'aa_muts'])
serialized = json.dumps(data, indent=1)
with open(ha_file, 'w') as f:
    f.write(serialized)
    
#with open(meta_json, 'w') as f:
#    json.dump(meta_dict, f, indent=1)

In [16]:
# Convert and serialize the tree BEFORE opening the file. na_file is also this
# notebook's input, and opening it with 'w' truncates it immediately, so a
# failure in tree_to_json or in JSON encoding would otherwise leave an empty
# or partial file behind.
data = io_util.tree_to_json(na_tree, ['clade', 'attr', 'serum', 'muts', 'aa_muts'])
serialized = json.dumps(data, indent=1)
with open(na_file, 'w') as f:
    f.write(serialized)

In [17]:
# Round-trip check: reload the HA JSON that was just written and compare the
# attribute names on the reloaded root with those on the in-memory root.
with open(ha_file, 'r') as f:
    dump = json.load(f)
ha_tree2 = io_util.json_to_tree(dump)

dir(ha_tree2) == dir(ha_tree)

True