In [1]:
import __future__
import sys
import json
sys.path.append('/Users/bpotter/nextstrain/augur')
from base import io_util

In [2]:
# Note: to restore the saved JSONs after an error, run the following
# from the nextstrain/auspice directory:
#   cp ~/Desktop/saved_reassortment_jsons/* data/

In [3]:
# Default locations of the auspice JSON files read (and rewritten) below
data_dir = '/Users/bpotter/nextstrain/auspice/data/'
ha_file = ''.join([data_dir, 'flu_seasonal_h3n2_ha_2y_tree.json'])
na_file = ''.join([data_dir, 'flu_seasonal_h3n2_na_2y_tree.json'])
meta_json = ''.join([data_dir, 'flu_seasonal_h3n2_ha_2y_meta.json'])

# Amino-acid mutations whose presence switches a lineage from a -> A (HA)
# and from b -> B (NA)
A_mut, B_mut = u'T131K', u'N329S'

In [4]:
# Parse the HA and NA tree JSONs and convert each one with
# io_util.json_to_tree
with open(ha_file, 'r') as ha_handle:
    ha_tree = io_util.json_to_tree(json.load(ha_handle))
with open(na_file, 'r') as na_handle:
    na_tree = io_util.json_to_tree(json.load(na_handle))

In [5]:
# Quick inspection of the HA tree root: available attributes, number of
# direct children, and whether the root is itself a leaf
root_attributes = dir(ha_tree)
print(root_attributes)

root_child_count = len(ha_tree.clades)
print(root_child_count)
root_is_leaf = ha_tree.is_terminal()
print(root_is_leaf)

['__bool__', '__class__', '__delattr__', '__dict__', '__doc__', '__format__', '__getattribute__', '__getitem__', '__hash__', '__init__', '__iter__', '__len__', '__module__', '__new__', '__nonzero__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_color', '_filter_search', '_get_color', '_set_color', 'attr', 'branch_length', 'clade', 'clades', 'collapse', 'collapse_all', 'color', 'comment', 'common_ancestor', 'confidence', 'count_terminals', 'depths', 'distance', 'find_any', 'find_clades', 'find_elements', 'get_nonterminals', 'get_path', 'get_terminals', 'is_bifurcating', 'is_monophyletic', 'is_parent_of', 'is_preterminal', 'is_terminal', 'ladderize', 'name', 'numdate', 'prune', 'root', 'serum', 'split', 'strain', 'total_branch_length', 'trace', 'tvalue', 'up', 'width', 'xvalue', 'yvalue']
2
False


In [6]:
def label_na_clade(clade, na_label, B_mutation, na_strains_lookup):
    '''Assign labels b or B to nodes in the NA tree recursively.

    A node is labelled 'B' once B_mutation appears in the NA amino-acid
    mutations of the node itself or of any ancestor; otherwise it keeps
    'b'. The label is stored in clade.attr['na_mut_label'].

    Leaves are also added to na_strains_lookup (strain name -> label),
    which allows easy state lookup by strain name and is used by
    label_ha_na_co_clades.

    Parameters:
        clade: tree node exposing attr, clades, is_terminal() and, on
            leaves, strain; aa_muts is optional.
        na_label: label inherited from the parent ('b' or 'B').
        B_mutation: mutation string that marks the b -> B transition.
        na_strains_lookup: dict filled in place with leaf labels.

    Returns:
        None; the tree and na_strains_lookup are modified in place.
    '''
    label = na_label
    if na_label == 'b' and hasattr(clade, 'aa_muts'):
        # Tolerate nodes whose aa_muts has no (or an empty) NA entry.
        na_mutations = clade.aa_muts.get(u'NA') or ()
        if B_mutation in na_mutations:
            label = 'B'
    clade.attr[u'na_mut_label'] = label
    if clade.is_terminal():
        # Record the node's own label, not the inherited one: a mutation on
        # the terminal branch itself must be reflected in the lookup too.
        na_strains_lookup[clade.strain] = label
    else:
        for child in clade.clades:
            label_na_clade(child, label, B_mutation, na_strains_lookup)

In [7]:
def label_ha_na_co_clades(clade, ha_label, A_mutation,
                          na_strains,
                          counts=None):
    '''Assign ab, aB, Ab, and AB labels recursively to HA tree.

    The HA state switches from 'a' to 'A' once A_mutation appears in the
    HA1 amino-acid mutations of a node or of any ancestor. For each leaf
    the HA state is combined with the b/B state looked up by strain name
    in na_strains, and the result is stored in
    clade.attr['co_clade_label']. Leaves with no entry in na_strains are
    labelled 'unmatched'. Internal nodes receive no label.

    Parameters:
        clade: tree node exposing attr, clades, is_terminal() and, on
            leaves, strain; aa_muts is optional.
        ha_label: label inherited from the parent ('a' or 'A').
        A_mutation: mutation string that marks the a -> A transition.
        na_strains: mapping of strain name -> 'b' or 'B'.
        counts: optional dict of label -> count to accumulate into; a
            fresh one is created when omitted.

    Returns:
        The counts dict. The count return is mostly for debugging and
        can be removed later.
    '''
    if counts is None:
        counts = {'ab': 0, 'Ab': 0, 'aB': 0, 'AB': 0, 'unmatched': 0}

    label = ha_label
    if ha_label == 'a' and hasattr(clade, 'aa_muts'):
        try:
            if A_mutation in clade.aa_muts[u'HA1']:
                label = 'A'
        except (KeyError, TypeError):
            # Best effort: a node with no usable HA1 entry keeps the inherited
            # label; its mutations are printed for debugging.
            print(clade.aa_muts)

    if clade.is_terminal():
        try:
            matched_na_node_label = na_strains[clade.strain]
        except (KeyError, AttributeError):
            # Strain missing from the NA lookup, or leaf without a strain name.
            co_label = 'unmatched'
        else:
            co_label = label + matched_na_node_label
        clade.attr[u'co_clade_label'] = co_label
        # .get() keeps counting usable with a caller-supplied dict that
        # lacks this key.
        counts[co_label] = counts.get(co_label, 0) + 1
    else:
        for child in clade.clades:
            label_ha_na_co_clades(child, label, A_mutation, na_strains, counts)

    return counts

In [8]:
# Strain name -> b/B lookup, filled in place while the NA tree is labelled
NA_strains = dict()
label_na_clade(clade=na_tree, na_label='b', B_mutation=B_mut,
               na_strains_lookup=NA_strains)

In [9]:
# Label the HA tree starting from state 'a' at the root; C holds the
# per-label leaf counts
C = label_ha_na_co_clades(
    clade=ha_tree,
    ha_label='a',
    A_mutation=A_mut,
    na_strains=NA_strains,
)

In [13]:
# Show the counts returned by label_ha_na_co_clades (debugging aid)
print(C)

{'unmatched': 134, 'ab': 887, 'aB': 460, 'AB': 299, 'Ab': 171}


In [11]:
# Original note: `modify_meta_json(j)` -- presumably a planned helper for
# this step; TODO confirm.
# Load the meta JSON and register a colour option for the co-clade label.
with open(meta_json, 'r') as meta_handle:
    meta_dict = json.load(meta_handle)

# Inspect the existing top-level keys and colour options before modifying
print(meta_dict.keys())
print(meta_dict['color_options'])

x = {
    u'key': u'co_clade_label',
    u'legendTitle': u'Co-clade',
    u'menuItem': u'co_clade',
    u'type': u'discrete',
}

# Register the new option under its own key
meta_dict[u'color_options'][x[u'key']] = x

[u'updated', u'author_info', u'virus_count', u'filters', u'maintainer', u'title', u'vaccine_choices', u'controls', u'color_options', u'seq_author_map', u'defaults', u'commit', u'panels', u'geo', u'annotations']
{u'num_date': {u'menuItem': u'date', u'legendTitle': u'Sampling date', u'type': u'continuous', u'key': u'num_date'}, u'rb': {u'menuItem': u'receptor binding mutations', u'type': u'continuous', u'legendTitle': u'Receptor binding mutations', u'key': u'rb'}, u'region': {u'menuItem': u'region', u'legendTitle': u'Region', u'color_map': [[u'china', u'#4042C7'], [u'southeast_asia', u'#4274CE'], [u'south_asia', u'#5199B7'], [u'japan_korea', u'#69B091'], [u'oceania', u'#88BB6C'], [u'west_asia', u'#ADBD51'], [u'africa', u'#CEB541'], [u'europe', u'#E39B39'], [u'south_america', u'#E56C2F'], [u'north_america', u'#DC2F24']], u'type': u'discrete', u'key': u'region'}, u'ne': {u'menuItem': u'non-epitope mutations', u'type': u'continuous', u'legendTitle': u'Non-epitope mutations', u'key': u'ne'},

In [15]:
# Serialise completely BEFORE opening the file: open(..., 'w') truncates
# ha_file immediately, so a failure inside tree_to_json or the JSON encoder
# would otherwise leave the tree JSON empty or half-written.
data = io_util.tree_to_json(ha_tree)
serialized = json.dumps(data, indent=1)
with open(ha_file, 'w') as f:
    f.write(serialized)

# Writing the meta JSON is left disabled, as in the original; enabling it
# would persist the co_clade_label colour option added to meta_dict.
#with open(meta_json, 'w') as f:
#    json.dump(meta_dict, f, indent=1)