In [155]:
import json
import os

from augur.utils import json_to_tree
# relative path to the tree JSON (auspice/mers.json) inside the nextstrain phylogenetic build directory
tree_file = '../nextstrain-build/phylogenetic/auspice/mers.json'

In [156]:
# load the tree JSON from disk
with open(tree_file, 'r') as tree_handle:
    tree_json = json.load(tree_handle)

# convert the parsed JSON into a Bio.Phylo tree object
tree = json_to_tree(tree_json)

# maps each branch (keyed by node name) to the list of S1 mutations on it
muts_in_s1 = dict()


In [157]:

# walk every branch of the tree and record the mutations listed under 'S1'
# in that branch's 'branch_attrs'; branches without any S1 mutations
# (or without a 'mutations' entry at all) are stored with an empty list
muts_in_s1.update({
    clade.name: clade.branch_attrs.get('mutations', {}).get('S1', [])
    for clade in tree.find_clades()
})

In [158]:
# for each tip, store all S1 mutations that have happened between root and that tip
# also store what host this tip was sampled from
mut_and_host_info_by_tip = {}

# iterate through tips only
for node in tree.find_clades(terminal=True):
    # get the path from root to this tip
    # NOTE: per the Bio.Phylo docs, get_path() ends with the tip itself but
    # excludes the root clade, so mutations annotated on the root are not included
    path = tree.get_path(node)

    # concatenate the S1 mutations of every branch on the path, in root-to-tip order
    # (looked up in the 'muts_in_s1' dict, which is keyed by node name)
    muts_on_path = [mut for path_node in path for mut in muts_in_s1[path_node.name]]

    # get the host this virus was sampled from: node_attrs['host']['value']
    # a tip without a host annotation gets None (JSON null) rather than an
    # empty dict, so a missing host cannot be mistaken for a host name
    host = node.node_attrs.get('host', {})
    host_name = host.get('value')

    # store the tip name as the key with the value being a dictionary that gives the host and S1 mutations
    mut_and_host_info_by_tip[node.name] = {'host': host_name, 'muts_on_path': muts_on_path}
    
# use json dump to save 'mut_and_host_info_by_tip' to a file
output_path = 'intermediate-results/mut_and_host_info_by_tip.json'
# create the output directory if needed, so that the write does not fail with
# FileNotFoundError when 'intermediate-results/' does not exist yet
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, 'w') as outfile:
    json.dump(mut_and_host_info_by_tip, outfile)