# Auspice Trimming

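This notebook demonstrates how to load an Auspice v2 JSON, trim its tree down to the Santa Clara County samples and their close relatives, translate sample names, and add display metadata before writing the result back out.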

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from tree import *
from meta_munge import *

In [None]:
# with open("/Users/josh/data/covidtracker/santaclara/ncov_rr68.json", 'r') as fp:
#     js = json.load(fp)
# t = Tree(js['tree'])

In [None]:
# Load the Auspice JSON and wrap its 'tree' entry in a Tree for editing.
# JSON text is UTF-8, so decode it explicitly rather than relying on the
# platform's locale encoding.
with open("/Users/josh/data/covidtracker/santaclara/ncov_with_accessions.json", 'r', encoding='utf-8') as fp:
    js = json.load(fp)
t = Tree(js['tree'])

In [None]:
# Annotate every leaf with the county that get_county() returns for it.
for tree_node in t.nodes:
    if tree_node.type != NodeType.LEAF:
        continue
    tree_node.set_attr('county', get_county(tree_node))

In [None]:
# Normalize the submitting lab: leaves labelled 'Biohub' get the full name.
for leaf in (n for n in t.nodes if n.type == NodeType.LEAF):
    lab_is_short_form = leaf.get_attr('submitting_lab') == 'Biohub'
    if lab_is_short_form:
        leaf.set_attr('submitting_lab', 'Chan-Zuckerberg Biohub')

In [None]:
# Copy the submitting lab into 'local_lab' for leaves whose division is California.
for leaf in t.nodes:
    if leaf.type != NodeType.LEAF:
        continue
    submitting_lab = leaf.get_attr('submitting_lab')
    if leaf.get_attr('division') == 'California':
        leaf.set_attr('local_lab', submitting_lab)

In [None]:
# Seed selection: every node whose 'county' attribute equals 'Santa Clara'.
nodes_to_keep = list(
    filter(lambda n: n.get_attr('county') == 'Santa Clara', t.nodes)
)

In [None]:
# Size of the current selection; as the cell's last expression it is shown as the output.
len(nodes_to_keep)

In [None]:
# Step 1: add the ancestors of the Santa Clara nodes.
scc_lineage = walk_to_root(nodes_to_keep)
# Step 2: add everything that sits 0 SNPs away from those ancestors.
zero_snp_set = walk_down(scc_lineage, mode='mutations', depth=0)
# Step 3: rebuild the selection from its leaves only, which filters out
# internal nodes that have no descendants in the selection.
kept_leaves = [n for n in zero_snp_set if n.type == NodeType.LEAF]
nodes_to_keep = walk_to_root(kept_leaves)

In [None]:
# Restrict the tree to the selected nodes.
# NOTE(review): presumably modifies `t` in place, since later cells keep using `t` — confirm.
t.subset_tree(nodes_to_keep)

In [None]:
# Node count of the tree at this point; shown as the cell output.
len(t.nodes)

# Translate Names

In [None]:
# Table of sample identifiers; the columns used below are CZB_ID,
# gisaid_name and Supplier_ID.
scc_ids = pd.read_csv('/Users/josh/data/covidtracker/santaclara/scc_sample_ids.csv')

czb_ids = scc_ids['CZB_ID']
gisaid_names = scc_ids['gisaid_name']
supplier_ids = scc_ids['Supplier_ID']

# Both the CZB ID and the GISAID name translate to the supplier ID. The GISAID
# entries are merged second, so they take precedence if a key occurs in both.
local_translator = {
    **dict(zip(czb_ids, supplier_ids)),
    **dict(zip(gisaid_names, supplier_ids)),
}

# CZB ID -> GISAID name.
gisaid_translator = dict(zip(czb_ids, gisaid_names))

In [None]:
# Choose the naming scheme applied to the tree: supplier IDs via
# local_translator (the default) or GISAID names via gisaid_translator.
# Set USE_LOCAL_IDS = False to switch to GISAID names.
USE_LOCAL_IDS = True
id_translator = local_translator if USE_LOCAL_IDS else gisaid_translator

for node in t.nodes:
    # Names containing 'RR0' are cut down to their first two '_'-separated
    # fields (presumably so they line up with the translator keys).
    if 'RR0' in node.name:
        node.name = "_".join(node.name.split('_')[:2])
    # Names with no entry in the translator are left unchanged.
    if node.name in id_translator:
        node.name = id_translator[node.name]

# Add Metadata

In [None]:
# Register 'county' as a categorical coloring at the front of the colorings list.
county_coloring = dict(key='county', title='County', type='categorical')
meta_colorings = js['meta']['colorings']
meta_colorings.insert(0, county_coloring)

In [None]:
# Register 'local_lab' as a categorical coloring at the front of the colorings list.
local_lab_coloring = dict(key='local_lab', title='Local Lab', type='categorical')
meta_colorings = js['meta']['colorings']
meta_colorings.insert(0, local_lab_coloring)

In [None]:
# Put 'county' first among the filters and add the two lab fields at the end.
meta_filters = js['meta']['filters']
meta_filters.insert(0, 'county')
meta_filters.extend(['originating_lab', 'submitting_lab'])

In [None]:
# Maintainer entries, each a name plus a URL.
maintainers = [
    dict(name='Chan Zuckerberg Biohub', url='https://www.czbiohub.org'),
    dict(name='Santa Clara DPH', url='https://www.sccgov.org/sites/phd/Pages/phd.aspx'),
]

In [None]:
# Store the maintainer list under the 'maintainers' key of the dataset metadata.
js['meta']['maintainers'] = maintainers

In [None]:
# Default view: color by county, geographic resolution set to division.
js['meta']['display_defaults'].update(
    color_by='county',
    geo_resolution='division',
)

In [None]:
# Read the Markdown description text. Decode it as UTF-8 explicitly instead
# of depending on the platform's locale encoding.
with open('/Users/josh/data/covidtracker/santaclara/scc_description.md', 'r', encoding='utf-8') as fp:
    description = fp.read()

In [None]:
# Store the description text under the 'description' key of the dataset metadata.
js['meta']['description'] = description

In [None]:
# Serialize everything before touching the output path: opening with 'w'
# truncates the file immediately, so a failure in t.to_dict() or in JSON
# encoding would otherwise leave an empty or partial file behind.
auspice_json = json.dumps(
    {
        "meta": js['meta'],
        "version": js['version'],
        "tree": t.to_dict(),
    },
    indent=2,
)
# json.dumps escapes non-ASCII characters by default, so the bytes written are
# the same under any ASCII-compatible encoding; UTF-8 is stated for clarity.
with open('/Users/josh/data/covidtracker/santaclara/scc_local_id_new.json', 'w', encoding='utf-8') as fp:
    fp.write(auspice_json)

# Misc

- Tree Library: https://github.com/caesar0301/treelib
- Baltic: https://github.com/evogytis/baltic