### Rename tips on the oak tree

In [1]:
import toytree
import toyplot
import pandas as pd

In [18]:
# Show the installed toytree version; note that this notebook targets toytree 2.0.
toytree.__version__

'2.0.0'

### Load the samples metadata

In [14]:
# Full sample table as read from disk; kept intact under the name DATA.
DATA = pd.read_csv("../oak-data/Hipp-2019-table-S1.csv")

# Source column -> short name used in the rest of the notebook.
column_names = {
    "SRA_Run_Accession_RADseq": "SRA",
    "subgenus": "subgenus",
    "section": "section",
    "clade": "clade",
    "Cleaned_NAMES-USE-THIS": "species",
}
columns = list(column_names)

# Keep only the columns of interest, in the order listed above, under short names.
data = DATA[columns].rename(columns=column_names)
data.head()

Unnamed: 0,SRA,subgenus,section,clade,species
0,SRR5632368,subg. Quercus,Quercus,Leucomexicana,Quercus rugosa
1,SRR5632418,subg. Quercus,Lobatae,Erythromexicana,Quercus calophylla
2,SRR5632456,subg. Quercus,Quercus,Leucomexicana,Quercus magnoliifolia
3,SRR5632554,subg. Quercus,Lobatae,Erythromexicana,Quercus crassipes
4,SRR5632551,subg. Quercus,Lobatae,Erythromexicana,Quercus scytophylla


### Load the tetrad tree

In [25]:
# Location of the tree file, relative to this notebook.
tree_path = "../tree-files/robur-ref-basic.tree"
tre = toytree.tree(tree_path)

### Make a name conversion dictionary

In [64]:
# Keep only the metadata rows whose SRA accession is a tip label in the tree.
# The tip labels are fetched from the tree once and matched with a vectorized
# membership test, instead of calling tre.get_tip_labels() again for every row.
mask = data.SRA.isin(tre.get_tip_labels())

# Give the filtered frame a fresh 0..n-1 index.
tipdata = data.loc[mask, :].reset_index(drop=True)
tipdata.head()

Unnamed: 0,SRA,subgenus,section,clade,species
0,SRR5632418,subg. Quercus,Lobatae,Erythromexicana,Quercus calophylla
1,SRR5632552,subg. Quercus,Lobatae,Erythromexicana,Quercus crassipes
2,SRR5284351,subg. Quercus,Quercus,Albae,Quercus michauxii
3,SRR5632579,subg. Quercus,Lobatae,Laurifoliae,Quercus pagoda
4,SRR5284355,subg. Quercus,Quercus,Prinoids,Quercus macrocarpa


In [67]:
# Rows with no subgenus get an empty string in all three rank columns.
rank_columns = ["subgenus", "section", "clade"]
no_subgenus = tipdata["subgenus"].isna()
tipdata.loc[no_subgenus, rank_columns] = ""

In [68]:
# Build the lookup {SRA accession: "subgenus|section|clade|Q.species"} that
# maps each sample to its composite name.
tips_dict = {}
sample_rows = zip(
    tipdata.SRA,
    tipdata.subgenus,
    tipdata.section,
    tipdata.clade,
    tipdata.species,
)
for sra, subgenus, section, clade, species in sample_rows:

    if subgenus == "":
        # outgroup samples: placeholder ranks, full species name with the
        # spaces replaced by hyphens
        ranks = ("NaN", "NaN", "NaN")
        epithet = species.replace(" ", "-")
    else:
        # oak samples: last word of the subgenus field (e.g. "subg. Quercus"
        # -> "Quercus") and last word of the species name
        ranks = (subgenus.split()[-1], section, clade)
        epithet = species.split()[-1]

    tips_dict[sra] = "{}|{}|{}|Q.{}".format(*ranks, epithet)

In [90]:
# Set the 'name' value on the tree from tips_dict (SRA accession -> composite
# label) and bind the result to a new variable, ntre.
# NOTE(review): tips absent from tips_dict presumably keep their current
# names -- confirm against the toytree set_node_values documentation.
ntre = tre.set_node_values('name', tips_dict)

### TODO: resolve tree tips that are missing from the metadata table

In [91]:
# Tip labels in the tree that have no entry in the name-conversion dictionary.
[
    label
    for label in tre.get_tip_labels()
    if label not in tips_dict
]

['SRR5984311',
 'SRR8860579',
 'SRR5984304',
 'SRR5984321',
 'SRR5632562',
 'SRR5632417',
 'reference',
 'SRR1915548',
 'SRR1915533',
 'SRR1915549',
 'SRR1915534',
 'SRR1915542',
 'SRR1915528',
 'SRR1915525',
 'SRR1915539']

### Draw the tree

In [114]:
# Root the relabelled tree using the tips selected by the "Lithocarpus" wildcard.
rtre = ntre.root(wildcard="Lithocarpus")

# Label fragments that pick out the clades whose edges get mapped values.
clade_patterns = [
    "Cerris|",
    "Lobatae|",
    "Roburoids|",
    "Virentes|",
    "Protoba",
    "Leuco",
]

# MRCA node idx for each clade, looked up on the rooted tree.
# NOTE(review): this is a set of node idxs with no explicit values attached --
# confirm that get_edge_values_mapped accepts this form in toytree 2.0.
clade_nodes = {
    rtre.get_mrca_idx_from_tip_labels(wildcard=pattern)
    for pattern in clade_patterns
}

# Draw the tree at a height of 2000 with 8px tip labels, ignoring edge lengths.
canvas, axes, mark = rtre.draw(
    height=2000,
    tip_labels_style={"font-size": "8px"},
    use_edge_lengths=False,
    edge_colors=rtre.get_edge_values_mapped(clade_nodes),
)

In [115]:
# Root the relabelled tree using the tips selected by the "Lithocarpus" wildcard.
rtre = ntre.root(wildcard="Lithocarpus")

# Label fragments that pick out the clades whose edges get mapped values.
clade_patterns = [
    "Cerris|",
    "Lobatae|",
    "Roburoids|",
    "Virentes|",
    "Protoba",
    "Leuco",
]

# MRCA node idx for each clade, looked up on the rooted tree.
# NOTE(review): this is a set of node idxs with no explicit values attached --
# confirm that get_edge_values_mapped accepts this form in toytree 2.0.
clade_nodes = {
    rtre.get_mrca_idx_from_tip_labels(wildcard=pattern)
    for pattern in clade_patterns
}

# Draw a compact overview: layout 'c', no tip labels, edge lengths ignored.
canvas, axes, mark = rtre.draw(
    height=600,
    tip_labels=False,
    use_edge_lengths=False,
    edge_colors=rtre.get_edge_values_mapped(clade_nodes),
    layout='c',
)