In [1]:
import numpy as np
import pandas as pd
import toytree
from scipy.optimize import minimize
from scipy.linalg import expm

In [2]:
# Toy binary character states; 12 values to go with the 12-tip tree built below.
testdata = [0,0,1,1,0,1,0,1,0,1,1,0]
# NOTE(review): no seed is passed; if rtree.unittree draws a random topology,
# the tree (and every number derived from it) changes between runs -- confirm.
testtree = toytree.rtree.unittree(ntips = 12)
# Last expression of the cell, so its return value is what the cell displays.
testtree.draw(tree_style = 'p')

(<toyplot.canvas.Canvas at 0x7fcb0724ad00>,
 <toyplot.coordinates.Cartesian at 0x7fcb0724ad60>,
 <toytree.Render.ToytreeMark at 0x7fcb0724a2e0>)

In [3]:
def data_to_dict(data):
    """
    Convert observed binary states into per-index likelihood dictionaries
    in the format read by cond_like and the pruning functions.

    A state of 0 becomes {0: 1, 1: 0} and a state of 1 becomes {0: 0, 1: 1}.

    Parameters
    ----------
    data : iterable
        Observed character states; every entry must be 0 or 1.

    Returns
    -------
    dict
        Maps the position of each entry in `data` (0, 1, 2, ...) to its
        {0: ..., 1: ...} likelihood dictionary. Empty input gives {}.

    Raises
    ------
    ValueError
        If an entry is not 0 or 1. Without this check a state such as 2
        would silently yield the invalid likelihoods {0: -1, 1: 2}.
    """
    valuesdict = {}
    for idx, state in enumerate(data):
        if state not in (0, 1):
            raise ValueError(
                "data must contain only binary states 0 or 1; "
                "got {!r} at position {}".format(state, idx)
            )
        # 1 - state (rather than -(state - 1)) avoids a -0.0 for float input
        valuesdict[idx] = {0: 1 - state, 1: state}
    return valuesdict

In [4]:
check = data_to_dict(data = testdata)
# NOTE: a bare expression that is not the last line of a cell is not displayed.
check

# Store the dictionaries on the tree under the feature name 'test'
# (keys of `check` are the indices 0..11).
testtree = testtree.set_node_values('test', values = check)
# Display the stored values; entries that received no value show as '' below.
testtree.get_node_values('test',True,True)

array(['', '', '', '', '', '', '', '', '', '', '', {0: 1, 1: 0},
       {0: 0, 1: 1}, {0: 0, 1: 1}, {0: 1, 1: 0}, {0: 0, 1: 1},
       {0: 1, 1: 0}, {0: 0, 1: 1}, {0: 1, 1: 0}, {0: 0, 1: 1},
       {0: 0, 1: 1}, {0: 1, 1: 0}, {0: 1, 1: 0}], dtype=object)

In [58]:
def assign_tip_like_values(tree, data):
    """
    Store observed binary states on a tree as a 'likelihood' node feature.

    Each state s (0 or 1) is written as {0: 1 - s, 1: s}, keyed by the
    position of s in `data`, and handed to tree.set_node_values.

    Parameters
    ----------
    tree : object with a set_node_values(feature=..., values=...) method
        In this notebook a toytree tree.
        NOTE(review): positions 0..len(data)-1 are presumably the tip node
        indices of the tree -- confirm against toytree's node numbering.
    data : iterable
        Observed character states; every entry must be 0 or 1.

    Returns
    -------
    Whatever tree.set_node_values returns (the notebook rebinds the tree
    variable to this value).

    Raises
    ------
    ValueError
        If an entry of `data` is not 0 or 1. Without this check a state
        such as 2 would silently store the invalid likelihoods {0: -1, 1: 2}.
    """
    valuesdict = {}
    for idx, state in enumerate(data):
        if state not in (0, 1):
            raise ValueError(
                "data must contain only binary states 0 or 1; "
                "got {!r} at position {}".format(state, idx)
            )
        # 1 - state (rather than -(state - 1)) avoids a -0.0 for float input
        valuesdict[idx] = {0: 1 - state, 1: state}
    return tree.set_node_values(feature="likelihood", values=valuesdict)

In [6]:
# Attach the likelihood dictionaries built from testdata to the 12-tip tree
# under the 'likelihood' feature.
mytree = assign_tip_like_values(tree = testtree, data=testdata)
# Display the feature for all nodes; entries without a value show as '' below.
mytree.get_node_values('likelihood',True,True)

array(['', '', '', '', '', '', '', '', '', '', '', {0: 1, 1: 0},
       {0: 0, 1: 1}, {0: 0, 1: 1}, {0: 1, 1: 0}, {0: 0, 1: 1},
       {0: 1, 1: 0}, {0: 0, 1: 1}, {0: 1, 1: 0}, {0: 0, 1: 1},
       {0: 0, 1: 1}, {0: 1, 1: 0}, {0: 1, 1: 0}], dtype=object)

In [67]:
def cond_like(likeleft0, likeleft1, likeright0, likeright1, tL, tR, alpha, beta):
    """
    Conditional likelihood of each ancestral state (0 and 1) at one node,
    given the state likelihoods of its two descendants.

    Parameters
    ----------
    likeleft0, likeleft1 : likelihood of state 0 / state 1 at the left descendant
    likeright0, likeright1 : likelihood of state 0 / state 1 at the right descendant
    tL, tR : branch lengths leading to the left / right descendant
    alpha : instantaneous rate placed in row 0 of the rate matrix (0 -> 1)
    beta : instantaneous rate placed in row 1 of the rate matrix (1 -> 0)

    Returns
    -------
    dict
        {0: likelihood if the ancestor is 0, 1: likelihood if the ancestor is 1}
    """
    # Two-state rate matrix; row = ancestral state, column = descendant state.
    rate_matrix = np.array([[-alpha, alpha], [beta, -beta]])

    # Transition probabilities along each branch via the matrix exponential.
    trans_left = expm(rate_matrix * tL)
    trans_right = expm(rate_matrix * tR)

    def _branch_like(trans, anc_state, like0, like1):
        # Sum over the two possible descendant states for one branch.
        return trans[anc_state, 0] * like0 + trans[anc_state, 1] * like1

    # For each ancestral state, the two branches contribute as a product.
    return {
        anc_state: (
            _branch_like(trans_left, anc_state, likeleft0, likeleft1)
            * _branch_like(trans_right, anc_state, likeright0, likeright1)
        )
        for anc_state in (0, 1)
    }

In [103]:
def pruning_alg_before(tree, alpha, beta):
    """
    Compute state likelihoods at every two-child node using one fixed pair
    of rates, visiting nodes in postorder.

    Each visited node's {0: ..., 1: ...} dictionary is printed and then
    written to node.likelihood, so the tree is modified in place. Nodes
    whose number of children is not exactly 2 are left untouched.

    Parameters
    ----------
    tree : tree object exposing tree.treenode.traverse("postorder"); every
        child of a two-child node must already carry a `likelihood`
        indexable by 0 and 1 and a `dist` branch length.
    alpha, beta : rates passed unchanged to cond_like for every node.

    Returns
    -------
    None
    """
    for node in tree.treenode.traverse("postorder"):
        if len(node.children) != 2:
            continue

        # first child is fed in as the "right" descendant, second as "left"
        right_child = node.children[0]
        left_child = node.children[1]

        node_like_by_state = cond_like(
            likeright0=right_child.likelihood[0],
            likeright1=right_child.likelihood[1],
            likeleft0=left_child.likelihood[0],
            likeleft1=left_child.likelihood[1],
            tR=right_child.dist,
            tL=left_child.dist,
            alpha=alpha,
            beta=beta,
        )
        print(node_like_by_state)
        node.likelihood = node_like_by_state

In [99]:
# Smaller 5-tip example used for the pruning runs in the following cells.
tre1 = toytree.rtree.unittree(ntips=5)
# Observed binary states, five values for the five tips.
data1 = [0,1,1,0,0]
tre2 = assign_tip_like_values(tree=tre1, data=data1)
# Display the feature for all nodes; entries without a value show as '' below.
tre2.get_node_values('likelihood',True,True)

array(['', '', '', '', {0: 1, 1: 0}, {0: 1, 1: 0}, {0: 0, 1: 1},
       {0: 0, 1: 1}, {0: 1, 1: 0}], dtype=object)

In [105]:
# Fill in likelihoods at the two-child nodes using one fixed pair of rates
# (alpha = beta = 8.0). pruning_alg_before prints each node's dictionary and
# writes it onto the node, so tre2 itself carries the new values afterwards.
pruning_alg_before(tree=tre2,alpha=8.0,beta=8.0)
tre2.get_node_values('likelihood',True,True)

{0: 0.24991613434302426, 1: 0.24991613434302432}
{0: 0.12491614840992109, 1: 0.12499998593310307}
{0: 0.06247903358339737, 1: 0.06247903358339738}
{0: 0.03123952030724314, 1: 0.03123951327615419}


array([{0: 0.03123952030724314, 1: 0.03123951327615419},
       {0: 0.06247903358339737, 1: 0.06247903358339738},
       {0: 0.12491614840992109, 1: 0.12499998593310307},
       {0: 0.24991613434302426, 1: 0.24991613434302432}, {0: 1, 1: 0},
       {0: 1, 1: 0}, {0: 0, 1: 1}, {0: 0, 1: 1}, {0: 1, 1: 0}],
      dtype=object)

In [10]:
def node_like(x0, likeleft0, likeleft1, likeright0, likeright1, tL, tR, anca):
    """
    Negative weighted likelihood at one node, used as the objective that
    model_fit hands to scipy's minimize.

    Parameters
    ----------
    x0 : indexable of length >= 2; x0[0] is alpha and x0[1] is beta.
    likeleft0, likeleft1, likeright0, likeright1, tL, tR :
        forwarded unchanged to cond_like.
    anca : weight given to ancestral state 1; state 0 receives 1 - anca.

    Returns
    -------
    The weighted likelihood with its sign flipped (raw likelihood, not
    log-transformed).
    """
    alpha, beta = x0[0], x0[1]
    by_state = cond_like(likeleft0, likeleft1, likeright0, likeright1, tL, tR, alpha, beta)

    # mix the two ancestral-state likelihoods according to anca
    weighted = (1 - anca) * by_state[0] + anca * by_state[1]

    # NOTE(review): when anca is exactly 0 or 1 the value is halved. Presumably
    # this applies a flat 1/2 prior on the ancestral state so that fixed-state
    # scores are comparable with the anca = 0.5 case -- confirm the intent.
    if anca in (0., 1.):
        weighted = weighted / 2

    # negated because the optimizer minimizes
    return -weighted

In [13]:
def model_fit(likeleft0, likeleft1, likeright0, likeright1, tL, tR, anca):
    """
    Estimate the two rates (alpha, beta) that maximize the likelihood at a
    single node, given its descendants' state likelihoods and branch lengths.

    The search minimizes node_like with L-BFGS-B, starting from (1, 1) and
    keeping both rates within [0, 10].

    Returns
    -------
    dict with keys
        "alpha", "beta" : fitted rates, rounded to 3 decimals
        "lik"           : likelihood at the fitted rates, rounded to 3 decimals
        "convergence"   : the optimizer's success flag
    """
    extra_args = (likeleft0, likeleft1, likeright0, likeright1, tL, tR, anca)
    start_rates = np.array([1., 1.])
    rate_bounds = ((0, 10), (0, 10))

    fit = minimize(
        fun=node_like,
        x0=start_rates,
        args=extra_args,
        method='L-BFGS-B',
        bounds=rate_bounds,
    )

    # node_like returns the negated likelihood, so flip the sign back
    best_lik = -1 * node_like(fit.x, *extra_args)

    return dict(
        alpha=round(fit.x[0], 3),
        beta=round(fit.x[1], 3),
        lik=round(best_lik, 3),
        convergence=fit.success,
    )

In [14]:
# Quick check: left descendant in state 1, right descendant in state 0, both
# branch lengths 5, ancestral weight 0.5. The fitted rates shown below equal the
# optimizer's starting point (1, 1).
# NOTE(review): presumably the likelihood surface is flat at this branch
# length, so the optimizer never moves -- confirm.
model_fit(0,1,1,0,5,5,0.5)

{'alpha': 1.0, 'beta': 1.0, 'lik': 0.25, 'convergence': True}

In [118]:
def fit_model_at_nodes(tree, anca=0.5):
    """
    Fit a separate (alpha, beta) pair at every two-child node and store the
    estimates on the nodes as `alpha` and `beta`.

    The likelihoods read here are the ones already stored on each child;
    this function never writes `likelihood`, so every child of a two-child
    node must carry one before the call. Each fit result is printed.

    Parameters
    ----------
    tree : tree object providing set_node_values and treenode.traverse.
    anca : weight of ancestral state 1, forwarded to model_fit (default 0.5).

    Returns
    -------
    The tree obtained from the set_node_values calls, with `alpha` and
    `beta` set on the fitted nodes.
    """
    # create the two features on the tree before filling them per node
    tree = tree.set_node_values('alpha')
    tree = tree.set_node_values('beta')

    for node in tree.treenode.traverse("postorder"):
        if len(node.children) != 2:
            continue

        # first child is fed in as the "right" descendant, second as "left"
        right_child = node.children[0]
        left_child = node.children[1]

        fitted = model_fit(
            likeright0=right_child.likelihood[0],
            likeright1=right_child.likelihood[1],
            likeleft0=left_child.likelihood[0],
            likeleft1=left_child.likelihood[1],
            tR=right_child.dist,
            tL=left_child.dist,
            anca=anca,
        )
        print(fitted)
        node.alpha = fitted['alpha']
        node.beta = fitted['beta']

    return tree

In [119]:
# Fit per-node rates on the tree whose two-child nodes already carry likelihoods;
# anca=0.6 weights state 1 by 0.6 and state 0 by 0.4 inside node_like.
tre3 = fit_model_at_nodes(tree=tre2, anca=0.6)
# The displayed array holds the values as strings (dtype '<U32' in the output below).
tre3.get_node_values('alpha',True,True)

{'alpha': 9.977, 'beta': 10.0, 'lik': 0.25, 'convergence': True}
{'alpha': 10.0, 'beta': 0.0, 'lik': 0.249, 'convergence': True}
{'alpha': 0.0, 'beta': 10.0, 'lik': 0.125, 'convergence': True}
{'alpha': 0.0, 'beta': 8.267, 'lik': 0.062, 'convergence': True}


array(['0.0', '0.0', '10.0', '9.977', '', '', '', '', ''], dtype='<U32')

In [121]:
def pruning_alg_after(tree):
    """
    Pruning pass for the binary-state model that uses node-specific rates.

    First calls fit_model_at_nodes (with its default anca) to estimate an
    alpha and beta at every two-child node, then walks the tree in postorder
    and recomputes each of those nodes' state likelihoods with the node's
    own fitted rates. No rates are taken as arguments.

    NOTE(review): the fitting step reads the likelihoods already stored on
    the children, so the input tree must carry them before this is called,
    and the fits are based on those earlier values rather than on the ones
    recomputed in the loop below -- confirm this is intended.

    Returns
    -------
    The tree returned by fit_model_at_nodes, with `likelihood` updated at
    every two-child node.
    """
    tree = fit_model_at_nodes(tree)

    for node in tree.treenode.traverse("postorder"):
        if len(node.children) != 2:
            continue

        # first child is fed in as the "right" descendant, second as "left"
        right_child = node.children[0]
        left_child = node.children[1]

        node.likelihood = cond_like(
            likeright0=right_child.likelihood[0],
            likeright1=right_child.likelihood[1],
            likeleft0=left_child.likelihood[0],
            likeleft1=left_child.likelihood[1],
            tR=right_child.dist,
            tL=left_child.dist,
            # converted explicitly to float before use as rates
            alpha=float(node.alpha),
            beta=float(node.beta),
        )

    return tree

In [122]:
# Refit the per-node rates (pruning_alg_after calls fit_model_at_nodes with its
# default anca) and recompute the two-child nodes' likelihoods with each node's
# own rates.
tre4 = pruning_alg_after(tree=tre3)
tre4.get_node_values('likelihood',True,True)

{'alpha': 10.0, 'beta': 10.0, 'lik': 0.25, 'convergence': True}
{'alpha': 10.0, 'beta': 0.0, 'lik': 0.249, 'convergence': True}
{'alpha': 0.0, 'beta': 10.0, 'lik': 0.125, 'convergence': True}
{'alpha': 0.0, 'beta': 8.684, 'lik': 0.062, 'convergence': True}


array([{0: 0.2483042397433679, 1: 0.24826230615863087},
       {0: 0.2483042397433679, 1: 0.24830509489225527},
       {0: 0.2483042397433679, 1: 0.2499886500175592},
       {0: 0.24998865001755924, 1: 0.2499886500175592}, {0: 1, 1: 0},
       {0: 1, 1: 0}, {0: 0, 1: 1}, {0: 0, 1: 1}, {0: 1, 1: 0}],
      dtype=object)

In [123]:
# Second 5-tip example with a different pattern of observed states.
newtree = toytree.rtree.unittree(ntips = 5)
newdata = [0,1,1,0,1]

In [129]:
newtree = assign_tip_like_values(tree=newtree,data=newdata)
# pruning_alg_before runs first so that every two-child node carries a likelihood
# dictionary: fit_model_at_nodes (called inside pruning_alg_after) reads
# child.likelihood for children that are themselves internal nodes.
pruning_alg_before(tree=newtree,alpha=5.0,beta=2.0)
newtree = pruning_alg_after(tree=newtree)

{0: 0.20691515843991698, 1: 0.2029229574844161}
{0: 0.22896919294242238, 1: 0.1914398763797467}
{0: 0.14488324266621644, 1: 0.1441994633863237}
{0: 0.02951533310630989, 1: 0.029445891375951643}
{'alpha': 4.259, 'beta': 4.259, 'lik': 0.25, 'convergence': True}
{'alpha': 8.166, 'beta': 8.163, 'lik': 0.25, 'convergence': True}
{'alpha': 7.041, 'beta': 0.0, 'lik': 0.192, 'convergence': True}
{'alpha': 0.0, 'beta': 8.499, 'lik': 0.03, 'convergence': True}


In [130]:
# Likelihood dictionaries on newtree after the node-specific-rate pass above.
newtree.get_node_values('likelihood',True,True)

array([{0: 0.06192616013452055, 1: 0.06195980488444142},
       {0: 0.2499970791260673, 1: 0.2499970791260674},
       {0: 0.24770753462800554, 1: 0.2499949163015982},
       {0: 0.24999570773387142, 1: 0.2499949163015982}, {0: 0, 1: 1},
       {0: 1, 1: 0}, {0: 0, 1: 1}, {0: 0, 1: 1}, {0: 1, 1: 0}],
      dtype=object)