In [2]:
import dendropy
from dendropy import Tree
import math
from dendropy import Node
import operator

In [3]:
def prod(factors):
    """Return the product of all numbers in ``factors``.

    Args:
        factors: iterable of numbers to multiply together.

    Returns:
        The product of the elements, or 1 if ``factors`` is empty.
    """
    # ``reduce`` is a builtin only on Python 2; ``functools.reduce`` is
    # available on Python 2.6+ and Python 3, so import it explicitly.
    from functools import reduce

    return reduce(operator.mul, factors, 1)

In [4]:
def zipped_sorted_intervals(tree):
    """Describe the intervals between consecutive nodes of ``tree``.

    Nodes are sorted by their distance from the root; every node except the
    first one in that order closes one interval.

    Args:
        tree: object providing ``nodes()``,
            ``calc_node_root_distances(return_leaf_distances_only=False)`` and
            ``num_lineages_at(distance)``. Nodes and distances are paired by
            position, which assumes both calls use the same node order --
            TODO confirm for the DendroPy version in use.

    Returns:
        A list of ``(node, interval_length, num_lineages)`` tuples ordered
        from the node farthest from the root to the nearest one.
        ``interval_length`` is the gap between the node's root distance and
        that of the preceding node in sorted order; ``num_lineages`` is
        ``tree.num_lineages_at`` evaluated at the node's root distance.
        The list is empty when the tree has fewer than two nodes.
    """
    root_distances = tree.calc_node_root_distances(return_leaf_distances_only=False)

    # sort nodes by distance from root (sorted() is stable, so ties keep
    # the order in which the tree reported them)
    sorted_zipped_dists = sorted(zip(tree.nodes(), root_distances),
                                 key=lambda pair: pair[1])
    sorted_dists = [dist for (_node, dist) in sorted_zipped_dists]

    # take those distances and transform them into interval lengths
    interval_lengths = [later - earlier
                        for earlier, later in zip(sorted_dists[:-1], sorted_dists[1:])]

    # the first sorted node closes no interval, so it is skipped below
    closing_nodes = [node for (node, _dist) in sorted_zipped_dists[1:]]
    lineages = [tree.num_lineages_at(dist) for dist in sorted_dists[1:]]

    # combine the node, interval length, and number of lineages back together;
    # zip() must be materialised because zip objects cannot be reversed on Python 3
    intervals = list(zip(closing_nodes, interval_lengths, lineages))
    intervals.reverse()
    return intervals

In [5]:
def conditioned_prob_lineage_coalescence(tree, popsize):
    """Per-lineage coalescence probability for every interval of ``tree``.

    For each interval ``i`` returned by ``zipped_sorted_intervals`` (same
    order), with ``p_i = 1 - exp(-(lineages_i / popsize) * length_i)``, the
    result holds::

        (product of (1 - p_j) for all j < i) * p_i / lineages_i

    i.e. the probability of coalescing in interval ``i`` given no coalescence
    in any earlier interval of the list, split evenly over the lineages
    present in that interval.

    Args:
        tree: tree object accepted by ``zipped_sorted_intervals``.
        popsize: number the lineage count is divided by to obtain the rate
            (presumably the effective population size -- confirm with the
            model being used). Must be non-zero.

    Returns:
        A list of floats, one per interval; empty if the tree has no
        intervals.

    Raises:
        ZeroDivisionError: if ``popsize`` or an interval's lineage count is 0.
    """
    coalintervals = []

    # probability of not having coalesced in any of the intervals handled so
    # far; kept as a running product (same multiplication order as taking the
    # product of the prefix each time, without the repeated work)
    prob_no_earlier_coalescence = 1

    for (_node, length, lineages) in zipped_sorted_intervals(tree):
        # probability of coalescence within this interval in continuous time
        # (not yet conditioned on its place in the tree)
        prob_coalescence = 1 - math.exp(-(float(lineages) / popsize) * length)

        coalintervals.append(prob_no_earlier_coalescence * prob_coalescence / lineages)

        prob_no_earlier_coalescence *= 1 - prob_coalescence

    return coalintervals

In [6]:
def lineages_in_each_interval(tree):
    """List the nodes (lineages) present after each node is visited.

    Nodes are visited from the one farthest from the root to the one nearest.
    Visiting a node adds it to the set of current lineages and removes its
    children from that set; a snapshot is recorded after every visit.

    Args:
        tree: object providing ``nodes()`` and
            ``calc_node_root_distances(return_leaf_distances_only=False)``
            (paired by position), whose nodes provide ``child_nodes()``.

    Returns:
        A list with one entry per node, in visiting order. Each entry is a
        list of the nodes current at that point, oldest surviving lineage
        first and the node just visited last.
    """
    # the nodes paired with their root distance, nearest to the root first
    sorted_zipped_nodes = sorted(
        zip(tree.nodes(), tree.calc_node_root_distances(return_leaf_distances_only=False)),
        key=lambda pair: pair[1])

    current_lineages = []
    nodes_in_interval_set = []

    for (node, _dist) in reversed(sorted_zipped_nodes):
        children = set(node.child_nodes())

        # Drop the children of the visited node, then add the node itself.
        # Filtering (rather than a symmetric difference) can only remove
        # children, never insert them, and it keeps a reproducible order.
        # A new list is built on every pass, so stored snapshots are never
        # mutated afterwards.
        current_lineages = [lineage for lineage in current_lineages
                            if lineage not in children] + [node]

        nodes_in_interval_set.append(current_lineages)

    return nodes_in_interval_set


In [7]:
def pcoal_along_edge(tree, popsize):
    """Sum, per edge, the coalescence probabilities of the intervals it spans.

    Args:
        tree: tree object accepted by ``conditioned_prob_lineage_coalescence``
            and ``lineages_in_each_interval``.
        popsize: passed through to ``conditioned_prob_lineage_coalescence``.

    Returns:
        A dict keyed by ``node.edge`` (the edge attached to each lineage's
        node, not the node itself). Each value is the sum of the per-interval
        probabilities over every interval in which that node is listed.
    """
    interval_probs = conditioned_prob_lineage_coalescence(tree, popsize)
    lineage_sets = lineages_in_each_interval(tree)

    edge_prob = {}

    # walk every lineage set except the last one, pairing it by position
    # with the probability of the same index
    for position in range(len(lineage_sets) - 1):
        for lineage in lineage_sets[position]:
            branch = lineage.edge
            if branch in edge_prob:
                edge_prob[branch] += interval_probs[position]
            else:
                edge_prob[branch] = interval_probs[position]

    return edge_prob

In [8]:
def calculate_cumulative_node_prob(tree, popsize):
    """Total the edge probabilities found below each internal node.

    Args:
        tree: tree object providing ``leaf_nodes()`` and ``nodes()``, whose
            nodes provide ``preorder_iter()`` and ``child_edge_iter()``.
        popsize: passed through to ``pcoal_along_edge``.

    Returns:
        A dict mapping every non-leaf node to the sum of
        ``pcoal_along_edge`` values over all child edges of all nodes in the
        subtree rooted at that node.
    """
    edge_probs = pcoal_along_edge(tree, popsize)

    # nodes that are in nodes() but not in leaf_nodes(), i.e. the internal ones
    internal_nodes = set(tree.leaf_nodes()).symmetric_difference(tree.nodes())

    cumulative_node_prob = {}
    for internal in internal_nodes:
        subtree_total = 0
        # every node of the subtree contributes the edges leading to its children
        for descendant in internal.preorder_iter():
            for branch in descendant.child_edge_iter():
                subtree_total += edge_probs[branch]
        cumulative_node_prob[internal] = subtree_total

    return cumulative_node_prob

In [None]:
# Run every step of the pipeline on the tree read from "jittered-ebola.nex"
# (NEXUS format) with popsize = 1. The intermediate results are printed with
# Python 2 print statements, each followed by a blank line; the final call is
# left as a bare expression so the notebook displays it as the cell result.
tr = Tree.get(path="jittered-ebola.nex", schema="nexus")
popsize = 1
print zipped_sorted_intervals(tr), "\n"
print conditioned_prob_lineage_coalescence(tr, popsize), "\n"
print lineages_in_each_interval(tr), "\n"
print pcoal_along_edge(tr, popsize), "\n"
calculate_cumulative_node_prob(tr, popsize)

In [11]:
# Same pipeline as the previous cell, run on the tree read from "toytree.nex"
# (NEXUS format) with popsize = 50. Note that this rebinds `tr` and `popsize`.
# Intermediate results are printed with Python 2 print statements; the final
# call is a bare expression so its value is shown as the cell result.
tr = Tree.get(path="toytree.nex", schema="nexus")
popsize = 50
print zipped_sorted_intervals(tr), "\n"
print conditioned_prob_lineage_coalescence(tr, popsize), "\n"
print lineages_in_each_interval(tr), "\n"
print pcoal_along_edge(tr, popsize), "\n"
calculate_cumulative_node_prob(tr, popsize)

[(<Node object at 0x5d65e80L: 'None' (<Taxon 0x5d65c18L '1'>)>, 2.0, 1), (<Node object at 0x5d65ef0L: 'None' (<Taxon 0x5d65c50L '2'>)>, 2.0, 2), (<Node object at 0x5bbd0f0L: 'None' (<Taxon 0x5d65cc0L '4'>)>, 2.0, 3), (<Node object at 0x5bbd080L: 'None' (<Taxon 0x5d65c88L '3'>)>, 6.0, 4), (<Node object at 0x5d65fd0L: 'None' (None)>, 4.0, 3), (<Node object at 0x5d65dd8L: 'None' (None)>, 2.0, 2), (<Node object at 0x5bbd160L: 'None' (<Taxon 0x5d65cf8L '5'>)>, 4.0, 3), (<Node object at 0x5d65f60L: 'None' (None)>, 12.0, 2)] 

[0.03921056084767682, 0.036934501217582856, 0.03343085855020138, 0.07496890127664543, 0.034619789994953194, 0.014719102008165941, 0.02513912716852866, 0.05299621831507181] 

[[<Node object at 0x5d65e80L: 'None' (<Taxon 0x5d65c18L '1'>)>], [<Node object at 0x5d65e80L: 'None' (<Taxon 0x5d65c18L '1'>)>, <Node object at 0x5d65ef0L: 'None' (<Taxon 0x5d65c50L '2'>)>], [<Node object at 0x5d65e80L: 'None' (<Taxon 0x5d65c18L '1'>)>, <Node object at 0x5d65ef0L: 'None' (<Taxon 0x5

{<Node object at 0x5d652b0L: 'None' (None)>: 0.8279551361769494,
 <Node object at 0x5d65dd8L: 'None' (None)>: 0.3991186629264425,
 <Node object at 0x5d65f60L: 'None' (None)>: 0.2829858074436687,
 <Node object at 0x5d65fd0L: 'None' (None)>: 0.18336866110349226}