In [3]:
from ete3 import Tree

# generate 2 random trees
t1 = Tree()
t2 = Tree()
t1.populate(5)
t2.populate(5)
# root both to the same tip name
root = t1.get_leaf_names()[0]
t1.set_outgroup(root)
t2.set_outgroup(root)

print (t1)
print (t2)


   /-aaaaaaaaaa
--|
  |   /-aaaaaaaaab
   \-|
     |   /-aaaaaaaaac
      \-|
        |   /-aaaaaaaaad
         \-|
            \-aaaaaaaaae

   /-aaaaaaaaaa
  |
--|      /-aaaaaaaaab
  |   /-|
  |  |   \-aaaaaaaaac
   \-|
     |   /-aaaaaaaaad
      \-|
         \-aaaaaaaaae


In [1]:
from ete3 import Tree

t1 = Tree(r"C:\Users\awila\Downloads\testdata\Testdata\tree1.new")
t2 = Tree(r"C:\Users\awila\Downloads\testdata\Testdata\tree2.new")


# root both to the same tip name
root = t1.get_leaf_names()[0]
t1.set_outgroup(root)
t2.set_outgroup(root)



print (t1)
print (t2)


   /-seq2
  |
  |      /-seq10
--|   /-|
  |  |   \-seq7
  |  |
   \-|   /-seq3
     |  |
     |  |         /-seq5
     |  |      /-|
      \-|     |   \-seq4
        |   /-|
        |  |  |   /-seq1
        |  |   \-|
         \-|     |   /-seq8
           |      \-|
           |         \-seq6
           |
            \-seq9

   /-seq2
  |
  |      /-seq8
--|   /-|
  |  |   \-seq6
  |  |
   \-|      /-seq5
     |   /-|
     |  |   \-seq4
      \-|
        |   /-seq9
        |  |
         \-|      /-seq1
           |   /-|
           |  |  |   /-seq10
            \-|   \-|
              |      \-seq7
              |
               \-seq3


In [2]:
#Step 1: Rooting the two input trees at the same leaf

def get_node_list(t):
    node_list = []
    for node in t : 
        node_list.append(node.name)
    return node_list

In [4]:
t1.set_outgroup(get_node_list(t1)[0])
t2.set_outgroup(get_node_list(t1)[0])

In [6]:
def step1(t1,t2):
    t1_nodes = get_node_list(t1)
    root = t1_nodes[0]
    t1.set_outgroup(root)
    t2.set_outgroup(root)
    return t1,t2

In [7]:
print(t1)
print(t2)


   /-seq2
  |
  |      /-seq10
--|   /-|
  |  |   \-seq7
  |  |
   \-|   /-seq3
     |  |
     |  |         /-seq5
     |  |      /-|
      \-|     |   \-seq4
        |   /-|
        |  |  |   /-seq1
        |  |   \-|
         \-|     |   /-seq8
           |      \-|
           |         \-seq6
           |
            \-seq9

   /-seq2
  |
  |      /-seq8
--|   /-|
  |  |   \-seq6
  |  |
   \-|      /-seq5
     |   /-|
     |  |   \-seq4
      \-|
        |   /-seq9
        |  |
         \-|      /-seq1
           |   /-|
           |  |  |   /-seq10
            \-|   \-|
              |      \-seq7
              |
               \-seq3


In [8]:
#Step 2: Make a Depth-First numbering of the leaves in T1
def step2(t1,t2):
    """Assume that t1 and t2 are rooted on the same node"""
    t1_depth_first_names = []
    for node in t1.traverse("preorder"):
        if node.name != "":
            t1_depth_first_names.append(node.name)
    return (t1_depth_first_names)

In [9]:
step2(t1,t2)

['seq2',
 'seq10',
 'seq7',
 'seq3',
 'seq5',
 'seq4',
 'seq1',
 'seq8',
 'seq6',
 'seq9']

In [89]:
def step21(t1,t2):
    """Assume that t1 and t2 are rooted on the same node"""
    t2_depth_first_names = []
    for node in t2.traverse("preorder"):
        if node.name != "":
            t2_depth_first_names.append(node.name)
    return (t2_depth_first_names)

In [90]:
step21(t1,t2)

['seq2',
 'seq8',
 'seq6',
 'seq5',
 'seq4',
 'seq9',
 'seq1',
 'seq10',
 'seq7',
 'seq3']

In [10]:
#Step 3:  Rename the leaves in T2 cf. the DF-numbering of leaves in T1

def step3(t1,t2):
    t1_ordering = step2(t1,t2)
    t2_reordering = []
    for node in t2.traverse("preorder"):
        if node.name != "":
            t2_reordering.append(t1_ordering.index(node.name))
    return t2_reordering

In [11]:
step3(t1,t2)

[0, 7, 8, 4, 5, 9, 6, 1, 2, 3]

In [12]:
depth_first_names = []
for node in t2.traverse("preorder"):
  # Do some analysis on node
    if node.name != "":
        depth_first_names.append(node.name)
depth_first_names

['seq2',
 'seq8',
 'seq6',
 'seq5',
 'seq4',
 'seq9',
 'seq1',
 'seq10',
 'seq7',
 'seq3']

In [81]:
#Step 4: Annotate internal nodes

t1_nodes_children = []
for node in t1.traverse("preorder"):
    if node.name == "":
        t1_nodes_children.append(node.get_leaf_names())

In [82]:
list_intervals = []
for l in t1_nodes_children:
    order_t1 = step2(t1,t2)
    m = order_t1.index(l[0])
    M = order_t1.index(l[0])
    for i in range(len(l)):
        if order_t1.index(l[i])<m:
            m= order_t1.index(l[i])
        if order_t1.index(l[i])>M : 
            M= order_t1.index(l[i])
    list_intervals.append("["+str(m)+","+str(M)+"]")
list_intervals

['[0,9]',
 '[1,9]',
 '[1,2]',
 '[3,9]',
 '[4,9]',
 '[4,8]',
 '[4,5]',
 '[6,8]',
 '[7,8]']

In [93]:
t2_nodes_children = []
for node in t2.traverse("preorder"):
    if node.name == "":
        t2_nodes_children.append(node.get_leaf_names())

In [94]:
list_intervals2 = []
for l in t2_nodes_children:
    order_t2 = step21(t1,t2)
    m = order_t2.index(l[0])
    M = order_t2.index(l[0])
    for i in range(len(l)):
        if order_t2.index(l[i])<m:
            m= order_t1.index(l[i])
        if order_t2.index(l[i])>M : 
            M= order_t2.index(l[i])
    list_intervals2.append("["+str(m)+","+str(M)+"]")
list_intervals2

['[0,9]',
 '[1,9]',
 '[1,2]',
 '[3,9]',
 '[3,4]',
 '[5,9]',
 '[6,9]',
 '[6,8]',
 '[7,8]']

In [97]:
def RF_dist(t1,t2):
    step1(t1,t2)
    order_t1 = step2(t1,t2) #step2
    order_t2 = step21(t1,t2)
    
    #skip step3 because I do not use it
    #step 4.1
    t1_nodes_children = []
    for node in t1.traverse("preorder"):
        if node.name == "":
            t1_nodes_children.append(node.get_leaf_names())
    list_intervals = []
    for l in t1_nodes_children:
        m = order_t1.index(l[0])
        M = order_t1.index(l[0])
        for i in range(len(l)):
            if order_t1.index(l[i])<m:
                m= order_t1.index(l[i])
            if order_t1.index(l[i])>M : 
                M= order_t1.index(l[i])
        list_intervals.append("["+str(m)+","+str(M)+"]")
    
    # Step 4.2 
    t2_nodes_children = []
    for node in t2.traverse("preorder"):
        if node.name == "":
            t2_nodes_children.append(node.get_leaf_names())
    list_intervals2 = []
    for l in t2_nodes_children:
        order_t2 = step21(t1,t2)
        m = order_t2.index(l[0])
        M = order_t2.index(l[0])
        for i in range(len(l)):
            if order_t2.index(l[i])<m:
                m= order_t1.index(l[i])
            if order_t2.index(l[i])>M : 
                M= order_t2.index(l[i])
        if M-m+1 == len(l):
            list_intervals2.append("["+str(m)+","+str(M)+"]")
        
    

    # Step 5 
    l1 = sorted(list_intervals)
    l2 = sorted(list_intervals2)
    i=0
    j=0
    share = 0
    while i<len(l1) and j<len(l2):
        if l1[i]==l2[j]:
            share+=1
            i+=1
            j+=1
        else : 
            i+=1
    return(2*len(l1)+2*len(l2)-2*share)

SyntaxError: 'return' outside function (<ipython-input-97-f0068ff7cd3a>, line 55)

In [80]:
t1 = Tree(r"C:\Users\awila\Downloads\testdata\Testdata\tree1.new")
t2 = Tree(r"C:\Users\awila\Downloads\testdata\Testdata\tree2.new")
RF_dist(t1,t2)

8

In [63]:
t1 = Tree(r"C:\Users\awila\Downloads\testdata\Testdata\tree2.new")
t2 = Tree(r"C:\Users\awila\Downloads\testdata\Testdata\tree1.new")
RF_dist(t1,t2)

8

In [74]:
t1 = Tree(r"C:\Users\awila\Downloads\Tree\Tree\quicktree_clustal.newick")
t2 = Tree(r"C:\Users\awila\Downloads\Tree\Tree\quicktree_kalign.newick")
RF_dist(t1,t2)

754

In [75]:
t1 = Tree(r"C:\Users\awila\Downloads\Tree\Tree\quicktree_kalign.newick")
t2 = Tree(r"C:\Users\awila\Downloads\Tree\Tree\quicktree_clustal.newick")
RF_dist(t1,t2)

776

In [51]:
t1 = Tree(r"C:\Users\awila\Downloads\Tree\Tree\quicktree_permuted_kalign.newick")
t2 = Tree(r"C:\Users\awila\Downloads\Tree\Tree\quicktree_permuted_clustal.newick")
RF_dist(t1,t2)

772

In [24]:
t1 = Tree(r"C:\Users\awila\Downloads\Tree\Tree\quicktree_permuted_clustal.newick")
t2 = Tree(r"C:\Users\awila\Downloads\Tree\Tree\quicktree_permuted_muscle.newick")
RF_dist(t1,t2)

780

In [25]:
t1 = Tree(r"C:\Users\awila\Downloads\Tree\Tree\quicktree_permuted_muscle.newick")
t2 = Tree(r"C:\Users\awila\Downloads\Tree\Tree\quicktree_permuted_clustal.newick")
RF_dist(t1,t2)

770