## References
- ete3: http://etetoolkit.org/docs/latest/tutorial/tutorial_trees.html
- changes in ete4: https://github.com/etetoolkit/ete/releases/tag/4.1.0-beta

## Working with tree
In ete4, a tree is a set of hierarchically connected nodes. ete does not distinguish between trees and nodes: a tree is represented by its root node. When a tree is created, its root node is returned (regardless of whether the tree is rooted or not).

### Read tree

In [1]:
from ete4 import Tree

In [2]:
t = Tree("(A:1,(B:1,(E:1,D:1):0.5):0.5);" )
print(t)

─┬╴A
 ╰─┬╴B
   ╰─┬╴E
     ╰╴D


In [3]:
t = Tree("(A:1,(B:1.5,(E:0.3,D:0.4)Internal_1:0.2)Internal_2:0.8)Root;", parser=1)
print(t)

─┬╴A
 ╰─┬╴B
   ╰─┬╴E
     ╰╴D


In [4]:
t.write()

'(A:1,(B:1.5,(E:0.3,D:0.4):0.2):0.8);'

In [5]:
# add outfile= param to write to a file
t.write(parser=1)

'(A:1,(B:1.5,(E:0.3,D:0.4)Internal_1:0.2)Internal_2:0.8);'

### Tree attributes

In [6]:
tsub = t['Internal_2']
print(tsub)  # only print tsub and its descendants

─┬╴B
 ╰─┬╴E
   ╰╴D


In [7]:
tsub.dist # from Internal_2 to its parent

0.8

In [8]:
tsub.support

In [9]:
tsub.name

'Internal_2'

In [10]:
tsub.is_leaf

False

In [11]:
tsub.is_root

False

In [12]:
t.is_root

True

In [13]:
t['A'].is_leaf

True

In [14]:
tsub.root

<Tree 'Root' at 0x147392dc>

In [15]:
t.root

<Tree 'Root' at 0x147392dc>

In [16]:
len(t) # number of leaves

4

In [17]:
[n.name for n in t.leaves()]

['A', 'B', 'E', 'D']

In [18]:
print(len(tsub))
[n.name for n in tsub.leaves()]

3


['B', 'E', 'D']

In [19]:
'A' in t

True

In [20]:
'A' in tsub

False

### Browsing tree

In [21]:
print(t)

─┬╴A
 ╰─┬╴B
   ╰─┬╴E
     ╰╴D


In [22]:
[n.name for n in t.traverse('preorder')]

['Root', 'A', 'Internal_2', 'B', 'Internal_1', 'E', 'D']

In [23]:
[n.name for n in t.traverse('postorder')]

['A', 'B', 'E', 'D', 'Internal_1', 'Internal_2', 'Root']

In [24]:
[n.name for n in t.traverse('levelorder')]

['Root', 'A', 'Internal_2', 'B', 'Internal_1', 'E', 'D']

In [25]:
tsub

<Tree 'Internal_2' at 0x14739306>

In [26]:
tsub.up

<Tree 'Root' at 0x147392dc>

In [27]:
tsub.leaves()

<_cython_3_0_7.generator at 0x14724fc40>

In [28]:
tsub.children

[<Tree 'B' at 0x147392ea>, <Tree 'Internal_1' at 0x147392ff>]

In [29]:
list(tsub.ancestors())

[<Tree 'Root' at 0x147392dc>]

In [30]:
t.common_ancestor(['A', 'B'])  # find the most recent common ancestor

<Tree 'Root' at 0x147392dc>

In [31]:
t.common_ancestor(['E', 'D'])

<Tree 'Internal_1' at 0x147392ff>

In [32]:
[n.name for n in tsub.descendants()]

['B', 'Internal_1', 'E', 'D']

In [33]:
tsub.get_distance('Internal_2', 'B')

1.5

In [34]:
tsub.get_distance('Internal_2', 'Internal_1')

0.2

In [35]:
[n.name for n in t.search_nodes(dist=0.8)]

['Internal_2']

### Node annotation

In [36]:
t2 = Tree('((H:0.3,I:0.1):0.5, A:1, (B:0.4,(C:0.5,(J:1.3, (F:1.2, D:0.1):0.5):0.5):0.5):0.5);' )
print(t2)

 ╭─┬╴H
─┤ ╰╴I
 ├╴A
 ╰─┬╴B
   ╰─┬╴C
     ╰─┬╴J
       ╰─┬╴F
         ╰╴D


In [37]:
list(t2.children)  # an unrooted tree has more than two children at its "root" node.

[<Tree at 0x14745419>, <Tree 'A' at 0x14745420>, <Tree at 0x1474545f>]

In [38]:
list(t2.search_nodes(name='A'))[0]

<Tree 'A' at 0x14745420>

In [39]:
tsub2 = t2.common_ancestor(['C', 'J', 'F'])
print(tsub2)

─┬╴C
 ╰─┬╴J
   ╰─┬╴F
     ╰╴D


In [40]:
tsub2.add_props(name2='anc', name3 = 'alt', confidence=0.2)  # .add_feature(s) is deprecated

In [41]:
tsub2.get_prop('name2')

'anc'

In [42]:
tsub2.del_prop('name2')
tsub2.get_prop('name2')

In [43]:
tsub2.props

{'dist': 0.5, 'name3': 'alt', 'confidence': 0.2}

In [44]:
tsub2.props['confidence']

0.2

In [45]:
del tsub2.props['confidence']

In [46]:
tsub2.props

{'dist': 0.5, 'name3': 'alt'}

In [47]:
print(tsub2.to_str(props=['name', 'dist']))

       ╭╴C,0.5
╴⊗,0.5╶┤
       │       ╭╴J,1.3
       ╰╴⊗,0.5╶┤
               │       ╭╴F,1.2
               ╰╴⊗,0.5╶┤
                       ╰╴D,0.1


In [48]:
# properties are exported with New Hampshire eXtended format (NHX)
# https://phylosoft.org/NHX/
tsub2.write(props=['name3'])

'(C:0.5,(J:1.3,(F:1.2,D:0.1):0.5):0.5):0.5[&&NHX:name3=alt];'

In [49]:
x = tsub2.copy()

In [50]:
x = tsub2.copy('newick-extended')
x.write(props=['name3'])

'(C:0.5,(J:1.3,(F:1.2,D:0.1):0.5):0.5);'

In [51]:
x = tsub2.copy('deepcopy')
x.write(props=['name3'])

'(C:0.5,(J:1.3,(F:1.2,D:0.1):0.5):0.5);'

### Modify tree

In [52]:
t3 = t.copy()
print(t3)

─┬╴A
 ╰─┬╴B
   ╰─┬╴E
     ╰╴D


In [53]:
t3['A'].add_child(name='F', dist=0.05)

<Tree 'F' at 0x147455e0>

In [54]:
print(t3.to_str(props=['name']))

      ╭╴A╶╌╴F
╴Root╶┤
      │            ╭╴B
      ╰╴Internal_2╶┤
                   │            ╭╴E
                   ╰╴Internal_1╶┤
                                ╰╴D


In [55]:
t3['A'].add_child(name='G', dist=0.08)
print(t3.to_str(props=['name']))

          ╭╴F
      ╭╴A╶┤
      │   ╰╴G
╴Root╶┤
      │            ╭╴B
      ╰╴Internal_2╶┤
                   │            ╭╴E
                   ╰╴Internal_1╶┤
                                ╰╴D


In [56]:
t3['F'].add_child(name='H')
t3['F'].add_child(name='I')
print(t3.to_str(props=['name']))

              ╭╴H
          ╭╴F╶┤
      ╭╴A╶┤   ╰╴I
      │   │
╴Root╶┤   ╰╴G
      │
      │            ╭╴B
      ╰╴Internal_2╶┤
                   │            ╭╴E
                   ╰╴Internal_1╶┤
                                ╰╴D


In [57]:
t3['F'].remove_child('H')
print(t3.to_str(props=['name']))

          ╭╴F╶╌╴I
      ╭╴A╶┤
      │   ╰╴G
╴Root╶┤
      │            ╭╴B
      ╰╴Internal_2╶┤
                   │            ╭╴E
                   ╰╴Internal_1╶┤
                                ╰╴D


In [58]:
t3['F'].delete()
print(t3.to_str(props=['name']))

          ╭╴G
      ╭╴A╶┤
      │   ╰╴I
╴Root╶┤
      │            ╭╴B
      ╰╴Internal_2╶┤
                   │            ╭╴E
                   ╰╴Internal_1╶┤
                                ╰╴D


In [59]:
t3['I'].detach()
t3['G'].detach()
print(t3.to_str(props=['name']))

      ╭╴A
╴Root╶┤
      │            ╭╴B
      ╰╴Internal_2╶┤
                   │            ╭╴E
                   ╰╴Internal_1╶┤
                                ╰╴D


### Prune tree

In [60]:
t4 = Tree('((((H,K),(F,I)G),E),((L,(N,Q)O),(P,S)))Root;', parser=1)
print(t4.to_str(props=['name']))

                  ╭╴H
              ╭╴⊗╶┤
              │   ╰╴K
          ╭╴⊗╶┤
          │   │   ╭╴F
      ╭╴⊗╶┤   ╰╴G╶┤
      │   │       ╰╴I
      │   │
      │   ╰╴E
╴Root╶┤
      │       ╭╴L
      │   ╭╴⊗╶┤
      │   │   │   ╭╴N
      │   │   ╰╴O╶┤
      ╰╴⊗╶┤       ╰╴Q
          │
          │   ╭╴P
          ╰╴⊗╶┤
              ╰╴S


In [61]:
n = 1
for node in t4.traverse():
    node.dist = n
    n += 1

In [62]:
t4.get_distance('Root', 'L')

19.0

In [63]:
print(t4.to_str(props=['name', 'dist']))

                                  ╭╴H,14.0
                          ╭╴⊗,8.0╶┤
                          │       ╰╴K,15.0
                  ╭╴⊗,4.0╶┤
                  │       │       ╭╴F,16.0
          ╭╴⊗,2.0╶┤       ╰╴G,9.0╶┤
          │       │               ╰╴I,17.0
          │       │
          │       ╰╴E,5.0
╴Root,1.0╶┤
          │               ╭╴L,10.0
          │       ╭╴⊗,6.0╶┤
          │       │       │        ╭╴N,18.0
          │       │       ╰╴O,11.0╶┤
          ╰╴⊗,3.0╶┤                ╰╴Q,19.0
                  │
                  │       ╭╴P,12.0
                  ╰╴⊗,7.0╶┤
                          ╰╴S,13.0


In [64]:
# note that the root node is always preserved when pruning, no matter how many nodes remain
# this may cause misleading results when pruning to a single node
?t4.prune

Signature: t4.prune(nodes, preserve_branch_length=False)
Docstring:
Tree.prune(self, nodes, preserve_branch_length=False)
Prune the topology conserving only the given nodes.

        It will only retain the minimum number of nodes that conserve the
        topological relationships among the requested nodes. The root node is
        always conserved.

        :param nodes: List of node names or objects that should be kept.
        :param bool preserve_branch_length: If True, branch lengths
            of the deleted nodes are transferred (summed up) to its
            parent's branch, thus keeping original distances among nodes.

        Examples::

          t = Tree('(((((A,B)C)D,E)F,G)H,(I,J)K)root;')
          print(t.to_str(props=['name']))
          #                       ╭╴A
          #               ╭╴D╶╌╴C╶┤
          #           ╭╴

In [65]:
t41 = t4.copy()
t41.prune(nodes=['L', 'N', 'Q'], preserve_branch_length=True)
print(t41.to_str(props=['name', 'dist']))

           ╭╴L,10.0
╴Root,10.0╶┤
           │        ╭╴N,18.0
           ╰╴O,11.0╶┤
                    ╰╴Q,19.0


In [66]:
t43 = t4.copy()
t43.prune(nodes=['H', 'L'], preserve_branch_length=True)
print(t43.to_str(props=['name', 'dist']))

          ╭╴H,28.0
╴Root,1.0╶┤
          ╰╴L,19.0


In [67]:
t44 = t4.copy()
t44.prune(nodes=['H', 'L', 'N'], preserve_branch_length=True)
print(t44.to_str(props=['name', 'dist']))

          ╭╴H,28.0
╴Root,1.0╶┤
          │       ╭╴L,10.0
          ╰╴⊗,9.0╶┤
                  ╰╴N,29.0


In [68]:
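# When two or more nodes are kept and their most recent common ancestor (MRCA) is not the root, the distance from the MRCA up to the root is added to Root.dist.
# When only a single node is kept, the whole distance between that node and the root is added to that node's own dist attribute!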
t42 = t4.copy()
t42.prune(nodes=['H'], preserve_branch_length=True)
print(t42.to_str(props=['name', 'dist']))

╴Root,1.0╶╌╴H,28.0


In [69]:
t4.get_distance('Root', 'H')

28.0

In [70]:
# in this case, copy the leaf node or take it as a subtree instead
tmp = t4['H'].copy()
print(tmp.to_str(props=['name', 'dist']))

╴H,14.0


In [71]:
tmp.up

In [72]:
tmp = t4['H']  # tmp is still attached to (and carries the information of) the complete tree
print(tmp.to_str(props=['name', 'dist']))

╴H,14.0


In [73]:
tmp.up

<Tree at 0x14745657>

### Misc

In [74]:
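# resolve polytomies
# (with recursive=False only the node the method is called on is resolved; the root here already has two children, so the tree printed after the call is unchanged)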
x = Tree("(( (a, b, c), (d, e, f, g)), (f, i, h));")
print(x)

     ╭╴a
   ╭─┼╴b
 ╭─┤ ╰╴c
 │ │ ╭╴d
 │ ╰─┼╴e
─┤   ├╴f
 │   ╰╴g
 │ ╭╴f
 ╰─┼╴i
   ╰╴h


In [75]:
x.resolve_polytomy(recursive=False)
print(x)

     ╭╴a
   ╭─┼╴b
 ╭─┤ ╰╴c
 │ │ ╭╴d
 │ ╰─┼╴e
─┤   ├╴f
 │   ╰╴g
 │ ╭╴f
 ╰─┼╴i
   ╰╴h


In [76]:
# set root
x = Tree('(A,(H,F),(B,(E,D)));')
print(x)

x.set_outgroup(x.common_ancestor(['E', 'D']))
print(x)

 ╭╴A
─┼─┬╴H
 │ ╰╴F
 ╰─┬╴B
   ╰─┬╴E
     ╰╴D
 ╭─┬╴E
─┤ ╰╴D
 ╰─┬╴B
   ╰─┬╴A
     ╰─┬╴H
       ╰╴F


In [77]:
# mid-point rooting
x = Tree('(A,(H,F),(B,(E,D)));')

for n in x.traverse():
    if not n.is_root:
        n.dist = 0.1

r = x.get_midpoint_outgroup()
x.set_outgroup(r)
print(x)

 ╭─┬╴B
─┤ ╰─┬╴E
 │   ╰╴D
 ╰─┬╴A
   ╰─┬╴H
     ╰╴F


In [78]:
# graft tree: by adding a child consisting of a tree
x['A'].add_child(x.common_ancestor(['E', 'D']))
print(x)

 ╭─┬╴B
 │ ╰─┬╴E
─┤   ╰╴D
 │ ╭─╌─┬╴E
 ╰─┤   ╰╴D
   ╰─┬╴H
     ╰╴F


## Phylogenetic Tree

In [79]:
from ete4 import PhyloTree

In [80]:
fasta_txt = """
>seqA
MAEIPDETIQQFMALT---HNIAVQYLSEFGDLNEALNSYYASQTDDIKDRREEAH
>seqB
MAEIPDATIQQFMALTNVSHNIAVQY--EFGDLNEALNSYYAYQTDDQKDRREEAH
>ptAB
MAEIPDATIQQFMALTNVSHNIAVQY--EFGDLNEALNSYYAYQTDDQKDRREEAH
>seqC
MAEIPDATIQ---ALTNVSHNIAVQYLSEFGDLNEALNSYYASQTDDQPDRREEAH
>seqD
MAEAPDETIQQFMALTNVSHNIAVQYLSEFGDLNEAL--------------REEAH
"""

# if (some of) the ids in the fasta match node.name in the tree, the alignment can be linked to the tree
x = PhyloTree("(((seqA,seqB)ptAB,seqC),seqD);", parser=1)
x.link_to_alignment(alignment=fasta_txt, alg_format="fasta")

# or 
x = PhyloTree("(((seqA,seqB)ptAB,seqC),seqD);", parser=1, alignment=fasta_txt, alg_format="fasta")
print(x)

   ╭─┬╴seqA
 ╭─┤ ╰╴seqB
─┤ ╰╴seqC
 ╰╴seqD


In [81]:
x.props

{}

In [82]:
x['ptAB'].props

{'name': 'ptAB',
 'sequence': 'MAEIPDATIQQFMALTNVSHNIAVQY--EFGDLNEALNSYYAYQTDDQKDRREEAH'}

In [83]:
for n in x.leaves():
    print(n.get_prop('sequence'), n.name)

MAEIPDETIQQFMALT---HNIAVQYLSEFGDLNEALNSYYASQTDDIKDRREEAH seqA
MAEIPDATIQQFMALTNVSHNIAVQY--EFGDLNEALNSYYAYQTDDQKDRREEAH seqB
MAEIPDATIQ---ALTNVSHNIAVQYLSEFGDLNEALNSYYASQTDDQPDRREEAH seqC
MAEAPDETIQQFMALTNVSHNIAVQYLSEFGDLNEAL--------------REEAH seqD
