## 利用ETE进行树型分析/可视化实战

## ETE数据结构的操作

### 读取Newick格式的树

In [None]:
from ete3 import Tree

# Option 1: parse a tree from a Newick-formatted string.
t = Tree(newick="(A:1,(B:1,(E:1,D:1):0.5):0.5);")

# Option 2: parse a tree from a Newick file on disk.
t = Tree(newick="data/example_tree_1.nw")

# Option 3: start from an empty tree and grow a random topology; the number
# of leaves and the pool of leaf names are both chosen by the caller.
t = Tree()
t.populate(
    size=10,
    names_library=list("ABCDEFGHIJ"),
    reuse_names=False,
    random_branches=True,
)

# Printing a tree draws its topology only.
print(t)

In [None]:
# Render the tree as an inline image in the notebook output.
t.render(file_name='%%inline')

### 写出newick树文件

In [None]:
# Write the tree using the default Newick flavour (format=0).
t.write(format=0, outfile='data/example_tree_1.printed.nw')

# Write it again, this time selecting Newick flavour 9.
t.write(format=9, outfile='data/example_tree_1.printed9.nw')

# Alternatively, open the GUI to work with the tree interactively.
t.show()

### 树Node的属性(attributes)介绍

In [None]:
from ete3 import Tree
# Load the example tree and print its topology.
t = Tree("data/example_tree_1.nw")
print(t)
#         /-G
#      /-|
#     |   \-F
#     |
#     |      /-E
#   /-|   /-|
#  |  |  |  |   /-D
#  |  |  |   \-|
#  |  |  |     |   /-C
#  |   \-|      \-|
#  |     |        |   /-B
#--|     |         \-|
#  |     |            \-A
#  |     |
#  |      \-J
#  |
#  |   /-I
#   \-|
#      \-H

# Every node in a tree carries these three attributes; the values shown are
# the ones reported for the root node `t`.
print(t.name)
# ''
print(t.dist)
# 0.0
print(t.support)
# 1.0

# --- Navigating between nodes ---

# Right after loading, the tree variable refers to the root node.
print(t.is_root())
# True

# Direct children of the root.
children = t.get_children()
print(children[0])
#       /-G
#    /-|
#   |   \-F
#   |
#   |      /-E
# --|   /-|
#   |  |  |   /-D
#   |  |   \-|
#   |  |     |   /-C
#    \-|      \-|
#      |        |   /-B
#      |         \-|
#      |            \-A
#      |
#       \-J

print(children[1])
#    /-I
# --|
#    \-H

# One level further down.
ch1, ch2 = children
grandchildren1 = ch1.get_children()
grandchildren2 = ch2.get_children()

# Both children of ch2 are already terminal nodes (leaves).
print(grandchildren2[0].is_leaf())
# True
print(grandchildren2[0])
# --I
print(grandchildren2[1])
# --H

# The root itself is not a leaf; get_tree_root() can be called on the root
# or on any of its descendants.
print(t.is_leaf())
print(t.get_tree_root())
print(t.children[0].get_tree_root())
print(t.children[0].children[0].get_tree_root())

# Leaves can also be looked up by name, in either of these two ways.
leaf_I = t.get_leaves_by_name("I")[0]
leaf_I = t.search_nodes(name="I")[0]

# Common ancestor of several nodes. Note (from the original tutorial): asking
# for the common ancestor of a single node returns the root.
anc = t.get_common_ancestor("J", "B", "D")
print(anc)
#       /-E
#    /-|
#   |  |   /-D
#   |   \-|
#   |     |   /-C
# --|      \-|
#   |        |   /-B
#   |         \-|
#   |            \-A
#   |
#    \-J

# traverse() walks every node of the tree; keep only the leaves and number
# them in visiting order.
leaves_in_visit_order = (node for node in t.traverse() if node.is_leaf())
for num, node in enumerate(leaves_in_visit_order, start=1):
    print(num, node.name)
# 1 I
# 2 H
# 3 G
# 4 F
# 5 J
# 6 E
# 7 D
# 8 C
# 9 B
# 10 A

# iter_leaves() visits the leaves only; printing a leaf node draws it as
# ASCII art (e.g. "--I") rather than printing just its name.
for leaf in t.iter_leaves():
    print(leaf)

### 对结点进行注释

In [None]:
import random
from ete3 import Tree

# Load the tree.
t = Tree("data/example_tree_1.nw")

# Annotate every node with a `vowel` flag and a random `confidence` value.
# Membership is tested against a set of single letters: a substring test
# (name in "AEIOU") would also be true for the empty name of internal nodes
# and wrongly flag them as vowels.
vowels = set("AEIOU")
for node in t.traverse():
    node.add_features(vowel=node.name in vowels, confidence=random.random())

print("This tree has", [leaf.name for leaf in t.iter_leaves() if leaf.vowel], "vowel leaves")
# This tree has ['E', 'A', 'I'] vowel leaves

# Node attributes can now drive simple computations. Example: among all nodes
# below the common ancestor of J, B and D, collect those whose branch length
# (dist) is greater than 0.5.
ancestor = t.get_common_ancestor("J", "B", "D")
matches = [node for node in ancestor.traverse() if node.dist > 0.5]

# Store the matching nodes as a custom feature on the ancestor node.
ancestor.add_feature("long_branch_nodes", matches)

print("These are nodes under ancestor with long branches", [n.name for n in ancestor.long_branch_nodes])
# These are nodes under ancestor with long branches ['', 'E', '', 'C', '', 'B', 'A']

### 树与树进行比较

In [None]:
# 可以通过Robinson-Foulds (RF)距离比较两棵树的拓扑结构；下面直接调用robinson_foulds()方法（ETE另提供compare()方法用于树的比较）

from ete3 import Tree
# Reference tree.
t1 = Tree(newick="data/example_tree_1.nw")

# Tree to compare against.
t2 = Tree(newick="data/example_tree_2.nw")
print(t2)
#          /-D
#       /-|
#      |   \-C
#    /-|
#   |  |   /-B
#   |   \-|
#   |      \-A
#   |
# --|      /-J
#   |   /-|
#   |  |  |   /-I
#   |  |   \-|
#   |  |     |   /-H
#    \-|      \-|
#      |         \-G
#      |
#      |   /-F
#       \-|
#          \-E

# Compare the two trees. The result holds seven values, in this order:
#   rf            - the RF distance
#   max_rf        - the maximum possible RF distance
#   common_leaves - leaves present in both trees
#   parts_t1      - partitions of t1
#   parts_t2      - partitions of t2
#   discard_t1    - partitions discarded from t1
#   discard_t2    - partitions discarded from t2
# NOTE(review): the original note described parts_t1/parts_t2 as the
# partitions missing from the other tree; the ETE docs suggest they are each
# tree's own partition set - confirm before relying on either reading.
rf_result = t1.robinson_foulds(t2)
rf, max_rf = rf_result[0], rf_result[1]
common_leaves, parts_t1, parts_t2, discard_t1, discard_t2 = rf_result[2:]

print("RF distance is %s over a total of %s" % (rf, max_rf))
# RF distance is 12 over a total of 16

# RF=12 here: the two trees differ in 12 partitions.

### 计算进化分支(branch)的长度

In [None]:
# 使用get_distance()方法可计算树中两个结点之间的分支长度，该方法可以用于计算: a)两个叶节点的分支长度(通过传递两叶节点名称作为参数); b)当前结点到目标结点的分支长度（仅传递目标结点为参数）

from ete3 import Tree
# Load the tree; every query below uses this `t1` so the cell does not depend
# on a `t` variable left over from an earlier cell.
t1 = Tree("data/example_tree_1.nw")

# Branch-length distance between leaves I and A.
print(t1.get_distance("I", "A"))
# 4.4721491

# With topology_only=True every branch counts as length 1, so the result is
# the number of nodes separating the two leaves.
print(t1.get_distance("I", "A", topology_only=True))
# 8.0

# With a single target, the distance is measured from the calling node
# (here t1, the root) to that target.
print(t1.get_distance("I"))
# 0.2482741

### 对树结构进行添加、删除与修剪

添加

In [None]:
from ete3 import Tree

# Grow a tree by hand. add_child() attaches a new node below the node it is
# called on (add_sister() is the other method the tutorial mentions for
# adding branches or nodes).
t = Tree()
ch1 = t.add_child(dist=0.9, support=70, name="A")
ch2 = t.add_child(dist=0.5, support=80, name="B")
ch3 = ch2.add_child(dist=0.3, support=10, name="C")
ch4 = ch2.add_child(dist=0.8, support=90, name="D")
t.show()

删除

In [None]:
# Remove a node from the tree with detach(); the detached node is returned.
leaf_C = t.get_leaves_by_name("C")[0]
removed_node = leaf_C.detach()
print(t)
# Expected: leaf C no longer appears in the printed tree.
# NOTE(review): assuming `t` is the tree built in the previous cell, the
# remaining leaves are A and D. The sample output originally shown here still
# listed C, so it did not match this code - re-run the cell to capture the
# exact ASCII drawing.

修剪

In [None]:
# 通过prune()方法可对指定树叶节点进行修剪
from ete3 import Tree
t = Tree(newick="data/example_tree_1.nw")
print(t)
#         /-G
#      /-|
#     |   \-F
#     |
#     |      /-E
#   /-|   /-|
#  |  |  |  |   /-D
#  |  |  |   \-|
#  |  |  |     |   /-C
#  |   \-|      \-|
#  |     |        |   /-B
#--|     |         \-|
#  |     |            \-A
#  |     |
#  |      \-J
#  |
#  |   /-I
#   \-|
#      \-H

# Keep only leaves I, A and F. preserve_branch_length=True keeps the original
# branch lengths of the retained leaves.
t.prune(nodes=['I', 'A', 'F'], preserve_branch_length=True)

print(t)
#      /-F
#   /-|
#--|   \-A
#  |
#   \-I

t.show()

### ETE树的美化与注释

#### 树的基本可视化

In [None]:
# 使用以下几种方法可以将newick树文件渲染成多种格式的图片文件
from ete3 import Tree
t = Tree(newick="data/example_tree_1.nw")

# show() opens the tree for interactive inspection.
t.show()

# render() writes the tree to an image file; this cell produces PNG, SVG and
# PDF output.
t.render(file_name="imgage.png")
t.render(file_name="imgage.svg")
t.render(file_name="imgage.pdf")

# Inside a Jupyter Notebook, render inline instead.
t.render(file_name="%%inline")

#### 通过TreeStyle类对树图像进行美化

修改用于渲染树枝的参数

In [None]:
from ete3 import Tree, TreeStyle
t = Tree(newick="data/example_tree_1.nw")

# Configure a TreeStyle that shows branch supports, branch lengths and leaf
# names on the rendered image.
ts = TreeStyle()
ts.show_branch_support = True
ts.show_branch_length = True
ts.show_leaf_name = True
t.render("%%inline", tree_style=ts)

修改树型

In [None]:
from ete3 import Tree, TreeStyle
t = Tree(newick="data/example_tree_1.nw")

# Switch the drawing to circular mode ("c"), starting at -180 degrees and
# spanning 180 degrees.
ts = TreeStyle()
ts.arc_span = 180
ts.arc_start = -180
ts.mode = "c"
t.render("%%inline", w=500, tree_style=ts)

#### 通过NodeStyle()对树结点进行注释

In [None]:
# 用之前例的树示范 
from ete3 import Tree, NodeStyle, TreeStyle
t = Tree(newick="data/example_tree_1.nw")

# Basic TreeStyle: show supports, branch lengths and leaf names.
ts = TreeStyle()
ts.show_branch_support = True
ts.show_branch_length = True
ts.show_leaf_name = True

# Give every node its own NodeStyle: leaves become red spheres of size 10,
# all other nodes become brown squares of size 15.
for n in t.traverse():
    shape, size, color = ("sphere", 10, "red") if n.is_leaf() else ("square", 15, "brown")
    nstyle = NodeStyle()
    nstyle["shape"] = shape
    nstyle["size"] = size
    nstyle["fgcolor"] = color
    # set_style() attaches the configured NodeStyle to the node.
    n.set_style(nstyle)

# Override the root node's appearance after the loop: larger and black.
t.img_style["fgcolor"] = "black"
t.img_style["size"] = 30
t.render("%%inline", w=500, tree_style=ts)

#### 通过Node faces属性进一步注释结点信息

In [None]:
# 参考以下例子
from ete3 import Tree, TextFace, NodeStyle, TreeStyle

t = Tree(newick="((a,b),c);")

# Text faces, named <position>_c<column>_r<row> after the slot they are
# meant to occupy.
right_c0_r0 = TextFace("right_col0_row0")
right_c0_r1 = TextFace("right_col0_row1")
right_c1_r0 = TextFace("right_col1_row0")
right_c1_r1 = TextFace("right_col1_row1")
right_c1_r2 = TextFace("right_col1_row2")

top_c0_r0 = TextFace("top_col0_row0")
top_c0_r1 = TextFace("top_col0_row1")

bottom_c0_r0 = TextFace("bottom_col0_row0")
bottom_c0_r1 = TextFace("bottom_col0_row1")

aligned_c0_r0 = TextFace("aligned_col0_row0")
aligned_c0_r1 = TextFace("aligned_col0_row1")

aligned_c1_r0 = TextFace("aligned_col1_row0")
aligned_c1_r1 = TextFace("aligned_col1_row1")

all_faces = [
    right_c0_r0, right_c0_r1, right_c1_r0, right_c1_r1, right_c1_r2,
    top_c0_r0, top_c0_r1,
    bottom_c0_r0, bottom_c0_r1,
    aligned_c0_r0, aligned_c0_r1,
    aligned_c1_r0, aligned_c1_r1,
]

# Give every face a visible border and some margin.
for f in all_faces:
    f.margin_right = 10
    f.margin_top = 5
    f.margin_bottom = 5
    f.border.width = 1

# Attach faces to the root node, slot by slot; the order within a column
# determines the row.
for face, col in (
    (right_c0_r0, 0),
    (right_c0_r1, 0),
    (right_c1_r0, 1),
    (right_c1_r1, 1),
    (right_c1_r2, 1),
):
    t.add_face(face, column=col, position="branch-right")

for face in (top_c0_r0, top_c0_r1):
    t.add_face(face, column=0, position="branch-top")

for face in (bottom_c0_r0, bottom_c0_r1):
    t.add_face(face, column=0, position="branch-bottom")

# Give each leaf a fresh NodeStyle with its own background colour.
for leaf_name, background in (
    ("a", "lightgreen"),
    ("b", "indianred"),
    ("c", "lightblue"),
):
    styled_leaf = t.get_leaves_by_name(leaf_name)[0]
    styled_leaf.set_style(NodeStyle())
    styled_leaf.img_style["bgcolor"] = background

# Root node: fresh style, lavender background, size 12.
t.set_style(NodeStyle())
t.img_style["bgcolor"] = "lavender"
t.img_style["size"] = 12

# Add one branch-right face and the aligned faces to every leaf.
for leaf in t.iter_leaves():
    leaf.img_style["size"] = 12
    leaf.add_face(right_c0_r0, column=0, position="branch-right")
    leaf.add_face(aligned_c0_r1, column=0, position="aligned")
    leaf.add_face(aligned_c0_r0, column=0, position="aligned")
    leaf.add_face(aligned_c1_r1, column=1, position="aligned")
    leaf.add_face(aligned_c1_r0, column=1, position="aligned")

# Render to a PNG file, with the scale bar turned off.
ts = TreeStyle()
ts.show_scale = False
t.render(file_name="face_positions.png", w=800, tree_style=ts)

#### 设计和运用layout布局函数

In [None]:
# 以example_tree_1为例，我们将定制布局函数，实现自动化注释:
# 1）修改叶节点名称
# 2）用不同颜色标记不同范围的bootstrap值分支

from ete3 import Tree, NodeStyle, TreeStyle, faces
t = Tree(newick="data/example_tree_1.nw")

# Display name for each single-letter leaf name.
namemap = {
    'A': 'Austria',
    'B': 'Britain',
    'C': 'China',
    'D': 'Dutch',
    'E': 'Egypt',
    'F': 'France',
    'G': 'German',
    'H': 'Hungary',
    'I': 'Italy',
    'J': 'Jamaica',
}

# Background colour for each display name.
colormap = {
    'Austria': '#FF3933',
    'Britain': '#FCFF33',
    'China': '#FF3333',
    'Dutch': '#FF8633',
    'Egypt': '#5EFF33',
    'France': '#33FFEC',
    'German': '#3368FF',
    'Hungary': '#5E33FF',
    'Italy': '#F333FF',
    'Jamaica': '#581845',
}

# 设计布局函数，自动化注释树结点的外观
def mylayout(node):
    """Style a single node; intended to be installed as ``TreeStyle.layout_fn``.

    Branch lines are drawn with width 4 and line type 0, coloured by
    ``node.support``: dark blue above 0.8, green above 0.5 (up to and
    including 0.8), red otherwise.

    Leaves additionally get an aligned text face with the display name looked
    up in the module-level ``namemap``, a background colour from
    ``colormap``, a blue sphere marker of size 12 and a blue horizontal
    branch (which overrides the support-based colour on that segment).
    Internal nodes get a small dark-red circle.

    Args:
        node: the tree node being laid out; must expose ``support``,
            ``img_style``, ``is_leaf()`` and, for leaves, ``name``.

    Raises:
        KeyError: if a leaf name is missing from ``namemap`` or its display
            name is missing from ``colormap``.
    """
    style = node.img_style

    # Branch line geometry, identical for every node.
    style['vt_line_width'] = 4
    style['hz_line_width'] = 4
    style['vt_line_type'] = 0
    style['hz_line_type'] = 0

    # Colour branches by support. The middle band is written as "> 0.5" so
    # that a support of exactly 0.8 lands in it instead of falling through
    # to the low-support colour.
    if node.support > 0.8:
        branch_color = 'darkblue'
    elif node.support > 0.5:
        branch_color = 'green'
    else:
        branch_color = 'red'
    style["hz_line_color"] = branch_color
    style["vt_line_color"] = branch_color

    if node.is_leaf():
        # Show the full display name as an aligned text face.
        new_name = namemap[node.name]
        new_nameFace = faces.TextFace(new_name)
        faces.add_face_to_node(new_nameFace, node, column=1, aligned=True)

        # Leaf marker and background.
        style["size"] = 12
        style["shape"] = "sphere"
        style["fgcolor"] = "blue"
        style["bgcolor"] = colormap[new_name]

        # Leaf branches are always blue on the horizontal segment.
        style["hz_line_color"] = 'blue'
    else:
        # Internal-node marker.
        style["size"] = 2
        style["shape"] = "circle"
        style["fgcolor"] = "darkred"

# Tree-level style: install the layout function and show supports, branch
# lengths and leaf names.
ts = TreeStyle()
ts.layout_fn = mylayout
ts.show_branch_support = True
ts.show_branch_length = True
ts.show_leaf_name = True

# Root node appearance, then write the image to disk.
t.img_style["fgcolor"] = "black"
t.img_style["size"] = 30
t.render("ete-api-layout.png", tree_style=ts)

### 综合实战: 系统发育树+热图+气泡效果（自定义叶节点名称、外观）

#### 读入矩阵文件

In [None]:
import pandas as pd
import numpy as np
# Directory holding the input files.
PATH = "./data/"

# Read the tab-separated expression matrix; the first row is the header and
# the first column becomes the index.
data = pd.read_table(PATH + "diauxic.array", index_col=0, header=0)

# Rename the index header to "#Names" (per the original note, this is what
# ETE expects in the matrix text).
data.index.name = "#Names"

# Serialise the table to a tab-separated string with two-decimal floats.
data_mat = data.to_csv(path_or_buf=None, sep="\t", float_format="%.2f")

# Column names, kept for labelling.
header = list(data.columns.values)

# Display the table in the notebook.
data

In [None]:
print(data_mat[:150])  # Preview the first 150 characters to check the converted format.

In [None]:
# Display the list of column names collected from the matrix.
header

#### 导入树与矩阵

In [None]:
# 导入必需的类和函数，其中ClusterTree是可同时加载树和数值矩阵（用于绘制热图）的树类
from ete3 import ClusterTree, TreeStyle, AttrFace, ProfileFace, TextFace
from ete3.treeview.faces import add_face_to_node

# Directory holding the input files.
PATH = "./data/"

# Build the cluster tree from the Newick file and the matrix text prepared in
# the earlier cell.
t = ClusterTree(PATH + "diauxic.nw", data_mat)

# The matrix is now available from the root node.
array = t.arraytable

# Collect every finite matrix value, then derive the bounds and midpoint used
# for the heatmap colour gradient.
matrix_dist = [value for row in array.matrix for value in row if np.isfinite(value)]
matrix_min = np.min(matrix_dist)
matrix_max = np.max(matrix_dist)
matrix_avg = matrix_min + ((matrix_max - matrix_min) / 2)

#### 设计布局

In [None]:
# Face that displays each node's "name" attribute; fsize=8 is the font size
# (the original note called it "8 columns", which the code does not show).
nameFace = AttrFace("name", fsize=8)

# 设计布局函数
def mylayout(node):
    """Lay out one node of the cluster tree (heatmap rows plus bubbles).

    Leaves get an aligned heatmap ``ProfileFace`` in column 0, an aligned
    name face in column 1, and a node marker of size 0.

    Internal nodes become spheres labelled ``Silh=<value>``, coloured green
    when ``node.silhouette`` is greater than 0 and red otherwise. Their size
    grows with ``abs(silhouette)`` when the value lies in [-1, 1]; outside
    that range the size is left unchanged. Nodes for which ``len(node)``
    exceeds 100 also get a "cbars" ``ProfileFace`` in column 1.

    Faces are created only in the branch that uses them, so no unused
    ``ProfileFace`` is built per node.

    Args:
        node: the node being laid out; relies on the module-level
            ``matrix_max``, ``matrix_min``, ``matrix_avg`` and ``nameFace``.
    """
    if node.is_leaf():
        # Heatmap row for this leaf, followed by its name.
        profileFace = ProfileFace(matrix_max, matrix_min, matrix_avg, 200, 14, "heatmap")
        add_face_to_node(profileFace, node, 0, aligned=True)
        node.img_style["size"] = 0
        add_face_to_node(nameFace, node, 1, aligned=True)
        return

    # Internal node: bubble coloured by the sign of the silhouette value.
    silhouette = node.silhouette
    bubble_color = "#056600" if silhouette > 0 else "#940000"
    validationFace = TextFace("Silh=%0.2f" % silhouette, "Verdana", 10, bubble_color)
    node.img_style["fgcolor"] = bubble_color

    # Bubble size grows with |silhouette|, but only for values in [-1, 1].
    node.img_style["shape"] = "sphere"
    if -1 <= silhouette <= 1:
        node.img_style["size"] = 15 + int((abs(silhouette) * 10) ** 2)

    add_face_to_node(validationFace, node, 0)

    # Large partitions also get a bar diagram of the partition's profile.
    if len(node) > 100:
        cbarsFace = ProfileFace(matrix_max, matrix_min, matrix_avg, 200, 70, "cbars")
        add_face_to_node(cbarsFace, node, 1)
    
# 出图
# Install the layout function and write the final figure to a PDF file.
ts = TreeStyle()
ts.layout_fn = mylayout
t.render(file_name="ete-cluster-final-result.pdf", tree_style=ts)