In [1]:
import os
os.environ['QT_QPA_PLATFORM']='offscreen'

import Bio
from Bio import Seq, SeqIO

In [2]:
import ete3
from ete3 import Tree, faces, TreeStyle, PhyloTree, NodeStyle, TextFace, AttrFace, SeqMotifFace

In [59]:
treefile = '/depot/jwisecav/data/ceratopteris/her7/08_phylogeny/HER7_simple.tre'
alnfile = '/depot/jwisecav/data/ceratopteris/her7/08_phylogeny/HER7.aln'
speciesfile = '/depot/jwisecav/data/ceratopteris/her7/08_phylogeny/scripts/species_ids.txt'
colorfile = '/depot/jwisecav/data/ceratopteris/her7/08_phylogeny/scripts/lineage_colors.dmp'
branch_min = 95

In [60]:
seqDict = {}
alnLen = 0
for record in SeqIO.parse(alnfile, "fasta"):
    name = record.id
    sequence = str(record.seq)
    alnLen = len(sequence)
    seqDict[name] = sequence

In [61]:
code2name = {}
code2lineage = {}
fi = open(speciesfile)

for line in fi:
    if line[0] == '#':
        continue
    name, spid, lineage = line.rstrip().split('\t')
    #print(name, spid, lineage)
    code2name[spid] = name
    
    code2lineage[spid] = lineage

fi.close()

In [62]:
lin2color = {}
linorder = {}
fi = open(colorfile)

for line in fi:
    if line[0] == '#':
        continue

    order, color_desc, color, cat, lineage, taxids = line.rstrip().split('\t')  
    order = int(order)
    lin2color[lineage] = color
    linorder[order] = lineage

fi.close()

In [68]:
# add species and lineage information to all leaves
# color target gene red 
# color all ME034 gene green

#t = PhyloTree(treefile, alignment=alnfile, alg_format="fasta")
t = Tree(treefile, format=9)
#t.ladderize(direction=1)
leafSet = set()

#print("Custom mode:")
for n in t.get_leaves():
    leafSet.add(n.name)
    #print(n.name)
    tmp = n.name.split("_") 
    spid = tmp.pop(-1)
    tmp.pop(-1)
    speciesname = code2name[spid]
    genename = "_".join(tmp)
    #print(speciesname, genename, code2lineage[spid])
    
    n.add_features(lineage=code2lineage[spid])
    n.add_features(gene=genename)
    n.add_features(species=speciesname)

    #print("spid:", spid, "Species name:", n.species, "Species lineage:", n.lineage, "Color:", lin2color[n.lineage])

    # create a new label with a color attribute
    linF = AttrFace("lineage", fgcolor=lin2color[n.lineage], fsize=1)
    linF.background.color = lin2color[n.lineage]
    linF.margin_top = linF.margin_bottom = linF.margin_left = 10
    
    speciesF = AttrFace("species", fsize=10, fgcolor=lin2color[n.lineage], fstyle="italic")
    speciesF.margin_right = speciesF.margin_left = 10

    if spid == 'cri':
        geneF = AttrFace("gene", fsize=12, fgcolor="#228833", fstyle="bold")
        geneF.margin_right = geneF.margin_left = 5
    
    elif spid == 'ath':
        geneF = AttrFace("gene", fsize=12, fgcolor="#AA3377", fstyle="bold")
        geneF.margin_right = geneF.margin_left = 5
    
    else:
        geneF = AttrFace("gene", fsize=10, fgcolor="black")
        geneF.margin_right = geneF.margin_left = 5

    # labels aligned to the same level
    n.add_face(speciesF, 0, position='aligned')
    n.add_face(geneF, 0, position='branch-right')
    n.add_face(linF, 1, position='aligned')
    
    #my_motifs = [[0, alnLen, "seq", 2, 10, None, None, None]]
    #seqF = SeqMotifFace(seq=seqDict[n.name], motifs=my_motifs, gap_format="blank")
    #seqF.margin_right = seqF.margin_left = 5
    #n.add_face(seqF, 2, "aligned")
    
    seqF = SeqMotifFace(seqDict[n.name][947:1026], seq_format="seq", height=22)
    seqF.margin_right = seqF.margin_left = 5
    #seqF.margin_top = seqF.margin_bottom = 0
    n.add_face(seqF, 2, "aligned")
    

In [69]:
# add lineage information to all internal nodes
style = NodeStyle()

style["size"] = 0
style["hz_line_width"] = 2
style["vt_line_width"] = 2
t.set_style(style)

for n in t.iter_descendants("postorder"):
    #print(n.name)
                
    style["size"] = 0
    style["hz_line_width"] = 2
    style["vt_line_width"] = 2
    n.set_style(style)
    
    lineage_set = set()
    # get descendants, if all descendants are members of same lineage, color lineage color
    #print("NODE CHILDREN:")
    for k in n.iter_descendants("postorder"):
        for l in k.get_leaves():
            lineage_set.add(l.lineage)
            #print("Gene:", l.gene, "Species:", l.species, "Lineage:", l.lineage, "Color:", lin2color[l.lineage])
    
    #print(len(lineage_set), lineage_set)
    if len(lineage_set) == 1:
        node_lin = ''.join(lineage_set)
        #print(len(lineage_set), lineage_set, node_lin, lin2color[node_lin])
    
        newstyle = NodeStyle()
        newstyle["size"] = 0
        newstyle["hz_line_width"] = 2
        newstyle["vt_line_width"] = 2
        newstyle["vt_line_color"] = lin2color[node_lin]
        newstyle["hz_line_color"] = lin2color[node_lin]
        n.img_style = newstyle
        
    #fix branchlengths?
    
    if n.name not in leafSet and n.name[0] != 'n':
        #print(n.name)
        
        if float(n.name) >= branch_min:
            #branch_support = n.name.split('.')[0]
            branch_support = n.name
            #print(branch_support)
            n.add_features(bootstrap=branch_support)
            
            if len(lineage_set) == 1:
                node_lin = ''.join(lineage_set)
                supF = AttrFace("bootstrap", fgcolor=lin2color[node_lin], fsize=8)
                supF.margin_right = supF.margin_left = 3
                n.add_face(supF, 0, position='branch-bottom')
                
            else:
                supF = AttrFace("bootstrap", fgcolor="#000000", fsize=8)
                supF.margin_right = supF.margin_left = 3
                n.add_face(supF, 0, position='branch-bottom')
        

for n in t.get_leaves():
    
    leafstyle = NodeStyle()
    leafstyle["size"] = 0
    leafstyle["hz_line_width"] = 2
    leafstyle["vt_line_width"] = 2
    leafstyle["vt_line_color"] = lin2color[n.lineage]
    leafstyle["hz_line_color"] = lin2color[n.lineage]
    n.img_style = leafstyle
    
    

IndexError: string index out of range

In [70]:
# add legend

ts = TreeStyle()
ts.show_leaf_name = False
#ts.show_branch_support = True
ts.draw_guiding_lines = True

ts.title.add_face(TextFace("Taxonomy:", fsize=10), column=0)
for i in range(1, len(lin2color)+1):
    #print(linorder[i], lin2color[linorder[i]])
    ts.title.add_face(TextFace(linorder[i], fsize=10, fgcolor=lin2color[linorder[i]]), column=0)

In [71]:
# render image on notebook or save to file
t.render("/home/jwisecav/mytree.pdf", tree_style=ts)
#t.render("%%inline", tree_style=ts)

{'nodes': [[45.5, 56.75, 49.5, 60.75, 1, None],
  [93.5, 26.5, 97.5, 30.5, 2, None],
  [93.5, 87.0, 97.5, 91.0, 7, None],
  [141.5, 76.0, 145.5, 80.0, 8, None],
  [141.5, 98.0, 145.5, 102.0, 9, None],
  [45.5, 180.5, 49.5, 184.5, 10, None],
  [93.5, 153.0, 97.5, 157.0, 11, None],
  [141.5, 131.0, 145.5, 135.0, 12, None],
  [189.5, 120.0, 193.5, 124.0, 13, None],
  [189.5, 142.0, 193.5, 146.0, 14, None],
  [141.5, 175.0, 145.5, 179.0, 15, None],
  [189.5, 164.0, 193.5, 168.0, 16, None],
  [189.5, 186.0, 193.5, 190.0, 17, None],
  [93.5, 208.0, 97.5, 212.0, 18, None]],
 'faces': [[147.0, 1.0, 263.0, 23.0, 3, 'AT4G39400.1'],
  [554.5, 0.5, 575.5, 23.5, 3, None],
  [396.0, 3.0, 538.0, 21.0, 3, 'Arabidopsis thaliana'],
  [565.0, 11.0, 571.0, 13.0, 3, 'Angiosperm'],
  [579.5, 0.5, 1370.5, 23.5, 3, ''],
  [192.0, 23.0, 308.0, 45.0, 5, 'AT3G13380.1'],
  [554.5, 22.5, 575.5, 45.5, 5, None],
  [396.0, 25.0, 538.0, 43.0, 5, 'Arabidopsis thaliana'],
  [565.0, 33.0, 571.0, 35.0, 5, 'Angiosperm'],
 