In [26]:
from ete3 import Tree
import toytree
import toyplot
import toyplot.pdf
import math
from scipy import stats
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import funclib as flib
import statsmodels

### Figure 2

Read trees and alphas

In [27]:
# Taxonomic groups to plot and, for each group, the reference species that
# appears in the file name of its alpha table.
groups = ["Mammals","Birds","Snakes"]
refs = ["Homo_sapiens", "Gallus_gallus", "Thamnophis_elegans"]
trees, alphas = [], []
data_dir = "../.."

# Read the alpha table and the tree of every group; tips without an alpha
# estimate are pruned so trees and tables describe the same species.
for g, ref in zip(groups, refs):

    alpha_df = pd.read_csv("{}/scripts/alphas/{}.{}.LM.tsv".format(data_dir, g, ref), sep="\t")
    phylo = toytree.tree("{}/trees/{}_phast.nh".format(data_dir, g), tree_format=1)

    # Build the lookup once per group (the table is not re-listed for every tip)
    species_with_alpha = set(alpha_df["species"])
    species_to_remove = [sp for sp in phylo.get_tip_labels() if sp not in species_with_alpha]
    phylo = phylo.drop_tips(species_to_remove)

    alphas.append(alpha_df)
    trees.append(phylo)

# Tip labels of all trees in plotting order; len(total_species) is the total
# number of species.
total_species = [sp for t in trees for sp in t.get_tip_labels()]

# Palette: value of the "Order" column -> value of the "Color" column
palette = (
    pd.read_csv("{}/data/plot_colors.tsv".format(data_dir), sep="\t")
    .set_index("Order")["Color"]
    .to_dict()
)

# Latin to common names
latin2common = flib.df2dict(pd.read_csv("{}/data/Table_S2.csv".format(data_dir)), "Species", "Common_name")

# Chromosome level species: first whitespace-separated field of each line.
# Blank lines are skipped; they would otherwise raise IndexError on [0].
with open("{}/data/Species_to_chromosomes.txt".format(data_dir), "r") as fh:
    chrom_level = [line.split()[0] for line in fh if line.strip()]

Sex-specific generation time uncertainty

In [28]:
# Interval used to widen the alpha confidence bounds.
# NOTE(review): presumably bounds on the ratio of sex-specific generation
# times (see the section title) - confirm against funclib / the methods.
ratio_interval = [0.9, 1.1]


def _rescale_alpha(alpha, ratio):
    """Return flib.miyata(flib.reverse_miyata(alpha) / flib.reverse_miyata(ratio))."""
    return flib.miyata(flib.reverse_miyata(alpha) / flib.reverse_miyata(ratio))


# Add the widened bounds as two new columns of every alpha table (in place).
# The lower bound is combined with the upper end of the interval and the
# upper bound with the lower end.
for a_df in alphas:
    a_df["lwr_genratio"] = a_df["alpha_lwr"].apply(
        lambda x: _rescale_alpha(x, ratio_interval[1])
    )
    a_df["upr_genratio"] = a_df["alpha_upr"].apply(
        lambda x: _rescale_alpha(x, ratio_interval[0])
    )

DNMs (de novo mutations): alpha estimates from pedigrees

In [31]:
# Table read for the pedigree-based (DNM) alpha estimates.
dnm_file = "/".join((data_dir, "data", "Table_S2.csv"))

# Later cells use dnm_ci as a dict: species -> (lower, point estimate, upper).
# NOTE(review): alpha_from_pedigrees is neither defined nor imported in the
# visible cells (execution counts 29-30 are missing); on a fresh kernel this
# line raises NameError unless the function is defined elsewhere - confirm
# where it lives (possibly funclib) and import it explicitly.
dnm_ci = alpha_from_pedigrees(dnm_file)

Calculate bounds

In [32]:
# Canvas layout; all values are percentages of the canvas size.
y_top_margin = 5
y_bottom_margin = 5
y_margin = 1      # vertical gap between consecutive trees
x_margin = 5
x_start = 5
x_end = 45

# One slot per species plus one slot per gap between consecutive trees.
total_spaces = len(total_species) + len(trees)-1
# Height of one slot once both vertical margins are removed. The bottom
# margin is now honoured explicitly instead of counting the top margin twice
# (same value while both margins are equal).
unit = (100 - y_top_margin - y_bottom_margin)/total_spaces
# NOTE(review): gaps are drawn with y_margin rather than unit, so the stack
# of panels does not end exactly at 100 - y_bottom_margin unless both match.

# (x_start, x_end, y_start, y_end) strings, one tuple per tree, top to bottom.
# Tuples are required: the plotting cells slice and concatenate them.
bounds = []
for i, tree in enumerate(trees):
    n_species = len(tree.get_tip_labels())
    # The first panel starts at the top margin; every later panel starts one
    # gap below the previous one. (The former third branch was unreachable.)
    y_start = y_top_margin if i == 0 else y_end + y_margin
    y_end = y_start + unit*n_species
    bounds.append(tuple("{:.9f}%".format(v) for v in (x_start, x_end, y_start, y_end)))

Plot (linear scale)

In [8]:
# Figure 2 on the linear alpha scale: one tree panel and one alpha panel per
# group, stacked vertically using the precomputed `bounds`.
# NOTE(review): this cell reads `alphas` and `dnm_ci`, which the
# "Plot in log scale" cells later rebind to log values; run it before them.
mut_type = "mod"
max_alpha_x = 6
style = dict(
    tip_labels_align=True,
    edge_style={"stroke-width": 1},
    tip_labels_style={"font-size": "9px"},
    edge_align_style={"stroke-dasharray": "2,5", "stroke-width": 1},
)

# Every tip label is padded to the longest common name so the trees align
max_sp_char = max(len(latin2common[sp]) for sp in total_species)
y_slide = 0
max_x = 6

canvas = toyplot.Canvas(width=500, height=650)
axes = [[] for _ in groups]

for i, (t, b) in enumerate(zip(trees, bounds)):

    species = t.get_tip_labels()
    c = palette[groups[i]]
    max_y = len(species)

    # Left panel holds the tree, right panel the alpha estimates; both share
    # the vertical extent of this group's bounds.
    tree_ax = canvas.cartesian(bounds=b, ymin=0, ymax=max_y)
    alpha_ax = canvas.cartesian(
        bounds=("{:.2f}%".format(x_end+x_margin), "95%") + b[2:],
        ymin=0, ymax=max_y,
        xmin=0, xmax=max_x,
    )
    axes[i].extend([tree_ax, alpha_ax])

    # Common names padded to equal width; chromosome-level species get a "*"
    # that takes the place of one padding character.
    labels = []
    for sp in species:
        common = latin2common[sp]
        padding = max_sp_char - len(common)
        if sp in chrom_level:
            labels.append(common + "*" + " "*(padding-1))
        else:
            labels.append(common + " "*padding)
    t.draw(axes=tree_ax, tip_labels=labels, ybaseline=0, xbaseline=0, scalebar=True, **style)

    # Alpha table of this group restricted to the selected mutation type,
    # reshaped to {column: {species: value}}
    alpha_df = alphas[i]
    alpha_df = alpha_df[alpha_df["mut_type"] == mut_type].reset_index(drop=True)
    alpha_d = alpha_df.set_index("species").to_dict()
    point_alpha = [alpha_d["alpha"][sp] for sp in species]
    alpha_ax.scatterplot(point_alpha, np.arange(len(species)), color=c, size=3)

    yrectmargin = 0.3
    for leaf, sp in enumerate(species):

        leaf_mod = leaf #+ y_slide

        # Confidence interval of alpha
        alpha_ax.plot(
            [alpha_d["alpha_lwr"][sp], alpha_d["alpha_upr"][sp]],
            [leaf_mod, leaf_mod],
            color=c,
            style={"stroke-width": 1.5},
        )
        # Interval widened by the generation-ratio bounds, drawn semi-transparent
        alpha_ax.plot(
            [alpha_d["lwr_genratio"][sp], alpha_d["upr_genratio"][sp]],
            [leaf_mod, leaf_mod],
            color=c, opacity=0.4,
            style={"stroke-width": 1.5},
        )

        # DNM estimates: gray box for the interval, thin red box at the point
        # estimate. Only the upper end is clipped to the x range.
        if sp not in dnm_ci:
            continue
        lower_dnm, alpha, upper_dnm = dnm_ci[sp]
        if upper_dnm >= max_x:
            upper_dnm = max_x
        alpha_ax.rectangle(lower_dnm, upper_dnm, leaf_mod-yrectmargin, leaf_mod+yrectmargin,
                           opacity=0.15, color="gray")
        alpha_ax.rectangle(alpha-0.02, alpha+0.02, leaf_mod-yrectmargin, leaf_mod+yrectmargin,
                           opacity=0.30, color="red")

    # Mean of the point estimates within this group, printed and drawn as a
    # thin vertical line
    group_alpha = np.mean(point_alpha)
    print(group_alpha, groups[i])
    line_space = 0.5
    alpha_ax.plot([group_alpha, group_alpha],
                  [-line_space, len(species)-1+line_space],
                  color=c, style={"stroke-width": 0.15})
    # Reference line at alpha = 1
    alpha_ax.plot([1, 1],
                  [-line_space, len(species)-1+line_space],
                  color="gray", style={"stroke-width": 0.2, "stroke-dasharray": "2, 2"})

    # "Invisible" (white) datapoints so the x range is the same in every group
    alpha_ax.scatterplot(max_alpha_x, 1, color="white")
    alpha_ax.scatterplot(0, 0.5, color="white")

    # Axis display is switched off for every group but the last, which keeps
    # only its x axis (ticks and the alpha label)
    if i+1 != len(trees):
        alpha_ax.show = False
    else:
        alpha_ax.y.show = False
        alpha_ax.x.ticks.show = True
        alpha_ax.x.label.text = "\u03B1"

#toyplot.pdf.render(canvas, "../pdfs/Figure2_scalebar.pdf")

2.7595817135423397 Mammals
1.776707380333636 Birds
1.6902130056815778 Snakes


Plot in log scale

In [33]:
# Natural-log transform of every alpha-scale column; the other columns are
# carried over unchanged, placed after the transformed ones.
alphas_log = []
for df in alphas:
    selected_cols = ["alpha","alpha_lwr","alpha_upr","lwr_genratio","upr_genratio"]
    logdf = np.log(df[selected_cols])
    alphas_log.append(pd.concat([logdf, df.drop(columns=selected_cols)], axis=1))

# NOTE(review): `alphas` and `dnm_ci` are rebound to their log versions here,
# so this cell is not idempotent (running it twice takes the log of the log)
# and the linear-scale plot cell must be run before it.
alphas = alphas_log
dnm_ci_log = {sp: [np.log(value) for value in ci] for sp, ci in dnm_ci.items()}
dnm_ci = dnm_ci_log

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [35]:
# Figure 2 on the log alpha scale. Same layout as the linear figure; expects
# `alphas` and `dnm_ci` to already hold log-transformed values.
mut_type = "mod"
max_alpha_x = np.log(6)
style = dict(
    tip_labels_align=True,
    edge_style={"stroke-width": 1},
    tip_labels_style={"font-size": "9px"},
    edge_align_style={"stroke-dasharray": "2,5", "stroke-width": 1},
)

# Every tip label is padded to the longest common name so the trees align
max_sp_char = max(len(latin2common[sp]) for sp in total_species)
y_slide = 0
max_x = np.log(6)
min_x = -0.25

canvas = toyplot.Canvas(width=500, height=650)
axes = [[] for _ in groups]

for i, (t, b) in enumerate(zip(trees, bounds)):

    species = t.get_tip_labels()
    c = palette[groups[i]]
    max_y = len(species)

    # Left panel holds the tree, right panel the log-alpha estimates; both
    # share the vertical extent of this group's bounds.
    tree_ax = canvas.cartesian(bounds=b, ymin=0, ymax=max_y)
    alpha_ax = canvas.cartesian(
        bounds=("{:.2f}%".format(x_end+x_margin), "95%") + b[2:],
        ymin=0, ymax=max_y,
        xmin=min_x, xmax=max_x,
    )
    axes[i].extend([tree_ax, alpha_ax])

    # Common names padded to equal width; chromosome-level species get a "*"
    # that takes the place of one padding character.
    labels = []
    for sp in species:
        common = latin2common[sp]
        padding = max_sp_char - len(common)
        if sp in chrom_level:
            labels.append(common + "*" + " "*(padding-1))
        else:
            labels.append(common + " "*padding)
    t.draw(axes=tree_ax, tip_labels=labels, ybaseline=0, xbaseline=0, scalebar=True, **style)

    # Alpha table of this group restricted to the selected mutation type,
    # reshaped to {column: {species: value}}
    alpha_df = alphas[i]
    alpha_df = alpha_df[alpha_df["mut_type"] == mut_type].reset_index(drop=True)
    alpha_d = alpha_df.set_index("species").to_dict()
    point_alpha = [alpha_d["alpha"][sp] for sp in species]
    alpha_ax.scatterplot(point_alpha, np.arange(len(species)), color=c, size=3)

    yrectmargin = 0.3
    for leaf, sp in enumerate(species):

        leaf_mod = leaf #+ y_slide

        # Confidence interval of alpha
        alpha_ax.plot(
            [alpha_d["alpha_lwr"][sp], alpha_d["alpha_upr"][sp]],
            [leaf_mod, leaf_mod],
            color=c,
            style={"stroke-width": 1.5},
        )
        # Interval widened by the generation-ratio bounds, drawn semi-transparent
        alpha_ax.plot(
            [alpha_d["lwr_genratio"][sp], alpha_d["upr_genratio"][sp]],
            [leaf_mod, leaf_mod],
            color=c, opacity=0.4,
            style={"stroke-width": 1.5},
        )

        # DNM estimates: gray box for the interval, thin black box at the
        # point estimate. Microcebus_murinus is explicitly left out.
        # NOTE(review): the reason for the exclusion is not visible here -
        # possibly a non-finite log value; confirm.
        if sp in dnm_ci and sp != "Microcebus_murinus":
            lower_dnm, alpha, upper_dnm = dnm_ci[sp]
            if upper_dnm >= max_x:
                upper_dnm = max_x
            alpha_ax.rectangle(lower_dnm, upper_dnm, leaf_mod-yrectmargin, leaf_mod+yrectmargin,
                               opacity=0.15, color="gray")
            alpha_ax.rectangle(alpha-0.003, alpha+0.003, leaf_mod-yrectmargin, leaf_mod+yrectmargin,
                               opacity=0.30, color="black")

    # Mean of the (log) point estimates within this group, drawn as a thin
    # vertical line
    group_alpha = np.mean(point_alpha)
    line_space = 0.5
    alpha_ax.plot([group_alpha, group_alpha],
                  [-line_space, len(species)-1+line_space],
                  color=c, style={"stroke-width": 0.15})
    # Reference line at alpha = 1, i.e. log(1) on this scale
    alpha_ax.plot([np.log(1), np.log(1)],
                  [-line_space, len(species)-1+line_space],
                  color="gray", style={"stroke-width": 0.2, "stroke-dasharray": "2, 2"})

    # "Invisible" (white) datapoints so the x range is the same in every group
    alpha_ax.scatterplot(max_x, 1, color="white")
    alpha_ax.scatterplot(min_x, 0.5, color="white")

    # Axis display is switched off for every group but the last, which keeps
    # only its x axis
    if i+1 != len(trees):
        alpha_ax.show = False
    else:
        alpha_ax.y.show = False
        alpha_ax.x.ticks.show = True
        # Ticks sit at log positions but are labelled with the linear alpha values
        xticks = [1,2,3,4,5]
        xlocations = np.log(xticks)
        xlabels = xticks
        alpha_ax.x.ticks.locator = toyplot.locator.Explicit(locations=xlocations, labels=xlabels)
        alpha_ax.x.label.text = "\u03B1"

toyplot.pdf.render(canvas, "../pdfs/Figure2_log.pdf")