In [1]:
from ete3 import Tree
import toytree
import toyplot
import toyplot.pdf
import itertools
import math
from scipy import stats
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings
import sys
import statsmodels.stats.proportion
pd.options.mode.chained_assignment = None

Read trees and alphas

In [2]:
groups = ["Mammals","Birds","Snakes"]
refs = ["Homo_sapiens", "Gallus_gallus", "Thamnophis_elegans"]
trees, alphas = [], []
data_dir = "../.."

# For every group: read its alpha table (one file per group/reference pair),
# read its tree, and prune the tree down to the species present in the table.
for g, ref in zip(groups, refs):
    group_alphas = pd.read_csv("{}/scripts/alphas/{}.{}.LM.tsv".format(data_dir, g, ref),sep="\t")
    alphas.append(group_alphas)

    # Any "5" in the group name is stripped to build the tree file name
    # (a no-op for the group names listed above).
    phylo = toytree.tree("{}/trees/{}_phast.nh".format(data_dir, g.replace("5","")),tree_format=1)

    # Build the lookup once per group instead of once per tip.
    species_with_alpha = set(group_alphas.species)
    species_to_remove = [sp for sp in phylo.get_tip_labels() if sp not in species_with_alpha]
    phylo = phylo.drop_tips(species_to_remove)
    trees.append(phylo)

# Flat list of the tip labels of all trees, in group order.
# (A list of names, not a count; downstream cells use both its length and its items.)
total_species = list(itertools.chain.from_iterable(t.get_tip_labels() for t in trees))

# Palette: "Order" -> "Color"
palette = pd.read_csv("{}/data/plot_colors.tsv".format(data_dir),sep="\t").set_index("Order").to_dict()["Color"]

# Latin to common names: "Species" -> "Common_names"
latin2common = pd.read_csv("{}/data/latin2common_shortened.txt".format(data_dir),sep="\t").set_index("Species").to_dict()["Common_names"]

Sex-specific generation time uncertainty

In [3]:
def miyata(xar):
    """Alpha estimate from the X/A substitution rate ratio.

    Computes (4 - 3*xar) / (3*xar - 2). The denominator is zero when
    3*xar equals 2, in which case the division fails.
    """
    scaled_ratio = 3*xar
    return (4-scaled_ratio)/(scaled_ratio-2)

def reverse_miyata(alpha):
    """Given alpha, return the X/A ratio.

    Computes (2*alpha + 4) / (3*alpha + 3); the denominator is zero
    when alpha equals -1.
    """
    numerator = 2*alpha+4
    denominator = float(3*alpha+3)
    return numerator/denominator

ratio_interval = [0.9, 1.1]

# X/A ratios corresponding to the two ends of ratio_interval; they do not
# depend on the species, so compute them once.
xa_at_low_ratio = reverse_miyata(ratio_interval[0])
xa_at_high_ratio = reverse_miyata(ratio_interval[1])

# (new column, source column, X/A divisor): the lower alpha bound is rescaled
# with the upper end of ratio_interval and the upper bound with the lower end.
genratio_columns = (
    ("lwr_genratio", "alpha_lwr", xa_at_high_ratio),
    ("upr_genratio", "alpha_upr", xa_at_low_ratio),
)
for a_df in alphas:
    for new_col, source_col, xa_divisor in genratio_columns:
        # Convert alpha to X/A, rescale it, then convert back to alpha.
        a_df[new_col] = a_df[source_col].apply(
            lambda bound, divisor=xa_divisor: miyata(reverse_miyata(bound)/divisor)
        )

Alpha from pedigree de novo mutations (DNMs)

In [4]:
# Tab-separated table of DNM counts, read by alpha_from_pedigrees below.
dnm_file = "{}/data/dnm_est_noDups.tsv".format(data_dir)

def alpha_from_pedigrees(dnm_file):
    """Read DNM counts from a tab-separated file and compute alpha with CIs.

    The file must provide the columns "Species", "Pat DNMs" and "Mat DNMs".
    Rows are keyed by the first two whitespace-separated words of "Species"
    joined with "_"; if several rows share a key, the last one wins.

    Returns a dict mapping each key to [lower, alpha, upper], where alpha is
    paternal/maternal and lower/upper are the limits returned by
    statsmodels' proportion_confint (called with its default arguments) for
    the paternal fraction, converted from a proportion p to the ratio p/(1-p).
    """
    dnm_df = pd.read_csv(dnm_file, sep="\t")

    counts_by_species = {}
    for _, row in dnm_df.iterrows():
        species_key = "_".join(row.Species.split()[:2])
        counts_by_species[species_key] = [row["Pat DNMs"], row["Mat DNMs"]]

    def as_ratio(fraction):
        # Paternal fraction p -> paternal/maternal ratio p/(1-p)
        return fraction/(1-fraction)

    dnm_ci = {}
    for sp, (paternal, maternal) in counts_by_species.items():
        alpha = paternal/maternal
        llimit, ulimit = statsmodels.stats.proportion.proportion_confint(
            paternal, paternal+maternal
        )
        dnm_ci[sp] = [as_ratio(llimit), alpha, as_ratio(ulimit)]
    return dnm_ci
# Per-species [lower bound, alpha, upper bound], keyed by "Genus_species".
dnm_ci = alpha_from_pedigrees(dnm_file)

Calculate bounds

In [5]:
# Canvas layout, all in percent of the canvas size.
y_top_margin = 5
y_bottom_margin = 5
y_margin = 1      # vertical gap between consecutive group panels
x_margin = 5
x_start = 5
x_end = 45

# Height of one species row. The vertical budget is the canvas minus both
# margins, shared between one slot per species plus one slot per gap between
# groups. Note that the gaps are budgeted as one `unit` each here but are
# drawn `y_margin` high below.
total_spaces = len(total_species) + len(trees)-1
unit = (100 - y_top_margin - y_bottom_margin)/total_spaces

# One (xmin, xmax, ymin, ymax) tuple of percentage strings per group, stacked
# top to bottom; each panel's height is proportional to its number of tips.
bounds = []
for i, t in enumerate(trees):
    n_species = len(t.get_tip_labels())
    # The first panel starts at the top margin; every later one starts
    # y_margin below the end of the previous panel.
    y_start = y_top_margin if i == 0 else y_end + y_margin
    y_end = y_start + unit*n_species
    bounds.append(tuple("{:.9f}%".format(edge)
                        for edge in (x_start, x_end, y_start, y_end)))

Plot

In [39]:
mut_type = "mod"
# Right-hand limit of the alpha axis. A single value drives the axis range,
# the clipping of the DNM intervals and the invisible anchor point.
max_alpha_x = 6
max_x = max_alpha_x
style = {
    "tip_labels_align": True,
    "edge_style":{"stroke-width": 1},
    "tip_labels_style": {"font-size": "9px"},
    "edge_align_style": {"stroke-dasharray": "2,5", "stroke-width": 1}
}

# Padding for labels: every common name is padded to the width of the longest
# one, which aligns the trees of the different groups.
max_sp_char = max(len(latin2common[sp]) for sp in total_species)

# Half-height of the DNM rectangles, in row units
yrectmargin = 0.3

canvas = toyplot.Canvas(width=500, height=650)
axes = [[] for g in groups]

for i,(t,b) in enumerate(zip(trees, bounds)):

    species = t.get_tip_labels()
    c = palette[groups[i]]
    is_last_group = (i+1 == len(trees))

    # Two panels per group: the tree on the left (axes[i][0]) and the alpha
    # estimates on the right (axes[i][1]). Both use the same vertical bounds
    # and y-range so that row k of the right panel lines up with tip k.
    max_y = len(species)
    tree_axes = canvas.cartesian(bounds=b,
                                 ymin=0, ymax=max_y)
    alpha_axes = canvas.cartesian(bounds=("{:.2f}%".format(x_end+x_margin),"95%") + b[2:],
                                  ymin=0, ymax=max_y,
                                  xmin=0, xmax=max_x)
    axes[i].extend([tree_axes, alpha_axes])

    # Tree
    labels = [latin2common[sp].ljust(max_sp_char) for sp in species]
    t.draw(axes=tree_axes, tip_labels=labels, ybaseline=0, xbaseline=0, **style)

    # Alpha point estimates for the selected mutation type
    group_df = alphas[i]
    group_df = group_df[group_df["mut_type"]==mut_type].reset_index(drop=True)
    alpha_d = group_df.set_index("species").to_dict()
    point_alpha = [alpha_d["alpha"][sp] for sp in species]
    alpha_axes.scatterplot(point_alpha, np.arange(len(species)), color=c, size=3)

    for leaf,sp in enumerate(species):

        # Interval alpha_lwr..alpha_upr (solid) and the interval
        # lwr_genratio..upr_genratio (same colour, fainter) on the same row
        alpha_axes.plot([alpha_d["alpha_lwr"][sp], alpha_d["alpha_upr"][sp]],
                        [leaf, leaf], color=c,
                        style={"stroke-width": 1.5})
        alpha_axes.plot([alpha_d["lwr_genratio"][sp], alpha_d["upr_genratio"][sp]],
                        [leaf, leaf], color=c, opacity=0.4,
                        style={"stroke-width": 1.5})

        # DNMs: grey box for the pedigree interval, red tick for its estimate
        if sp in dnm_ci:
            lower_dnm, dnm_alpha, upper_dnm = dnm_ci[sp]
            # Keep the box inside the plotted x-range
            if upper_dnm>=max_x:
                upper_dnm = max_x
            alpha_axes.rectangle(lower_dnm, upper_dnm,
                                 leaf-yrectmargin, leaf+yrectmargin,
                                 opacity=0.15, color="gray")
            alpha_axes.rectangle(dnm_alpha-0.02, dnm_alpha+0.02,
                                 leaf-yrectmargin, leaf+yrectmargin,
                                 opacity=0.30, color="red")

    # Mean of the point estimates within this group
    group_alpha = np.mean(point_alpha)
    alpha_axes.plot([group_alpha,group_alpha],[0,len(species)], color=c,style={"stroke-width": 0.15})
    # Alpha = 1
    alpha_axes.plot([1,1],[-0.2,len(species)], color="gray", style={"stroke-width": 0.2, "stroke-dasharray":"2, 2"})

    # "Invisible" datapoints so axes are preserved across groups
    alpha_axes.scatterplot(max_alpha_x,1, color="white")
    alpha_axes.scatterplot(0,0.5, color="white")

    # The tree panel never shows axes; the alpha panel shows only its x axis,
    # and only for the last (bottom) group.
    tree_axes.show = False
    if is_last_group:
        alpha_axes.y.show = False
        alpha_axes.x.ticks.show = True
        alpha_axes.x.label.text = "\u03B1"
    else:
        alpha_axes.show = False

# Uncomment to export the figure:
#toyplot.pdf.render(canvas, "../pdfs/Figure2_27Oct.pdf")