In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%load_ext rpy2.ipython

import ipyparallel as ipp
import matplotlib.pyplot as plt
import msprime
import numpy as np
import pandas as pd
import PIED
import toytree

from collections import Counter
from IPython.display import display, SVG

# Raise pandas' display limits so long/wide tables are not truncated when
# they are rendered in the notebook.
for option_name, option_value in (
    ("display.max_rows", 100),
    ("display.max_columns", 500),
):
    pd.set_option(option_name, option_value)

# Connect to the ipyparallel cluster registered under the id "cannabis".
# The trailing expression displays the client's length (presumably the
# number of connected engines -- the recorded output below is 40).
ipyclient = ipp.Client(cluster_id="cannabis")
len(ipyclient)

40

# No speciation rate shifts

In [None]:
# Configure the "vartest" simulation model. Parameters are applied one at a
# time, in the order listed, through PIED's `set_param` interface.
tree = PIED.Core("vartest")

vartest_params = {
    "ntaxa": 100,
    "birth_rate": 0.3,
    "process": "rate",
    "abundance_mean": 5e3,
    "abundance_sigma": 0,
    "growth_rate_mean": 5e-3,
    "growth_rate_sigma": 5e-3,
    # NOTE(review): the flag is passed as the string "False", not the bool;
    # presumably PIED parses it -- confirm against PIED's parameter handling.
    "ClaDS": "False",
    "ClaDS_sigma": 0.2,
    "ClaDS_alpha": 0.95,
    "sequence_length": 1000,
    "mutation_rate": 1e-9,
    "sample_size": 5,
}
for param_name, param_value in vartest_params.items():
    tree.set_param(param_name, param_value)

# Request the verbose parameter listing; the return value is bound to `_`
# so it is not echoed as the cell's output.
_ = tree.get_params(verbose=True)


In [None]:
# Run `nsims` simulations for each growth_rate_sigma value, largest first.
# The plotting cell further down slices the parsed results into consecutive
# groups of `nsims` rows using this same ordering.
nsims = 10
for sigma in (5e-3, 5e-4, 5e-5):
    tree.set_param("growth_rate_sigma", sigma)
    tree.simulate(nsims=nsims, ipyclient=ipyclient)

In [None]:
# Load the raw simulation output: space-delimited, first line is the header.
# NOTE(review): this is an absolute, machine-specific path.
simfile = "/home/isaac/PIED/jupyter-notebooks/default_PIED/vartest-SIMOUT.csv"
simout_raw = pd.read_csv(simfile, header=0, sep=" ")

# Each value of the data column packs all species of one simulation as
# comma-separated "name:abundance:pi:r:lambda" records. Unpack every record
# into a {species_name: stats_dict} mapping, one mapping per simulation.
sims = []
for packed in simout_raw["data(name:abundance:pi:r:lambda)"]:
    per_species = {}
    for entry in packed.split(","):
        name, abundance, pi_val, r_val, lambda_val = entry.split(":")
        per_species[name] = {
            "abundance": int(abundance),
            "pi": float(pi_val),
            "r": float(r_val),
            "lambda_": float(lambda_val),
        }
    sims.append(per_species)

# One row per simulation, one column per species name; each cell holds that
# species' stats dict.
df = pd.DataFrame(sims)

In [None]:
# Compare the spread of per-species `r` values between the three batches of
# simulations. Rows of `df` are assumed to be in run order: the first `nsims`
# rows from sigma 5e-3, the next `nsims` from 5e-4, and everything after
# that from 5e-5 -- TODO confirm the SIMOUT file holds only these runs.
large = df.iloc[:nsims]
med = df.iloc[nsims:nsims*2]
small = df.iloc[nsims*2:]

fig, ax = plt.subplots(figsize=(12, 8))
for r, dat in zip(["5e-3", "5e-4", "5e-5"], [large, med, small]):
    rs = []
    # Iterating a DataFrame yields its column labels (species names here),
    # so each pass collects one species' values across the batch.
    for species in dat:
        # A species missing from a simulation is stored as NaN rather than a
        # stats dict; drop those cells so the ["r"] lookup cannot fail.
        rs.extend(dat[species].dropna().apply(lambda rec: rec["r"]))
    # Standard deviation of all collected r values for this batch.
    print(np.std(rs))
    ax.hist(rs, bins=500, label=r, alpha=0.4)

ax.set_xlabel("r")
ax.set_ylabel("count")
ax.set_title("Per-species r by growth_rate_sigma (no ClaDS)")
ax.legend(title="growth_rate_sigma");

# Speciation rate shifts (ClaDS)

In [None]:
# Configure the "vartest-clads" simulation model: the same settings as the
# earlier "vartest" run, except that the ClaDS flag is set to "True".
tree = PIED.Core("vartest-clads")

clads_params = {
    "ntaxa": 100,
    "birth_rate": 0.3,
    "process": "rate",
    "abundance_mean": 5e3,
    "abundance_sigma": 0,
    "growth_rate_mean": 5e-3,
    "growth_rate_sigma": 5e-3,
    # NOTE(review): the flag is passed as the string "True", not the bool;
    # presumably PIED parses it -- confirm against PIED's parameter handling.
    "ClaDS": "True",
    "ClaDS_sigma": 0.2,
    "ClaDS_alpha": 0.95,
    "sequence_length": 1000,
    "mutation_rate": 1e-9,
    "sample_size": 5,
}
for param_name, param_value in clads_params.items():
    tree.set_param(param_name, param_value)

# Request the verbose parameter listing; the return value is bound to `_`
# so it is not echoed as the cell's output.
_ = tree.get_params(verbose=True)


In [None]:
# Run `nsims` ClaDS simulations for each growth_rate_sigma value, largest
# first. The plotting cell further down slices the parsed results into
# consecutive groups of `nsims` rows using this same ordering.
nsims = 10
for sigma in (5e-3, 5e-4, 5e-5):
    tree.set_param("growth_rate_sigma", sigma)
    tree.simulate(nsims=nsims, ipyclient=ipyclient)

In [None]:
# Load the raw ClaDS simulation output: space-delimited, first line is the
# header.
# NOTE(review): this is an absolute, machine-specific path.
simfile = "/home/isaac/PIED/jupyter-notebooks/default_PIED/vartest-clads-SIMOUT.csv"
simout_raw = pd.read_csv(simfile, header=0, sep=" ")

# Each value of the data column packs all species of one simulation as
# comma-separated "name:abundance:pi:r:lambda" records. Unpack every record
# into a {species_name: stats_dict} mapping, one mapping per simulation.
sims = []
for packed in simout_raw["data(name:abundance:pi:r:lambda)"]:
    per_species = {}
    for entry in packed.split(","):
        name, abundance, pi_val, r_val, lambda_val = entry.split(":")
        per_species[name] = {
            "abundance": int(abundance),
            "pi": float(pi_val),
            "r": float(r_val),
            "lambda_": float(lambda_val),
        }
    sims.append(per_species)

# One row per simulation, one column per species name; each cell holds that
# species' stats dict.
df = pd.DataFrame(sims)

In [None]:
# Compare the spread of per-species `r` values between the three batches of
# ClaDS simulations. Rows of `df` are assumed to be in run order: the first
# `nsims` rows from sigma 5e-3, the next `nsims` from 5e-4, and everything
# after that from 5e-5 -- TODO confirm the SIMOUT file holds only these runs.
large = df.iloc[:nsims]
med = df.iloc[nsims:nsims*2]
small = df.iloc[nsims*2:]

fig, ax = plt.subplots(figsize=(12, 8))
for r, dat in zip(["5e-3", "5e-4", "5e-5"], [large, med, small]):
    rs = []
    # Iterating a DataFrame yields its column labels (species names here),
    # so each pass collects one species' values across the batch.
    for species in dat:
        # A species missing from a simulation is stored as NaN rather than a
        # stats dict; drop those cells so the ["r"] lookup cannot fail.
        rs.extend(dat[species].dropna().apply(lambda rec: rec["r"]))
    # Standard deviation of all collected r values for this batch.
    print(np.std(rs))
    ax.hist(rs, bins=500, label=r, alpha=0.4)

ax.set_xlabel("r")
ax.set_ylabel("count")
ax.set_title("Per-species r by growth_rate_sigma (ClaDS enabled)")
ax.legend(title="growth_rate_sigma");