# Notebook 4: BPP species tree inference

Estimate parameters of a species tree model under the multi-species coalescent. 

In [1]:
# conda install ipyrad -c ipyrad
# conda install bpp -c eaton-lab
# conda install toytree -c eaton-lab

In [2]:
import ipyrad.analysis as ipa
import ipyparallel as ipp
import pandas as pd
import toytree

In [3]:
# record the library versions this notebook was executed with
print("{} {}".format("ipyrad", ipa.__version__))
print("{} {}".format("toytree", toytree.__version__))

ipyrad 0.8.0-dev
toytree 0.1.19


### Connect to parallel client

In [5]:
# connect to an ipyparallel cluster; the BPP jobs below are submitted through
# this client (assumes a cluster, e.g. started with `ipcluster start`, is
# already running -- TODO confirm profile/cluster settings for your machine)
ipyclient = ipp.Client()

### BPP fixed species tree parameter inference (algorithm 00)

#### Species tree hypothesis

In [6]:
# fixed species-tree topology (newick, no branch lengths) for the seven taxa;
# passed to ipa.bpp() as the guide tree further down
sptree = "((bran,fusi),((sagr,oleo),(virg,(mini,gemi))));"
# draw topology only (the string has no edge lengths); trailing ';' hides the repr
toytree.tree(sptree).draw(use_edge_lengths=False);

#### Set up clade definitions

In [7]:
# which samples are in which clade (species name -> list of sample names).
# Each sample name must be listed exactly once: repeated names inflate the
# apparent number of samples in a clade, which matters for the `minmap`
# coverage filter below.
# TODO(review): "gemi" and "bran" previously contained repeated names
# (FLSF54, BJSL25, BJVL19); confirm whether other distinct samples were meant.
imap = {
    "virg" : ["TXWV2", "SCCU3", "LALC2", "FLSF33", "FLBA140"],
    "sagr" : ["CUCA4", "CUMM5", "CUSV6", "CUVN10"],
    "fusi" : ["TXGR3", "TXMD3", "MXED8", "MXGT4"],
    "gemi" : ["FLSF54", "FLWO6", "FLCK18", "FLAB109"],
    "bran" : ["BJSB3", "BJSL25", "BJVL19"],
    "oleo" : ["BZBB1", "CRL0001", "CRL0030", "MXSA3017"],
    "mini" : ["FLCK216", "FLMO62", "FLSA185", "FLSF47"],
}

# guard against copy/paste mistakes: no sample may be listed twice,
# either within one clade or across clades
assert (
    sum(len(names) for names in imap.values())
    == len({sample for names in imap.values() for sample in names})
), "duplicate sample name in imap"

# minimum samples in each clade for loci to be included in analysis
minmap = {name: 2 for name in imap}

#### BPP -- algorithm 00 -- denovo data set

In [8]:
# BPP analysis object for the denovo assembly
denovo = ipa.bpp(
    # analysis label and output location
    name="denovo",
    workdir="../analysis-bpp",
    # input loci from the denovo ipyrad assembly
    data="../analysis-ipyrad/denovo_outfiles/denovo.loci",
    # fixed topology plus the sample-to-species assignment and coverage filter
    guidetree=sptree,
    imap=imap,
    minmap=minmap,
)

In [9]:
# MCMC settings for the BPP run (names appear to mirror BPP control-file
# options -- TODO confirm exact meaning/units against the BPP manual)
denovo.params.burnin = 1000
denovo.params.nsample = 100000
denovo.params.sampfreq = 100
# cap on the number of loci analysed; the job log reports "(100 loci)" per run
denovo.filters.maxloci = 100

In [10]:
# display the complete parameter set, including defaults that were not
# explicitly set in this notebook (seed, priors, finetune, ...)
denovo.params

binary          bpp                 
burnin          1000                
cleandata       0                   
delimit_alg     (0, 5)              
finetune        (0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01)
infer_delimit   0                   
infer_sptree    0                   
nsample         100000              
sampfreq        100                 
seed            12345               
tauprior        (2, 2000, 1)        
thetaprior      (2, 2000)           
usedata         1                   

In [12]:
# prior-only control: same settings as denovo, but usedata=0 so the run uses
# no sequence data (prior only); kept for comparison with the data-driven run
ndenovo = denovo.copy("ndenovo")
ndenovo.params.usedata = 0

#### BPP -- algorithm 00 -- reference data set

In [13]:
# reference data set: copy of the denovo analysis pointed at the
# reference-assembled loci file
# NOTE(review): elsewhere in this notebook file paths are read from `.files`
# (e.g. `.files.mcmcfiles`); confirm that assigning `.data` actually changes
# the input used by run() -- otherwise this analysis would silently reuse the
# denovo loci.
reference = denovo.copy("reference")
reference.data = "../analysis-ipyrad/reference_outfiles/reference.loci"

# no-data reference data set (prior-only control, usedata=0)
nreference = reference.copy("nreference")
nreference.params.usedata = 0

### Run analyses in parallel

In [14]:
# submit every analysis to the cluster, denovo first and then reference:
# 10 replicates for each data-driven run, 1 for each prior-only (no-data) run
for analysis, replicates in (
    (denovo, 10),
    (ndenovo, 1),
    (reference, 10),
    (nreference, 1),
):
    analysis.run(
        ipyclient=ipyclient,
        nreps=replicates,
        force=True,
        randomize_order=True,
    )

submitted 10 bpp jobs [denovo] (100 loci)
submitted 1 bpp jobs [ndenovo] (100 loci)
submitted 10 bpp jobs [reference] (100 loci)
submitted 1 bpp jobs [nreference] (100 loci)


### Summarize results

In [None]:
# the run() calls only submit jobs to the cluster (the log reads
# "submitted N bpp jobs"), so block until all outstanding jobs on this client
# have finished before summarizing -- otherwise a Restart & Run All would try
# to read results that do not exist yet
ipyclient.wait()
denovo.summarize_results()

In [17]:
# no-data (prior only) statistics: read the first MCMC trace of the prior-only
# denovo analysis and summarize each parameter's sampled distribution.
# NOTE(review): swap in `nreference` to inspect the reference data set instead.
btable = pd.read_csv(ndenovo.files.mcmcfiles[0], sep="\t", index_col=0)
btable.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
theta_1bran,1000.0,0.000564,0.000347,0.000236,0.000306,0.00037,0.00095,0.001448
theta_2fusi,1000.0,0.001175,0.00077,0.000487,0.000629,0.00076,0.00179,0.003803
theta_3gemi,1000.0,0.000527,0.000338,0.000217,0.000281,0.000339,0.000872,0.001509
theta_4mini,1000.0,0.000843,0.000537,0.000348,0.00045,0.000543,0.001417,0.00225
theta_5oleo,1000.0,0.000656,0.000415,0.000271,0.000351,0.000423,0.001088,0.001727
theta_6sagr,1000.0,0.001027,0.000655,0.000435,0.000562,0.000679,0.001483,0.003527
theta_7virg,1000.0,0.000794,0.000465,0.000343,0.000444,0.000536,0.001316,0.001952
theta_8branfusisagroleovirgminigemi,1000.0,0.004582,0.001861,0.001095,0.003015,0.005273,0.005897,0.00765
theta_9branfusi,1000.0,0.002451,0.00078,0.001105,0.001836,0.002246,0.002924,0.005494
theta_10sagroleovirgminigemi,1000.0,0.003086,0.000623,0.001623,0.002607,0.00297,0.003498,0.00535
