## Generation-time branch attraction

This notebook focuses on the effects of variation in effective population size (Ne) among lineages.

In [1]:
import os

import toytree
import toyplot, toyplot.svg
import ipcoal
import numpy as np
import ipyrad.analysis as ipa

### Simulation scenario with deep divergences

In [2]:
# build the ultrametric, imbalanced starting tree used by all simulations
NTIPS = 8
TREEHEIGHT = 20e6
tree = toytree.rtree.imbtree(NTIPS, treeheight=TREEHEIGHT)
tree.draw(ts='p');

### Make the Ne tree

In [3]:
# set Ne (effective population size) on nodes: the listed node idxs get
# Ne=100e6 and every other node gets the default Ne=10e6
ntree = tree.set_node_values(
    feature="Ne",
    values={i: 100e6 for i in (3, 4, 5, 10, 11, 12)},
    default=10e6,
)

# store tc (coalescent time units) for every node dist: tc = dist / (2 * Ne)
ntree = ntree.set_node_values(
    feature="tc",
    values={
        i: node.dist / (2 * node.Ne)
        for i, node in ntree.idx_dict.items()
    },
)

# draw the tree with nodes labeled by their 'tc' value
ntree.draw(
    ts='p',
    width=400,
    node_sizes=0,
    node_labels=ntree.get_node_values('tc', 0, 0),
    edge_type='c',
);

# save to file; create the output dir first so that a fresh checkout
# (Restart Kernel -> Run All) does not fail on a missing ./trees directory
os.makedirs("./trees", exist_ok=True)
ntree.write("./trees/n-tree.nwk")

### Simulate a concatenated sequence (chromosome)

In [None]:
# simulate one long locus (1e6 sites) on the Ne tree to act as a
# concatenated chromosome; seeded like the gene-tree simulation later in
# this notebook so that the result is reproducible across re-runs
model = ipcoal.Model(ntree, seed=12345)
model.sim_loci(nloci=1, nsites=1e6)

### Confirm concatenation is in the anomaly zone 

Here we aim to select a scenario that will lie in the 'anomaly zone', where concatenation will yield incorrect results but a proper MSC method should infer a correct result.

In [4]:
# show the total number of genealogies: count the rows of model.df rather
# than taking tidx.max(), which under-counts by one if tidx is 0-indexed
# NOTE(review): assumes one row per genealogy in model.df -- confirm
print('ngenealogies:', len(model.df))
print('mean len of gtree: {:.2f} bp'.format(model.df.nbps.mean()))

# infer concat tree (a single tree is inferred for the single long locus)
model.infer_gene_trees(inference_args={"T": '20'})

# draw the concat tree rooted on 'r7'
etree = toytree.tree(model.df.inferred_tree[0]).root('r7')
etree.draw(ts='o');

# save the tree; create the output dir first so a fresh checkout does not
# fail on a missing ./trees directory
os.makedirs("./trees", exist_ok=True)
etree.write("./trees/n-concat.nwk")

### Confirm ASTRAL-genealogy is not an anomaly
ASTRAL infers the correct tree when given more than ~500 trees as input.

In [7]:
# simulate 10000 genealogies on the Ne tree (no long chromosome here);
# seeded like the gene-tree simulation later in this notebook so that the
# set of genealogies is reproducible across re-runs
model = ipcoal.Model(ntree, seed=12345)
model.sim_trees(10000, 1)

In [9]:
# read the simulated genealogies into a multitree and report how many
mtre = toytree.mtree(model.df.genealogy)
print(len(mtre))

# rescale every edge length by 1e-8 (bl units -> E(subst.))
for gtree in mtre.treelist:
    for gnode in gtree.idx_dict.values():
        gnode.dist *= 1e-8

# run ASTRAL on the newick strings of the rescaled genealogies
newicks = [gtree.write() for gtree in mtre.treelist]
ast = ipa.astral(data=newicks, name="n-astral-genealogy")
ast.run()

# root the inferred species tree on 'r7' and draw it
astral_tree = toytree.tree(ast.tree).root("r7")
astral_tree.draw(ts='o');

10000
[astral.5.7.1.jar]
inferred tree written to (/home/deren/gentime-attraction/notebooks/analysis-astral/n-astral-genealogy.tre)


### Confirm SNAQ-genealogy is not an anomaly

In [None]:
# simulate 10000 genealogies on the Ne tree (no long chromosome here);
# seeded like the gene-tree simulation later in this notebook so that the
# set of genealogies is reproducible across re-runs
model = ipcoal.Model(ntree, seed=12345)
model.sim_trees(10000, 1)

In [11]:
# read the simulated genealogies into a multitree and report how many
mtre = toytree.mtree(model.df.genealogy)
print(len(mtre))

# rescale every edge length by 1e-8 (bl units -> E(subst.))
for gtree in mtre.treelist:
    for gnode in gtree.idx_dict.values():
        gnode.dist *= 1e-8

# dump the rescaled trees to a newick file that SNaQ reads as input
gtrees_path = "/tmp/trees.nwk"
mtre.write(gtrees_path)

# infer a network with one hybrid edge, starting from the ASTRAL tree
snaq = ipa.snaq(
    gtrees=gtrees_path,
    netin="./analysis-astral/n-astral-genealogy.tre",
    name="n-snaq-genealogy",
    nedges=1,
    nruns=10,
    nproc=10,
)
snaq.run()

10000
using existing CF table: /home/deren/gentime-attraction/notebooks/analysis-snaq/n-snaq-genealogy.CFs.csv
[SNAQ v.x.y]
[nproc = 10]
julia /home/deren/gentime-attraction/notebooks/analysis-snaq/n-snaq-genealogy.jl
inferred network written to (/home/deren/gentime-attraction/notebooks/analysis-snaq/n-snaq-genealogy.snaq)


In [20]:
# root the SNaQ result on 'r7' and draw it with the inferred admixture edges
snaq_tree = toytree.tree(snaq.tree).root("r7")
snaq_tree.draw(ts='s', admixture_edges=snaq.admix.values());

### Estimate gene trees

In [None]:
# simulate 10000 loci of 2e3 (2000) sites each with a fixed seed, then
# save both the loci and the SNPs to HDF5 files in ./db under the same name
model = ipcoal.Model(ntree, seed=12345)
model.sim_loci(nloci=10000, nsites=2e3)
for write_hdf5 in (model.write_loci_to_hdf5, model.write_snps_to_hdf5):
    write_hdf5(name="8tips-imb-10K-n", outdir="db")

In [24]:
# NOTE(review): disabled ipa.treeslider stub; nothing in this cell executes.
# Gene trees are inferred by the explicit window_extracter + raxml loop
# further down instead -- complete this call or remove the cell.
# ipa.treeslider(
#     data="./db/8tips-imb-10K-n.seqs.hdf5",
# )

In [21]:
# path of the sequence HDF5 database passed to ipa.window_extracter
# (presumably the file written by model.write_loci_to_hdf5 with
# name="8tips-imb-10K-n", outdir="db" -- verify)
NSEQS = "./db/8tips-imb-10K-n.seqs.hdf5"

In [None]:
# All RAxML results go into a single directory. This must be the same
# directory that is globbed for '*bestTree*' files when the gene trees are
# collected for SNaQ. (The previous workdir was formatted with an undefined
# name `dname`, which raised NameError on a fresh kernel.)
RAXML_WORKDIR = "./analysis-raxml/8tips-n"

# infer one gene tree for each of the first 1000 loci in the database
for idx in range(1000):

    # load H5 and write locus `idx` to a tmp phylip file
    wex = ipa.window_extracter(
        NSEQS,
        name=str(idx),
        scaffold_idxs=idx,
        quiet=True,
    )
    wex.run(force=True)

    # load phylip and infer raxml tree; the output is named by locus idx
    rax = ipa.raxml(
        wex.outfile,
        name=str(idx),
        workdir=RAXML_WORKDIR,
        quiet=True,
        T=50,
        N=10,
        f='d',
        p=12345,
        x=None,
    )
    rax.run(force=True, quiet=True)

In [33]:
import glob

# gather every RAxML best-tree file, parse each one into a tree, and write
# the whole collection to a single newick file
trees = glob.glob("./analysis-raxml/8tips-n/*bestTree*")
parsed_trees = list(map(toytree.tree, trees))
mtre = toytree.mtree(parsed_trees)
mtre.write("/tmp/trees.nwk")

In [34]:
# configure SNaQ on the inferred gene trees, starting from the ASTRAL tree
snaq_args = dict(
    gtrees="/tmp/trees.nwk",
    netin="./analysis-astral/n-astral-genealogy.tre",
    name="n-snaq-genetrees",
    nedges=1,
    nruns=10,
    nproc=10,
)
snaq = ipa.snaq(**snaq_args)

In [35]:
# run the SNaQ analysis configured in the `snaq` object
snaq.run()

[SNAQ v.x.y]
[nproc = 10]
julia /home/deren/gentime-attraction/notebooks/analysis-snaq/n-snaq-genetrees.jl
inferred network written to (/home/deren/gentime-attraction/notebooks/analysis-snaq/n-snaq-genetrees.snaq)


In [36]:
# root the SNaQ result on 'r7' and draw it with the inferred admixture edges
snaq_tree = toytree.tree(snaq.tree).root("r7")
snaq_tree.draw(ts='s', admixture_edges=snaq.admix.values());