# Database generation
This notebook demonstrates how to use `simcat.Database` objects to simulate a database of invariant matrices representing admixture over every edge of an input topology, under a range of demographic scenarios.

In [1]:
import simcat
import toytree
import numpy as np
import h5py

In [3]:
## species tree: a seeded 6-tip tree from toytree, with its root height rescaled to 3
tree = toytree.rtree.unittree(ntips=6, seed=12345).mod.node_scale_root_height(3)

## draw the tree with the 'c' tree style; keep the two returned handles
## (presumably the toyplot canvas and axes -- confirm against the toytree version in use)
c, a = tree.draw(tree_style='c')

In [12]:
# Set up the simulation database for the species tree defined above.
# NOTE(review): parameter meanings below are inferred from their names --
# confirm against the simcat.Database documentation.
db = simcat.Database(
    # where the database files are written and what they are called
    name="tr5-t5-r2-s1000",
    workdir="../databases",
    force=True,          # presumably overwrites an existing database of the same name
    # what to simulate
    tree=tree,
    nedges=1,            # presumably the number of admixture edges per scenario
    ntests=5,
    nreps=2,
    nsnps=1000,
    theta=(1.0, 0.1),
)

320 sims: /home/deren/Documents/simcat-eaton-lab/databases/tr5-t5-r2-s1000.labels.h5


In [13]:
# Run every simulation registered in the database.
# NOTE(review): auto=True appears to let simcat set up the parallel backend
# itself (see the "Establishing parallel connection" output) -- confirm.
db.run(auto=True)

Box(children=(HTML(value="<span style='font-size:14px; font-family:monospace'>Establishing parallel connection…

Box(children=(HTML(value="<span style='font-size:14px; font-family:monospace'>Parallelization: oud: 4 cores</s…

### View resulting databases

In [59]:
# Plot the counts matrix stored for simulation 0, quartet 0.
# Open the file explicitly read-only: this cell only inspects the database,
# and older h5py releases did not default to read-only when no mode was given.
with h5py.File(db.counts, "r") as io5:
    # indexing the dataset copies the selected matrix into memory
    counts = io5['counts'][0, 0]
    simcat.plot.draw_count_matrix(
        counts, 
        show_invariants=False, 
        height="600px", 
        width="600px",
        # passed via dict unpacking because "font-size" is not a valid identifier
        **{"font-size": "8px"}
    )

In [53]:
# The labels file: print the parameters recorded for simulation 0,
# followed by the file-level attributes (tree and number of SNPs).
# Opened explicitly read-only since this cell only inspects the database.
with h5py.File(db.labels, "r") as io5:
    # per-simulation datasets; index 0 selects the first simulation
    for key in ("thetas", "admix_sources", "admix_targets", "admix_times", "admix_props"):
        print(io5[key][0])
    # metadata stored as HDF5 attributes on the file
    for attr in ("tree", "nsnps"):
        print(io5.attrs[attr])

0.39901473699216816
[4]
[7]
[2.625]
[0.49896081]
(4:3,(3:2.25,(2:1.5,(1:0.75,0:0.75)1:0.75)1:0.75)1:0.75);
1000
