# Shadie demo (v.0.1.1)

In [1]:
import numpy as np
import shadie
import toytree

# Report the installed version of each package used in this demo.
for label, package in (("shadie", shadie), ("toytree", toytree)):
    print(label, package.__version__)

shadie 0.1.1
toytree 3.0.dev1


### Create a simple chromosome

In [2]:
# Ordered (start, end) intervals paired with the element type assigned to each:
# non-coding flanks around an exon / intron / exon stretch.
layout = [
    ((0, 1_200_000), shadie.NONCDS),
    ((1_200_001, 1_400_000), shadie.EXON),
    ((1_400_001, 1_600_000), shadie.INTRON),
    ((1_600_001, 1_800_000), shadie.EXON),
    ((1_800_001, 5_000_000), shadie.NONCDS),
]

# dict() keeps the insertion order of the pairs above.
chrom = shadie.chromosome.explicit(dict(layout))

# Trailing semicolon hides the repr of the value returned by draw().
chrom.draw();

### Set up models

In [14]:
# Build the first model inside a shadie.Model context manager, which binds the
# model object to `model_1`.
with shadie.Model() as model_1:
    # Same chromosome, run length and rates as model_2; only file_out differs,
    # so each model writes its own .trees file.
    model_1.initialize(
        chromosome=chrom, 
        sim_time=2000,
        mutation_rate=1e-8, 
        recomb_rate=1e-8,
        file_out="test-1.trees",
    )
    # NOTE(review): the two positional arguments are presumably the population
    # sizes of the two life stages (the recorded metadata reports gam_pop_size
    # and spo_pop_size of 500) - confirm the argument order against shadie.
    model_1.reproduction.bryophyte_dioicous(500, 500)

In [15]:
# Build the second model; it is configured exactly like model_1 except for the
# output path.
# NOTE(review): this cell duplicates the model_1 cell apart from file_out; a
# small helper taking file_out as a parameter would remove the copy-paste.
with shadie.Model() as model_2:
    model_2.initialize(
        chromosome=chrom, 
        sim_time=2000,
        mutation_rate=1e-8, 
        recomb_rate=1e-8,
        file_out="test-2.trees",
    )
    # NOTE(review): the two positional arguments are presumably the population
    # sizes of the two life stages - confirm the argument order against shadie.
    model_2.reproduction.bryophyte_dioicous(500, 500)

## Set up serial runs
Each simulation is run with a new random seed. When using simulations for data analysis, we suggest saving the list of random seeds and reusing values from that list so that the simulations can be re-run reproducibly.

In [24]:
# Save the models and their seeds to lists so that each run can be repeated
# later with exactly the same seed.
models = [model_1, model_2]

# Draw one seed per model; randint(2**31) returns an integer in [0, 2**31).
seeds = [np.random.randint(2**31) for _ in models]

# End the cell with the list itself so the drawn seeds are displayed and stored
# with the notebook output; a cell ending in a loop displays nothing.
seeds

[1343067109, 894743138]

In [None]:
# Run the simulations one after another, giving each model the seed saved at
# the same index of `seeds`.
for position, simulation in enumerate(models):
    simulation.run(seed=seeds[position])

### Run two populations in parallel

You can also use `ProcessPoolExecutor` to run the simulations in parallel. Each simulation model is run with a different random seed and writes to a different `.trees` file path. The path to a custom `slim` binary can also be passed as an argument; the cell below omits it and passes only the seed.

In [None]:
from concurrent.futures import ProcessPoolExecutor

In [31]:
# Run both models concurrently, one worker process per model, each with its
# own freshly drawn random seed.
with ProcessPoolExecutor(2) as pool:
    futures = [
        pool.submit(model.run, seed=np.random.randint(2**31))
        for model in [model_1, model_2]
    ]
    # submit() stores any exception raised in the worker on the Future instead
    # of raising it here. Calling result() waits for the run to finish and
    # re-raises that exception, so a failed simulation is not silently ignored.
    for future in futures:
        future.result()

### Get post-processor

In [25]:
# Load the two .trees files written by model_1 and model_2 into a single
# post-processing object. The rates and population size passed here repeat the
# values used when the models were set up.
post = shadie.postsim.TwoSims(
    tree_files=["test-1.trees", "test-2.trees"],
    mut=1e-8,
    recomb=1e-8,
    popsize=500,
    chromosome=chrom, # necessary for some drawing functions, but not for analysis tools
)



In [26]:
# Look up population 2 of the tree sequence; the recorded output shows it is
# the ancestral population simulated by msprime.
post.tree_sequence.population(2)

Population(id=2, metadata={'description': 'ancestral population simulated by msprime', 'name': 'ancestral', 'slim_id': 2})

### Plot simulation summary

In [27]:
# Draw the tree-sequence summary with a fixed seed; the trailing semicolon
# hides the repr of the returned value.
# NOTE(review): `sample` is presumably the number of samples drawn - confirm.
post.draw_tree_sequence(sample=6, seed=333);

### Plot individual trees

In [6]:
# Draw the tree at index 0 with a fixed seed; the trailing semicolon hides the
# repr of the returned value.
# NOTE(review): `sample=[10, 10]` presumably gives the number of samples taken
# from each of the two populations - confirm.
post.draw_tree(idx=0, sample=[10, 10], seed=123);

### Calculate statistics

In [7]:
# Compute summary statistics; the recorded output tabulates the mean and the
# 5% / 95% CI bounds for theta, Fst, Dist and D_Taj (presumably Tajima's D).
# NOTE(review): `sample` and `reps` presumably set the sample size per
# replicate and the number of replicates - confirm.
post.stats(sample=10, reps=20)

Unnamed: 0,mean,CI_5%,CI_95%
theta_0,1.4e-05,1.4e-05,1.5e-05
theta_1,1.3e-05,1.2e-05,1.3e-05
Fst_01,0.70654,0.699409,0.71367
Dist_01,7.9e-05,7.8e-05,7.9e-05
D_Taj_0,0.109832,-0.045565,0.265229
D_Taj_1,0.112428,-0.070684,0.295539


## Access Metadata
You can access the parameter settings from SLiM in the tree sequence metadata.

In [11]:
# Show the user metadata stored under the "SLiM" key of the tree-sequence
# metadata; the recorded output lists the gam_* and spo_* settings.
post.tree_sequence.metadata["SLiM"]["user_metadata"]

{'gam_archegonia_per': ['10'],
 'gam_clone_rate': ['0.8'],
 'gam_clones_per': ['10'],
 'gam_female_to_male_ratio': ['0.6666666666666666'],
 'gam_k': ['5000'],
 'gam_maternal_effect': ['0.5'],
 'gam_mutation_rate': ['5e-09'],
 'gam_pop_size': ['500'],
 'gam_random_death_chance': ['0.08'],
 'spo_mutation_rate': ['5e-09'],
 'spo_pop_size': ['500'],
 'spo_random_death_chance': ['0.08'],
 'spo_self_rate_per_egg': ['0.1'],
 'spo_spores_per': ['10']}