In [1]:
import logging
import numpy as np
import scipy.stats
import seaborn as sns
import matplotlib.pyplot as plt
import elfi
from sim.utils import ScaledDist, plot_prior
from sim.model import elfi_sim
from sim.sum_stats import elfi_summary
import scipy.stats
logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)


### Priors

To ensure proper scaling the parameters (exluding migration rates) are given as ScaledDists, for which the sampling distributions are on similar scales, and they are then scaled up in the simulator.

In [2]:
priors = {
    "bottleneck_strength_domestic": ScaledDist(scipy.stats.uniform(loc=0, scale=1),
                                               scipy.stats.uniform(loc=0, scale=30000)),
    "bottleneck_strength_wild": ScaledDist(scipy.stats.uniform(loc=0, scale=1),
                                           scipy.stats.uniform(loc=0, scale=30000)),
    "bottleneck_time_domestic": ScaledDist(scipy.stats.norm(loc=0, scale=1),
                                           scipy.stats.norm(loc=3500, scale=300)),
    "bottleneck_time_wild": ScaledDist(scipy.stats.norm(loc=0, scale=1),
                                       scipy.stats.norm(loc=3500, scale=300)),
    "captive_time": ScaledDist(scipy.stats.lognorm(s=0.7, loc=0, scale=1),
                               scipy.stats.lognorm(s=0.7, loc=0, scale=np.exp(3))),
    "div_time": ScaledDist(scipy.stats.norm(loc=0, scale=1),
                           scipy.stats.norm(loc=30000, scale=1000)),
    "mig_length_post_split": ScaledDist(scipy.stats.uniform(loc=0, scale=1),
                                        scipy.stats.uniform(loc=0, scale=10000)),
    "mig_length_wild": ScaledDist(scipy.stats.lognorm(s=0.7, loc=0, scale=1),
                                  scipy.stats.lognorm(s=0.7, loc=0, scale=np.exp(3))),
    "mig_rate_captive": scipy.stats.beta(1.2, 10),
    "mig_rate_post_split": scipy.stats.beta(1.2, 10),
    "mig_rate_wild": scipy.stats.beta(1.2, 10),
    "pop_size_wild_1": ScaledDist(scipy.stats.norm(loc=0, scale=1),
                                  scipy.stats.norm(loc=200, scale=20)),
    "pop_size_wild_2": ScaledDist(scipy.stats.norm(loc=0, scale=1),
                                  scipy.stats.norm(loc=200, scale=20)),
    "pop_size_captive": ScaledDist(scipy.stats.norm(loc=0, scale=1),
                                   scipy.stats.norm(loc=100, scale=10)),
    "pop_size_domestic_1": ScaledDist(scipy.stats.norm(loc=0, scale=1),
                                      scipy.stats.norm(loc=200, scale=20)),
    "pop_size_domestic_2": ScaledDist(scipy.stats.norm(loc=0, scale=1),
                                      scipy.stats.norm(loc=200, scale=20)),
}


### Add priors to model and plot

In [3]:
m = elfi.ElfiModel("m")

elfi.Constant(int(5e6), name="length", model=m)
elfi.Constant(1.8e-8, name="recombination_rate", model=m)
elfi.Constant(6e-8, name="mutation_rate", model=m)

for prior_name, prior in priors.items():
    
    if isinstance(prior, ScaledDist):
        elfi.Prior(prior.sampling, name=prior_name, model=m)
        prior.plot(x_lab=prior_name)
    else:
        elfi.Prior(prior, name=prior_name, model=m)
        plot_prior(prior, prior_name)    
    plt.savefig("../plots/prior/{}.png".format(prior_name))
    plt.clf()


DEBUG:matplotlib.font_manager:findfont: Matching :family=sans-serif:style=normal:variant=normal:weight=normal:stretch=normal:size=10.0.
DEBUG:matplotlib.font_manager:findfont: score(<Font 'DejaVu Sans' (DejaVuSans-BoldOblique.ttf) oblique normal bold normal>) = 1.335
DEBUG:matplotlib.font_manager:findfont: score(<Font 'STIXNonUnicode' (STIXNonUniBol.ttf) normal normal bold normal>) = 10.335
DEBUG:matplotlib.font_manager:findfont: score(<Font 'STIXSizeTwoSym' (STIXSizTwoSymReg.ttf) normal normal regular normal>) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(<Font 'DejaVu Serif' (DejaVuSerif-BoldItalic.ttf) italic normal bold normal>) = 11.335
DEBUG:matplotlib.font_manager:findfont: score(<Font 'STIXNonUnicode' (STIXNonUni.ttf) normal normal regular normal>) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(<Font 'cmss10' (cmss10.ttf) normal normal 400 normal>) = 10.05
DEBUG:matplotlib.font_manager:findfont: score(<Font 'cmsy10' (cmsy10.ttf) normal normal 400 normal>) = 10.05

<Figure size 432x288 with 0 Axes>

### POD

In [4]:
y_obs = elfi_sim(length=int(5e6),
            recombination_rate=1.8e-8,
            mutation_rate=6e-8,
            pop_size_domestic_1=[100],
            pop_size_wild_1=[100],
            pop_size_captive=[100],
            captive_time=[20],
            mig_rate_captive=[0.01],
            mig_length_wild=[20],
            mig_rate_wild=[0.01],
            pop_size_domestic_2=[100],
            pop_size_wild_2=[100],
            div_time=[20000],
            mig_rate_post_split=[0.1],
            mig_length_post_split=[1000],
            bottleneck_time_wild=[3000],
            bottleneck_strength_wild=[10000],
            bottleneck_time_domestic=[3000],
            bottleneck_strength_domestic=[3000],
            random_state=np.random.RandomState(3),
            batch_size=1)

DEBUG:root:slim -d pop_size_domestic_1=100 -d pop_size_wild_1=100 -d pop_size_captive=100 -d mig_rate_captive=0.01 -d mig_length_wild=20 -d mig_rate_wild=0.01 -d captive_time=20 -d length=5000000 -d recombination_rate=1.8e-08  -d decap_trees_filename='"../output/decap_218175339.trees"' -s 40 slim_model.slim
DEBUG:kastore.store:Loading file version 1.0
DEBUG:kastore.store:Loading 45 items from 194300 bytes


In [5]:
prior_args = [m[name] for name in m.parameter_names]  # Model only contains priors and constants

y = elfi.Simulator(elfi_sim, *prior_args, m["length"], m["recombination_rate"],
                   m["mutation_rate"], name="simulator", observed=y_obs)
s = elfi.Summary(elfi_summary, y, True, name='s', model=m)

d = elfi.Distance('euclidean', s, name='d', model=m)

In [6]:
s.generate(1)

DEBUG:elfi.compiler:OutputCompiler compiling...
DEBUG:elfi.compiler:ObservedCompiler compiling...
DEBUG:elfi.compiler:AdditionalNodesCompiler compiling...
DEBUG:elfi.compiler:RandomStateCompiler compiling...
DEBUG:elfi.compiler:ReduceCompiler compiling...
DEBUG:elfi.executor:Executing _random_state
DEBUG:elfi.executor:Executing bottleneck_strength_domestic
DEBUG:elfi.executor:Executing bottleneck_strength_wild
DEBUG:elfi.executor:Executing bottleneck_time_domestic
DEBUG:elfi.executor:Executing bottleneck_time_wild
DEBUG:elfi.executor:Executing captive_time
DEBUG:elfi.executor:Executing div_time
DEBUG:elfi.executor:Executing mig_length_post_split
DEBUG:elfi.executor:Executing mig_length_wild
DEBUG:elfi.executor:Executing mig_rate_captive
DEBUG:elfi.executor:Executing mig_rate_post_split
DEBUG:elfi.executor:Executing mig_rate_wild
DEBUG:elfi.executor:Executing pop_size_captive
DEBUG:elfi.executor:Executing pop_size_domestic_1
DEBUG:elfi.executor:Executing pop_size_domestic_2
DEBUG:elfi.e

FileNotFoundError: In executing node 'simulator': [WinError 2] The system cannot find the file specified: '../output/decap_689141242.trees'.

In [10]:
dict_ = {"a": 4, "c": 12}

In [11]:
import inspect

In [51]:
def simulator(a,b,c,d,e=None, multipliers=dict_):
    kwargs = locals()  # Returns dict (regardless of specified as kwargs or args)
    print(kwargs)    
    
    return a+b+c+d

In [52]:
simulator(1,2,3,4)

{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': None, 'multipliers': {'a': 4, 'c': 12}}


10

### Carry out simulations to "train" sum stat scaler

31000.0

In [11]:
seed = 1
pool = elfi.OutputPool(['s'])
rej = elfi.Rejection(m['d'], batch_size=4, seed=1, pool=pool)
rej_res = rej.sample(32, quantile=1)  # Accept all
store = pool.get_store('s')
sum_stats = np.array(list(store.values()))
sum_stats = sum_stats.reshape(-1, sum_stats.shape[2])  # Drop batches axis
scaler = StandardScaler() # StandardScaler()
scaler.fit(sum_stats)

DEBUG:elfi.compiler:OutputCompiler compiling...
DEBUG:elfi.compiler:ObservedCompiler compiling...
DEBUG:elfi.compiler:AdditionalNodesCompiler compiling...
DEBUG:elfi.compiler:RandomStateCompiler compiling...
DEBUG:elfi.compiler:ReduceCompiler compiling...
DEBUG:elfi.methods.parameter_inference:Submitting batch 0
DEBUG:elfi.client:Submitting batch 0
DEBUG:elfi.executor:Executing _s_observed
DEBUG:elfi.executor:Executing _d_observed
DEBUG:elfi.executor:Executing bottleneck_strength_domestic
DEBUG:elfi.executor:Executing bottleneck_strength_wild
DEBUG:elfi.executor:Executing bottleneck_time_domestic
DEBUG:elfi.executor:Executing bottleneck_time_wild
DEBUG:elfi.executor:Executing captive_time
DEBUG:elfi.executor:Executing div_time
DEBUG:elfi.executor:Executing mig_length_post_split
DEBUG:elfi.executor:Executing mig_length_wild
DEBUG:elfi.executor:Executing mig_rate_captive
DEBUG:elfi.executor:Executing mig_rate_post_split
DEBUG:elfi.executor:Executing mig_rate_wild
DEBUG:elfi.executor:Exec

Progress: |--------------------------------------------------| 0.0% Complete

FileNotFoundError: In executing node 'simulator': [WinError 2] The system cannot find the file specified: '../output/decap_1898995473.trees'.