In [9]:
import data
import models
import cache
import seaborn as sns
import numpy as np
import pandas as pd
import stanity
from stancache import stancache

In [10]:
# Initialise seaborn's defaults using the 'talk' plotting context.
sns.set(context='talk')

In [11]:
# Settings reused by the data-prep cells below:
#   by       -> forwarded as `by=` to models.prep_stan_data
#   sample_n -> forwarded as `sample_n=` to models.prep_sample_df (via cache.cached)
by, sample_n = 'cell_type', 500

## Get data, as we did in earlier examples

This will help in case we want to compare estimates for particular genes or samples.

In [12]:
# Build the sample data frame through the project's cache wrapper, so that
# models.prep_sample_df is not necessarily re-run (the recorded log for this
# cell shows the result being loaded from a cached pickle).
sample_df = cache.cached(
    models.prep_sample_df,
    sample_n=sample_n,
)

INFO:stancache.stancache:prep_sample_df: cache_filename set to prep_sample_df.cached.sample_n_500.pkl
INFO:stancache.stancache:prep_sample_df: Loading result from cache


In [13]:
# Prepare the data object handed to the Stan fits below, using the `by`
# setting chosen in the configuration cell.
stan_data = models.prep_stan_data(sample_df, by=by)

## Get models from cache

In [6]:
# Look up the model file registered under the name 'model5', then obtain its
# fit on `stan_data` through the project's caching wrapper.
model5 = models.get_model_file(model_name='model5')
fit5 = models.cached_stan_fit(
    file=model5,
    data=stan_data,
    model_name='model5',
    # NOTE(review): presumably restricts this call to reading an existing
    # cache entry rather than sampling again -- confirm against models.py.
    cache_only=True,
)

INFO:stancache.stancache:Step 1: Get compiled model code, possibly from cache
INFO:stancache.stancache:StanModel: cache_filename set to model5.cython_0_25_1.model_code_12673779526111968781.pystan_2_12_0_0.stanmodel.pkl
INFO:stancache.stancache:StanModel: Loading result from cache
INFO:stancache.stancache:Step 2: Get posterior draws from model, possibly from cache
INFO:stancache.stancache:sampling: cache_filename set to model5.cython_0_25_1.model_code_12673779526111968781.pystan_2_12_0_0.stanfit.chains_4.data_45645478644.iter_2000.seed_1245502385.pkl
INFO:stancache.stancache:sampling: Loading result from cache


In [7]:
# Ask stancache for the cache-filename prefix that belongs to this
# model/data pair (prefix_only=True); the recorded output of this cell is
# that prefix string. It is reused below to name the PSIS-LOO cache file.
fit5_prefix = stancache.cached_stan_file(
    file=model5,
    data=stan_data,
    model_name='model5',
    prefix_only=True,
)
fit5_prefix

'model5.cython_0_25_1.model_code_12673779526111968781.pystan_2_12_0_0.stanfit.data_45645478644'

In [14]:
# Same steps as for model5, applied to the variant registered as 'model5.1':
# look up its model file, then obtain the fit via the caching wrapper.
model5_1 = models.get_model_file(model_name='model5.1')
fit5_1 = models.cached_stan_fit(
    file=model5_1,
    data=stan_data,
    model_name='model5.1',
    # NOTE(review): presumably restricts this call to reading an existing
    # cache entry rather than sampling again -- confirm against models.py.
    cache_only=True,
)

INFO:stancache.stancache:Step 1: Get compiled model code, possibly from cache
INFO:stancache.stancache:StanModel: cache_filename set to model5_1.cython_0_25_1.model_code_12115479030808689250.pystan_2_12_0_0.stanmodel.pkl
INFO:stancache.stancache:StanModel: Loading result from cache
INFO:stancache.stancache:Step 2: Get posterior draws from model, possibly from cache
INFO:stancache.stancache:sampling: cache_filename set to model5_1.cython_0_25_1.model_code_12115479030808689250.pystan_2_12_0_0.stanfit.chains_4.data_45645478644.iter_2000.seed_1245502385.pkl
INFO:stancache.stancache:sampling: Loading result from cache


In [19]:
# Cache-filename prefix for the model5.1 / stan_data pair (prefix_only=True);
# the recorded output of this cell is that prefix string. It is reused below
# to name the PSIS-LOO cache file for this model.
fit5_1_prefix = stancache.cached_stan_file(
    file=model5_1,
    data=stan_data,
    model_name='model5.1',
    prefix_only=True,
)
fit5_1_prefix

'model5_1.cython_0_25_1.model_code_12115479030808689250.pystan_2_12_0_0.stanfit.data_45645478644'

## Compute & compare PSIS-LOO for each model

In [8]:
# Run stanity.psisloo on model5's `log_lik` draws, going through the cache
# wrapper so the result is stored under '<fit5_prefix>.loo.pkl'.
# NOTE(review): the prefix shown for fit5 carries the model-code and data
# hashes but not the chains/iter/seed fields that appear in the fit's own
# cache filename, so this LOO cache may not distinguish fits that differ only
# in sampling settings -- verify before reusing with other settings.
loo5 = cache.cached(
    stanity.psisloo,
    log_likelihood=fit5.extract('log_lik')['log_lik'],
    cache_filename='{}.loo.pkl'.format(fit5_prefix),
)
loo5.print_summary()

INFO:stancache.stancache:psisloo: cache_filename set to model5.cython_0_25_1.model_code_12673779526111968781.pystan_2_12_0_0.stanfit.data_45645478644.loo.pkl
INFO:stancache.stancache:psisloo: Loading result from cache


greater than 0.5    0.006127
greater than 1      0.000825
dtype: float64

In [20]:
# Run stanity.psisloo on model5.1's `log_lik` draws, going through the cache
# wrapper so the result is stored under '<fit5_1_prefix>.loo.pkl'. In the
# recorded run this was a cache miss and took roughly two minutes.
# NOTE(review): the prefix shown for fit5_1 carries the model-code and data
# hashes but not the chains/iter/seed fields that appear in the fit's own
# cache filename, so this LOO cache may not distinguish fits that differ only
# in sampling settings -- verify before reusing with other settings.
loo5_1 = cache.cached(
    stanity.psisloo,
    log_likelihood=fit5_1.extract('log_lik')['log_lik'],
    cache_filename='{}.loo.pkl'.format(fit5_1_prefix),
)
loo5_1.print_summary()

INFO:stancache.stancache:psisloo: cache_filename set to model5_1.cython_0_25_1.model_code_12115479030808689250.pystan_2_12_0_0.stanfit.data_45645478644.loo.pkl
INFO:stancache.stancache:psisloo: Starting execution
  elif sort == 'in-place':
  bs /= 3 * x[sort[np.floor(n/4 + 0.5) - 1]]
  np.exp(temp, out=temp)
INFO:stancache.stancache:psisloo: Execution completed (0:02:09.695405 elapsed)
INFO:stancache.stancache:psisloo: Saving results to cache


greater than 0.5    0.006667
greater than 1      0.000762
dtype: float64

In [21]:
# Compare the PSIS-LOO results of model5 (first argument) and model5.1
# (second argument); the recorded output is a dict with `diff` and `se_diff`.
# NOTE(review): presumably `diff` is second-minus-first, so a negative value
# would favour model5 -- confirm against stanity's documentation.
stanity.loo_compare(loo5, loo5_1)

{'diff': -52.312429240899519, 'se_diff': 7.0112463144132633}

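The difference between the two models is large relative to its standard error (`diff` ≈ -52.3, `se_diff` ≈ 7.0, i.e. more than seven standard errors from zero). This indicates that the model with fixed `theta_mu` is worse than the one without a fixed `theta_mu`.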
