In [1]:
import data
import models
import cache
import stanity
import seaborn as sns
%matplotlib inline

  "Cython.Distutils.old_build_ext does not properly handle dependencies "
INFO:stancache.seed:Setting seed to 1245502385
INFO:root:Setting CACHE_DIR = /mnt/modelcache/immune-infiltrate-explorations
INFO:stancache.seed:Setting seed to 1245502385


In [2]:
# Configure seaborn with its 'talk' plotting context.
sns.set(context='talk')

In [3]:
# Set the logger named 'cache' to INFO level.
# NOTE(review): presumably this is what surfaces the caching progress messages
# in the cell outputs -- confirm against the local `cache` module.
import logging
cache_logger = logging.getLogger('cache')
cache_logger.setLevel(logging.INFO)

In [4]:
# Notebook-level parameters.
# `by` is forwarded as `by=` to models.prep_stan_data.
by = 'cell_type'
# `sample_n` is forwarded as `sample_n=` to models.prep_sample_df.
sample_n = 500

## load data

Load data as we did in previous notebooks, so we can compare predictive performance for particular observations.

In [5]:
# Build the sample data frame through the caching wrapper.
# The logged cache filename embeds the sample_n value, so keep the keyword
# arguments of this call unchanged to keep hitting the same cache entry.
sample_df = cache.cached(models.prep_sample_df, sample_n=sample_n)

INFO:stancache.stancache:prep_sample_df: cache_filename set to prep_sample_df.cached.sample_n_500.pkl
INFO:stancache.stancache:prep_sample_df: Loading result from cache


In [6]:
# Prepare the object passed as `data=` to both Stan fits in this notebook,
# built from sample_df with the `by` setting.
stan_data = models.prep_stan_data(sample_df, by=by)

## model3 -- using poisson distribution

Load results from model3 from cache

In [13]:
# Resolve the model3 Stan file, then fetch its fit for the shared stan_data.
# The logged output shows both the compiled model and the posterior draws
# being loaded from cache.
model3 = models.get_model_file(model_name='model3')
fit3 = models.cached_stan_fit(
    file=model3,
    data=stan_data,
    model_name='model3',
    cache_only=True)

INFO:stancache.stancache:Step 1: Get compiled model code, possibly from cache
INFO:stancache.stancache:StanModel: cache_filename set to model3.cython_0_25_1.model_code_14165043495760001619.pystan_2_12_0_0.stanmodel.pkl
INFO:stancache.stancache:StanModel: Loading result from cache
INFO:stancache.stancache:Step 2: Get posterior draws from model, possibly from cache
INFO:stancache.stancache:sampling: cache_filename set to model3.cython_0_25_1.model_code_14165043495760001619.pystan_2_12_0_0.stanfit.chains_4.data_45645478644.iter_2000.seed_1245502385.pkl
INFO:stancache.stancache:sampling: Loading result from cache


In [17]:
# Ask only for the cache-file prefix of the model3 fit (prefix_only=True);
# it is used later to name the cached LOO result for this model.
fit3_prefix = models.cached_stan_file(
    file=model3,
    data=stan_data,
    model_name='model3',
    prefix_only=True)
# Bare expression so the notebook displays the prefix.
fit3_prefix

'model3.cython_0_25_1.model_code_14165043495760001619.pystan_2_12_0_0.stanfit.data_45645478644'

In [18]:
# Print the posterior summary for model3, restricted to the lp__ parameter
# (the printed table includes Rhat).
models.print_stan_summary(fit3, pars='lp__')

              mean   se_mean         sd          2.5%           50%         97.5%      Rhat
lp__  7.840543e+07  0.864071  32.017284  7.840536e+07  7.840543e+07  7.840549e+07  1.001218


## model4 -- using negative binomial dist

Load results from fitting model4 from cache

In [9]:
# Resolve the model4 Stan file, then fetch its fit for the same stan_data
# that was used for model3. The logged output shows both the compiled model
# and the posterior draws being loaded from cache.
model4 = models.get_model_file(model_name='model4')
fit4 = models.cached_stan_fit(
    file=model4,
    data=stan_data,
    model_name='model4',
    cache_only=True)

INFO:stancache.stancache:Step 1: Get compiled model code, possibly from cache
INFO:stancache.stancache:StanModel: cache_filename set to model4.cython_0_25_1.model_code_1623578005189906009.pystan_2_12_0_0.stanmodel.pkl
INFO:stancache.stancache:StanModel: Loading result from cache
INFO:stancache.stancache:Step 2: Get posterior draws from model, possibly from cache
INFO:stancache.stancache:sampling: cache_filename set to model4.cython_0_25_1.model_code_1623578005189906009.pystan_2_12_0_0.stanfit.chains_4.data_45645478644.iter_2000.seed_1245502385.pkl
INFO:stancache.stancache:sampling: Loading result from cache


In [19]:
# Ask only for the cache-file prefix of the model4 fit (prefix_only=True);
# it is used later to name the cached LOO result for this model.
fit4_prefix = models.cached_stan_file(
    file=model4,
    data=stan_data,
    model_name='model4',
    prefix_only=True)
# Bare expression so the notebook displays the prefix.
fit4_prefix

'model4.cython_0_25_1.model_code_1623578005189906009.pystan_2_12_0_0.stanfit.data_45645478644'

In [10]:
# Print the posterior summary for model4, restricted to the lp__ parameter
# (the printed table includes Rhat).
models.print_stan_summary(fit4, pars='lp__')

              mean   se_mean         sd          2.5%           50%         97.5%      Rhat
lp__  7.976705e+07  0.911986  36.376698  7.976698e+07  7.976705e+07  7.976712e+07  0.999749


## Summarize LOO-psis for each model

Summarize Leave-One-Out (LOO) predictive performance for model3 & model4, using the Pareto-smoothed importance sampling (PSIS) method to approximate true LOO-CV performance.

In [20]:
# PSIS-LOO for model3, computed from the fit's pointwise 'log_lik' draws.
# The result is cached under an explicit filename built from fit3_prefix.
loo3 = cache.cached(
    stanity.psisloo,
    log_likelihood=fit3.extract('log_lik')['log_lik'],
    cache_filename='{}.loo.pkl'.format(fit3_prefix))

INFO:stancache.stancache:psisloo: cache_filename set to model3.cython_0_25_1.model_code_14165043495760001619.pystan_2_12_0_0.stanfit.data_45645478644.loo.pkl
INFO:stancache.stancache:psisloo: Starting execution
  elif sort == 'in-place':
  bs /= 3 * x[sort[np.floor(n/4 + 0.5) - 1]]
  np.exp(temp, out=temp)
INFO:stancache.stancache:psisloo: Execution completed (0:01:47.874592 elapsed)
INFO:stancache.stancache:psisloo: Saving results to cache


In [21]:
# Print the summary of the model3 PSIS-LOO result.
# NOTE(review): the two printed proportions are presumably the shares of
# observations whose Pareto k exceeds 0.5 and 1 -- confirm in stanity.
loo3.print_summary()

greater than 0.5    0.164508
greater than 1      0.091778
dtype: float64

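The loo-summary for fit3 also suggests a poor model fit. The values reported are the proportions of observations whose estimated Pareto shape parameter (k) exceeds each threshold. We expect (hope) to have no more than ~5% of observations with k > 0.5, and even fewer with k > 1. These aren't hard cutoffs; they are guidelines.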

The interpretation is along the lines that:

1. There are too many observations exerting strong influence over our fit results, suggesting our model may be mis-parameterized by failing to account for these "extreme" values (extreme relative to model expectations)
2. This influence invalidates the approximation to LOO performance, which we will be running next. Essentially, performance for these observations would be worse than this approximation estimates.

In [22]:
# PSIS-LOO for model4, computed from the fit's pointwise 'log_lik' draws.
# The result is cached under an explicit filename built from fit4_prefix.
loo4 = cache.cached(
    stanity.psisloo,
    log_likelihood=fit4.extract('log_lik')['log_lik'],
    cache_filename='{}.loo.pkl'.format(fit4_prefix))

INFO:stancache.stancache:psisloo: cache_filename set to model4.cython_0_25_1.model_code_1623578005189906009.pystan_2_12_0_0.stanfit.data_45645478644.loo.pkl
INFO:stancache.stancache:psisloo: Loading result from cache


In [23]:
# Print the summary of the model4 PSIS-LOO result.
# NOTE(review): the two printed proportions are presumably the shares of
# observations whose Pareto k exceeds 0.5 and 1 -- confirm in stanity.
loo4.print_summary()

greater than 0.5    0.009778
greater than 1      0.001270
dtype: float64

These proportions are more in line with our expectations.

## Compare fit using model3 & model4

In [24]:
# Compare the two PSIS-LOO results (model3 first, model4 second).
# The displayed result is a dict with 'diff' and 'se_diff' entries.
stanity.loo_compare(loo3, loo4)

{'diff': 1411296.0494995965, 'se_diff': 69001.985447951331}

A larger value suggests that model4 is a better fit than model3. How much better can be evaluated intuitively by comparing the magnitude of the difference to the SE of the difference.

The short answer for this comparison is .. *much* better.