In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from cmdstanpy import CmdStanModel
from scipy import stats

from baynes import get_model, standard_analysis
from baynes.model_utils import get_stan_file
from baynes.plotter import FitPlotter

# Example 1: fit of a poissonian process
### Generate the data

In [None]:
# Fixed seed so that Restart & Run All reproduces the same synthetic dataset.
SEED = 42
rng = np.random.default_rng(SEED)

N = 500
lambda_true = 6.3
events = rng.poisson(lambda_true, N)

# Summary statistics of the observed counts, reused later as reference values
# for the predictive-check test quantities.
data_mean = np.mean(events)
data_sd = np.std(events)
print('- mean of data: ', data_mean)
print('- sd of data: ', data_sd)

# One unit-width bin centred on every observed integer count. The edges run
# from min - 0.5 to max + 0.5 so that the largest count is not dropped.
bins = np.arange(events.min(), events.max() + 2) - 0.5
sns.histplot(events, bins=bins)
sns.despine()

### Compile and print the STAN model


`baynes` can retrieve a `.stan` file from the models directory that was specified during installation.

In [None]:
# Resolve the path of the Stan program by its file name.
stan_file = get_stan_file('poisson.stan')

# Compile the program with explicit C++ build options.
# NOTE(review): 'jN' is forwarded to the build as-is; confirm it is an option
# the toolchain actually recognises.
model = CmdStanModel(
    stan_file=stan_file,
    cpp_options={
        'STAN_THREADS': True,
        'jN': 4,
    },
)

# Show the Stan source of the compiled model.
print(model.code())

The model can also be directly compiled with default arguments

In [None]:
# `get_model` (imported in the top import cell) is given only the Stan file
# name; this rebinds `model`, replacing the instance compiled above.
model = get_model('poisson.stan')

### Assemble the data
The model fits a Poisson distribution with a gamma prior on its only parameter $\lambda$. The variables 'alpha' and 'beta' are the parameters of the gamma prior, while 'prior' selects whether to sample only from the prior (1) or from the posterior, i.e. the fit to the data (0).

In [None]:
# Input dictionary passed to the Stan model.
data = {
    'N': len(events),   # number of observations
    'y': events,        # observed counts
    'alpha': 5,         # gamma prior parameter
    'beta': 1,          # gamma prior parameter
    'prior': 1,         # 1: sample from the prior only, 0: fit the data
}

### Prior predictive check
Run the model with `prior = 1`, so that the draws come from the prior only; warmup draws are not saved (`save_warmup=False`).

In [None]:
# `data['prior']` is 1 at this point, so this run samples from the prior only.
fit_prior = model.sample(
    data=data,
    save_warmup=False,
)

Initialize a FitPlotter object adding the fit and its title (optional), then plot a predictive check. By default, the 5-95% interval of the simulated data is drawn. If the data is unbinned, as in this case, pass the number of bins that will be used in the plot as an additional argument. 

In [None]:
# Register the prior-only fit under the title 'prior'.
plotter = FitPlotter(fit_prior, fit_title='prior')

# Compare the replicated data 'y_rep' with the observed counts stored under
# data['y']; the data is unbinned, so the number of bins is passed explicitly.
plotter.predictive_check(
    'y_rep',
    data_key='y',
    data=data,
    n_bins=len(bins),
    lines=False,
)

### Fit the model
Sample from the posterior, specifying the warmup iterations. Use `diagnose()` to check the convergence of the fit.

In [None]:
# Switch the model from prior-only sampling to fitting the data.
# Note: this mutates `data` in place, so re-running the prior cell afterwards
# would use prior = 0.
data['prior'] = 0

fit = model.sample(
    data,
    chains=4,
    iter_warmup=500,
    iter_sampling=1000,
    save_warmup=True,        # keep warmup draws in the fit
    show_progress=True,
    inits={'lambda': 5},     # initial value for lambda
)

# Print the sampler diagnostics report.
print(fit.diagnose())

Add the new fit to the FitPlotter and inspect the convergence of the only parameter. By default, new fits are added to an internal dictionary and new graphs are generated from the last added fit. All the parameters are plotted if None is passed as the first argument of the plotting function.

In [None]:
# Register the posterior fit under the title 'posterior', then draw the
# convergence plot.
plotter.add_fit(fit, fit_title='posterior')
plotter.convergence_plot(
    wspace=0.2,
)

Plot the posterior predictive check

In [None]:
# Same predictive check as for the prior, issued after the posterior fit has
# been added to the plotter.
plotter.predictive_check(
    'y_rep',
    data_key='y',
    data=data,
    n_bins=len(bins),
    lines=False,
)

Print the results for the parameter $\lambda$ and plot its posterior distribution, or compare it to the analytical solution.

In [None]:
# Kernel density estimate plot, with the generating value of lambda marked
# as a dashed green reference line.
plotter.kde_plot()
plotter.add_lines(
    [lambda_true],
    label='true value',
    color='green',
    ls='--',
)

In [None]:
# Conjugate update: a Gamma(alpha, beta) prior combined with N Poisson
# observations gives a Gamma(alpha + sum(y), beta + N) posterior.
# The hyperparameters are read from `data` so that the analytic curve stays
# in sync with the prior actually passed to the Stan model.
posterior_shape = data['alpha'] + np.sum(events)
posterior_rate = data['beta'] + len(events)
gamma_dist = stats.gamma(posterior_shape, scale=1 / posterior_rate)

# Evaluate the exact density over (almost) its whole support.
x = np.linspace(gamma_dist.ppf(0.0001), gamma_dist.ppf(0.9999), 100)
ax = plotter.plot(x, gamma_dist.pdf(x), color="black", lw=3, alpha=1, label='exact')

# Overlay the sampled draws of lambda as a density-normalised histogram.
sns.histplot(fit.draws_pd(['lambda']), x='lambda', ax=ax, stat='density', label='HMC', element='step', alpha=0.5)
plotter.resize(6, 6)

### Compare prior and posterior with histograms or a boxplot 

In [None]:
# Step histograms of all registered fits, coloured by fit.
plotter.dis_plot(
    fit_titles='all',
    kind='hist',
    hue='fit',
    element="step",
    common_bins=False,
)

In [None]:
# Plot lambda for all registered fits.
plotter.cat_plot(fit_titles='all', parameters='lambda')

# The reference line marks the generating value of lambda, not a statistic of
# the data, so it is labelled 'true value' (as in the KDE plot).
plotter.add_lines(x_coords=[lambda_true], color='black', lw=2, ls='--', label='true value')

In [None]:
# Columns of the fit summary that are compared with the analytic posterior.
summary_cols = ['Mean', 'StdDev', '5%', '95%']

# The same four quantities computed from the exact gamma posterior.
exact = {'Mean': gamma_dist.mean(), 'StdDev': gamma_dist.std(), '5%': gamma_dist.ppf(0.05), '95%': gamma_dist.ppf(0.95)}

# One row per source, each with an explicit label; otherwise the prior and
# posterior rows would both be indexed 'lambda' and be indistinguishable.
summary_table = pd.concat([
    pd.DataFrame([exact], index=['exact']),
    fit.summary().loc[['lambda'], summary_cols].rename(index={'lambda': 'posterior'}),
    fit_prior.summary().loc[['lambda'], summary_cols].rename(index={'lambda': 'prior'}),
])
print(summary_table.to_latex(float_format="%.2f"))

### Boxplots of test variables in predictive check and "p-values"

In [None]:
# Set the plotter's col_wrap attribute to 2 before plotting the two
# test quantities.
plotter.col_wrap = 2

# Test quantities of the replicated data, plotted for every registered fit.
variables = ['mean_y_rep', 'sd_y_rep']
plotter.cat_plot(variables, fit_titles='all')

# Mark the corresponding statistics of the observed data.
plotter.add_lines(
    x_coords=[data_mean, data_sd],
    label='data',
    color='black',
    ls='--',
    lw=2,
)
plotter.update_legend(edgecolor='white')
plotter.resize(9, 3)

In [None]:
# Test quantities and the matching statistics of the observed data, in the
# same order.
pars = ['mean_y_rep', 'sd_y_rep']
data_val = [data_mean, data_sd]
fits = plotter.get_fit_titles('all')

# For every fit (in reverse order of the returned titles), store the fraction
# of draws whose test quantity exceeds the observed value.
res = {}
for title in fits[::-1]:
    fractions = {}
    for stat_name, observed in zip(pars, data_val):
        draws = plotter.draws_df(parameters=[stat_name], fit_titles=[title])
        n_above = len(draws.loc[draws[stat_name] > observed])
        fractions[stat_name] = n_above / len(draws)
    res[title] = fractions

# One row per fit, one column per test quantity.
print(pd.DataFrame.from_dict(res).transpose().to_latex(float_format="%.2f"))

### This basic workflow can be automatically executed with standard_analysis:
* Sample from priors
* Use samples from priors as initial parameters
* Sample from the posterior
* Check diagnostics and compare priors with posteriors

In [None]:
# `standard_analysis` is imported in the top import cell.
# The dictionary holds sampler options; `rep_key` and `data_key` name the
# replicated and observed data used in the predictive checks.
# Note: this rebinds `fit`, replacing the posterior fit obtained above.
fit = standard_analysis(model, data, plotter,
                        {'chains': 4, "save_warmup": True},
                        rep_key='y_rep',
                        data_key='y',
                        n_bins=len(bins))