In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from cmdstanpy import CmdStanModel
from scipy import stats

from baynes.model_utils import get_stan_file
from baynes.plotter import FitPlotter
# Global figure styling: seaborn "ticks" style, "notebook" context with
# enlarged fonts, and no top/right spines on the axes.
# (A colorblind-friendly palette can be enabled with sns.set_palette('colorblind').)
sns.set_style('ticks')
sns.set_context("notebook", font_scale=1.6)
# Must come after set_style, which resets the spine visibility settings.
plt.rcParams.update({"axes.spines.top": False, "axes.spines.right": False})

# Example 1: fit of a Poisson process
### Generate the data

In [None]:
# Simulate N Poisson-distributed counts with a known rate, so that the fitted
# value can later be compared against the truth.
N = 500
lambda_true = 6.3

# Seeded generator: Restart & Run All reproduces the same dataset.
rng = np.random.default_rng(42)
events = rng.poisson(lambda_true, N)

# Summary statistics of the observed data, reused later as reference lines.
data_mean = np.mean(events)
data_sd = np.std(events)
print('- mean of data: ', data_mean)
print('- sd of data: ', data_sd)

# Kept unchanged: later cells pass len(bins) as n_bins to the predictive checks.
bins = np.arange(min(events), max(events))

# discrete=True draws one unit-wide bar centred on each integer count.
# Using `bins` as edges would stop at max(events) - 1, dropping the largest
# counts and merging the last two integers into a single bin.
sns.histplot(events, discrete=True)
sns.despine()

### Compile and print the Stan model


In [None]:
# Resolve the path of 'poisson.stan' through get_stan_file, compile it with
# the C++ options below, then print the Stan source of the compiled model.
stan_file = get_stan_file('poisson.stan')
cpp_options = {'STAN_THREADS': True, 'jN': 4}
model = CmdStanModel(stan_file=stan_file, cpp_options=cpp_options)
print(model.code())

### Assemble the data
The model fits a Poisson distribution with a gamma prior on its only parameter $\lambda$. The variables 'alpha' and 'beta' are the parameters of the gamma prior, while 'prior' selects whether to generate simulated data only from the prior (1) or from the fit (0).

In [None]:
# Input dictionary handed to the sampler: the observed counts, their number,
# the two prior hyperparameters, and the prior-only switch (1 = prior only).
data = dict(
    N=len(events),
    y=events,
    alpha=5,
    beta=1,
    prior=1,
)

### Prior predictive check
Sample the model with `prior` set to 1, so that the simulated data are generated from the prior alone. Warmup draws are not saved.

In [None]:
# Sample with data['prior'] == 1 (prior-only mode); warmup draws are discarded.
fit_prior = model.sample(data=data, save_warmup=False)

Initialize a FitPlotter object adding the fit and its title (optional), then plot a predictive check. By default, the 5-95% interval of the simulated data is drawn. If the data is unbinned, as in this case, pass the number of bins that will be used in the plot as an additional argument. 

In [None]:
# Same styling as in the setup cell, re-applied before building the plotter.
sns.set_style('ticks')
plt.rc("axes.spines", top=False, right=False)

# Output location passed to FitPlotter as `output_dir`. A path relative to the
# notebook keeps the example runnable on any machine; the previous value was an
# absolute path inside one user's home directory. The trailing separator of
# the original path is preserved, and the directory is created if missing.
OUTPUT_DIR = os.path.join("figures", "poisson", "")
os.makedirs(OUTPUT_DIR, exist_ok=True)

plotter = FitPlotter(fit_prior, fit_title='prior', fig_scale=6,
                     output_dir=OUTPUT_DIR, output_format='.pdf')

# Prior predictive check: replicated data 'y_rep' against the observed counts
# stored under data['y'], using len(bins) bins.
plotter.predictive_check('y_rep',
                         data=data,
                         data_key='y',
                         lines=True,
                         legend=False,
                         n_bins=len(bins),
                         color='orange')

### Fit the model
Set `prior` to 0 so that the generated quantities depend on the fitted parameter, then sample, specifying the number of warmup iterations. Use `diagnose()` to check the convergence of the fit.

In [None]:
# Leave prior-only mode: from here on the model is fitted to the data.
data.update(prior=0)

# Sampler configuration: 4 chains, 500 warmup + 1000 sampling iterations each,
# warmup draws retained, and lambda initialised at 5.
sampler_settings = dict(
    chains=4,
    iter_warmup=500,
    iter_sampling=1000,
    save_warmup=True,
    show_progress=True,
    inits={'lambda': 5},
)
fit = model.sample(data=data, **sampler_settings)

# Text report of the sampler diagnostics.
print(fit.diagnose())

Add the new fit to the FitPlotter and inspect the convergence of the only parameter. By default, new fits are added to an internal dictionary and new graphs are generated from the last added fit. All the parameters are plotted if None is passed as the first argument of the plotting function.

In [None]:
# Register the new fit with the plotter under the title 'posterior'.
plotter.add_fit(fit, fit_title='posterior')
# Convergence plot; no parameter list is passed, so the default selection is used.
plotter.convergence_plot(wspace=0.2)
# Resize the figure (presumably width, height — confirm against FitPlotter.resize).
plotter.resize(12,5)



Plot the posterior predictive check

In [None]:
# Posterior predictive check: same comparison of 'y_rep' with the observed
# counts as for the prior, now drawn in light blue.
posterior_check_options = dict(
    data=data,
    data_key='y',
    lines=True,
    n_bins=len(bins),
    legend=False,
    color='lightblue',
)
plotter.predictive_check('y_rep', **posterior_check_options)

Plot the posterior distribution of the parameter $\lambda$ together with its true value.

In [None]:
# KDE plot, called without arguments so the plotter's defaults apply.
plotter.kde_plot()
# Mark the rate used to generate the data with a dashed green line.
plotter.add_lines([lambda_true], color='green', label='true value', ls='--')
plotter.resize(8,6)
# Last statement of the cell, so its return value becomes the cell output.
plotter.get_current_figure()
# Optional: show the sampler summary row for lambda with
# fit.summary().loc[['lambda']]

In [None]:
# Categorical plot over all fits registered with the plotter (fit_titles='all').
plotter.cat_plot(fit_titles='all', aspect=1.5)
# Reference line at the rate used to generate the data.
plotter.add_lines([lambda_true], color='green')

In [None]:
# Closed-form posterior used as a reference for the sampled draws: a gamma
# distribution with shape alpha + sum(y) and rate beta + N.
# NOTE(review): assumes poisson.stan treats data['alpha'] as the shape and
# data['beta'] as the rate of a gamma prior on lambda. The values 5 and 1 were
# previously hard-coded here and match the data dictionary — confirm.
posterior_shape = data['alpha'] + events.sum()
posterior_rate = data['beta'] + N
gamma_dist = stats.gamma(posterior_shape, scale=1 / posterior_rate)

# Evaluate the exact density over its central 99.98% range.
x = np.linspace(gamma_dist.ppf(0.0001), gamma_dist.ppf(0.9999), 100)
ax = plotter.plot(x, gamma_dist.pdf(x), 'r-', lw=2, alpha=0.6, label='exact')

# Overlay the sampled lambda draws as a density-normalised histogram on the same axes.
sns.histplot(fit.draws_pd(['lambda']), x='lambda', ax=ax, stat='density', label='HMC')
plotter.update_legend(bbox_to_anchor=(1.15, 0.6), facecolor='white', edgecolor='white')
plotter.resize(6,6)

In [None]:
# Build a LaTeX table comparing the closed-form posterior with the sampled
# posterior and prior summaries of lambda.
summary_columns = ['Mean', 'StdDev', '5%', '95%', 'N_Eff']

# Moments and 5%/95% quantiles of the exact gamma posterior.
exact = {
    'Mean': gamma_dist.mean(),
    'StdDev': gamma_dist.std(),
    '5%': gamma_dist.ppf(0.05),
    '95%': gamma_dist.ppf(0.95),
}

# Sampler summaries for lambda: posterior fit first, prior fit second.
df = pd.concat([fit.summary().loc[['lambda']], fit_prior.summary().loc[['lambda']]])

# Label every row explicitly: otherwise the index reads 0, lambda, lambda and
# the three rows cannot be told apart in the printed table.
sampled_rows = df[summary_columns].set_axis(['HMC posterior', 'HMC prior'], axis=0)
exact_row = pd.DataFrame([exact], index=['exact posterior'])

# The exact row has no N_Eff entry, so that cell is NaN after concatenation.
d = pd.concat([exact_row, sampled_rows])

print(d.to_latex(float_format="%.2f"))

In [None]:
# Names of the two quantities compared across fits (presumably the mean and
# standard deviation of the replicated data — confirm against poisson.stan).
variables = ['mean_y_rep', 'sd_y_rep']
# Change the plotter's figure scale for the following figures.
plotter.fig_scale = 8
# Categorical plot of both variables for all registered fits.
plotter.cat_plot(variables, fit_titles='all')
# Dashed green reference lines at the mean and standard deviation of the
# observed data (presumably one per plotted variable — confirm).
plotter.add_lines(x_coords=[data_mean, data_sd], color='green', lw=2, ls='--') 
plotter.resize(12, 4)

Most of the plots support multiple parameters and fit combinations. For example, we can compare the prior and posterior distributions for the simulated data's mean and standard deviation. This can also be done with wrappers of Seaborn's functions `displot` and `catplot`, allowing for many possibilities.

In [None]:
# Same two replicated-data quantities as in the categorical plot.
variables = ['mean_y_rep', 'sd_y_rep']
plotter.fig_scale = 6
# Step-style histograms coloured by variable; common_bins=False is passed so
# the variables do not share bin edges.
plotter.dis_plot(variables, kind='hist', hue='variable', common_bins=False, element="step")

It is also possible to add vertical and horizontal lines to the last created figure. In this case we can see not only that the fit has converged to the true value, but also that the mean and standard deviation of the simulated data converge to those of the real data. This means that the model can qualitatively describe various properties of the data, which is a good indicator of its usefulness.