Bayes Rule Book:

https://www.bayesrulesbook.com/chapter-6.html

Materials from the Bayes Rule github:

https://github.com/bayes-rules/bayesrules

# Imports

In [1]:
import pyreadr, math
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from os.path import exists

# Grid Approximation

## Approximating the Beta-Binomial

Simulating the posterior, given:

- $prior = Beta(2, 2)$
- $obs = (Y=9)|Bin(10, \pi)$

### A Sneak Peek

In [10]:
prior_pdf = np.random.beta(2,2, 1000)

def beta_p(a, b, p):
    c = np.math.factorial(a + b - 1) / (np.math.factorial(a - 1)*np.math.factorial(b - 1))
    return c * p**(a-1) * (1-p)**(b-1)

def n_choose_k(n,k):
    return np.math.factorial(n)/(np.math.factorial(k)*np.math.factorial(n-k))

def binomial_p(n, k, p):
    return n_choose_k(n,k) * p**k * (1-p)**(n-k)

def binomial_likelihood(n, k):
    return [binomial_p(n,k,p/1000) for p in range(1,1000)]

likelihood = binomial_likelihood(10, 9)

In [8]:
prior_graph      = ff.create_distplot([prior_pdf], ['Prior'], show_hist=False, show_rug=False)['data'][0]
likelihood_graph = go.Scatter(x=[x/1000 for x in range(1,1000)], y=binomial_likelihood(10, 9), name="likelihood")
posterior_graph  = ff.create_distplot([np.random.beta(11,3, 1000)], ['Posterior'], show_hist=False, show_rug=False)['data'][0]

fig = go.Figure([likelihood_graph, prior_graph, posterior_graph])
fig['data'][0]['line']['color']="Yellow"
fig['data'][1]['line']['color']="Blue"
fig['data'][2]['line']['color']="Green"
fig

Just a sneak peek at the posterior we are now going to simulate

### Simulation

<img src="images/ch6_sim_1.png" width=300/>

In [22]:
possible_p_values = [0, .2, .4, .6, .8, 1.]

<img src="images/ch6_sim_2.png" width=300/>

In [27]:
values = []
for p in possible_p_values:
    prior = beta_p(2, 2, p)
    likelihood = binomial_p(10, 9, p)
    values.append((p, prior, likelihood))

sim_table = pd.DataFrame(data=values, columns=['p', 'prior', 'likelihood'])

<img src="images/ch6_sim_3.png" width=300/>

In [32]:
sim_table['unnormalized_posterior'] = sim_table.prior * sim_table.likelihood
sim_table['posterior'] = sim_table.unnormalized_posterior / sim_table.unnormalized_posterior.sum()

print(f'unnormalized posterior sum: {sim_table.unnormalized_posterior.sum()}\nnormalized posterior sum: {sim_table.posterior.sum()}')

unnormalized posterior sum: 0.31801442304000005
normalized posterior sum: 1.0


<img src="images/ch6_sim_4.png" width=300/>

In [34]:
sim_table.round(2)

Unnamed: 0,p,prior,likelihood,unnormalized_posterior,posterior
0,0.0,0.0,0.0,0.0,0.0
1,0.2,0.96,0.0,0.0,0.0
2,0.4,1.44,0.0,0.0,0.01
3,0.6,1.44,0.04,0.06,0.18
4,0.8,0.96,0.27,0.26,0.81
5,1.0,0.0,0.0,0.0,0.0


<img src="images/ch6_sim_5.png" width=300/>

In [63]:
sample_ps = sim_table.sample(random_state=711, replace=True, n=1000, weights=sim_table.posterior).p
sample_ps_tbl = sample_ps.value_counts().to_frame().reset_index()
sample_ps_tbl.columns = ['p', 'n']
sample_ps_tbl['pct'] = sample_ps_tbl.n / sample_ps_tbl.n.sum()

sample_ps_tbl.append(sample_ps_tbl[['n', 'pct']].sum(), ignore_index=True).fillna('').rename(index={3:'Total'})

Unnamed: 0,p,n,pct
0,0.8,798.0,0.798
1,0.6,194.0,0.194
2,0.4,8.0,0.008
Total,,1000.0,1.0


<img src="images/ch6_sim_6.png" width=300/>

In [79]:
ff.create_distplot([np.random.beta(11,3, 1000), sample_ps], ['Actual', 'Simulated'], show_hist=False, show_rug=False)

The above sim isn't so good (according to book). We now try with 101 potential pi values instead of the above 6

In [92]:
# Step 1: define p values
possible_p_values = [x/100 for x in range(101)]

# Step 2: Evaluate prior & likelihood for each potential p value
values = []
for p in possible_p_values:
    prior = beta_p(2, 2, p)
    likelihood = binomial_p(10, 9, p)
    values.append((p, prior, likelihood))

sim_table = pd.DataFrame(data=values, columns=['p', 'prior', 'likelihood'])

# Step 3: Approximate the posterior
sim_table['unnormalized_posterior'] = sim_table.prior * sim_table.likelihood
sim_table['posterior'] = sim_table.unnormalized_posterior / sim_table.unnormalized_posterior.sum()

# Step 4: Simulate & Sample
sample_ps = sim_table.sample(random_state=711, replace=True, n=1000, weights=sim_table.posterior).p
sample_ps_tbl = sample_ps.value_counts().to_frame().reset_index()
sample_ps_tbl.columns = ['p', 'n']
sample_ps_tbl['pct'] = sample_ps_tbl.n / sample_ps_tbl.n.sum()

# Step 5: Plot Results
ff.create_distplot([np.random.beta(11,3, 1000), sample_ps], ['Actual', 'Simulated'], show_hist=False, show_rug=False)


well well well, would you look at that...

# Markov Chains

Shortcoming of **Grid Approximation**: curse of dimensionality. Whereas grid approx. works well with 1 or 2 params (e.g. $\pi$ or $\theta$), it begins to breakdown as we add more params.

The book uses `RStan` for probabilistic programming. Though a `PyStan` is available, we're ging to try to implement all examples using `Pyro`.

In [124]:
import pyro
import torch as t
from pyro.distributions import Beta, Binomial
from pyro.infer import MCMC
from pyro.infer.mcmc.nuts import HMC

In [273]:
def model():
    pi = pyro.sample("pi", Beta(2, 2))
    pyro.sample("obs", Binomial(10, pi))

# By fixing our obs, the MCMC will try different values of pi to match the observation
conditioned_model = pyro.condition(model, data={"obs": t.Tensor([9.])})

hmc_kernel = HMC(conditioned_model, step_size=0.9, num_steps=4)
mcmc = MCMC(hmc_kernel, num_samples=5000*2, warmup_steps=5000, num_chains=1) # multiple chains not supported in notebooks according to Pyro forum
mcmc.run()

data = mcmc.get_samples()['pi'].detach().numpy()
fig = ff.create_distplot([data, np.random.beta(11,3, 1000)], ['simulated', 'actual'], show_rug=False, bin_size=.01)
fig

Sample: 100%|██████████| 15000/15000 [00:55, 268.71it/s, step size=1.17e+00, acc. prob=0.893]


In [271]:
# Grab lines out of distplots to use as (x, y) vals
sim = pd.concat([pd.Series(fig['data'][2].x, name='x'), pd.Series(fig['data'][2].y, name='y')], axis=1).set_index('x')
act = pd.concat([pd.Series(fig['data'][3].x, name='x'), pd.Series(fig['data'][3].y, name='y')], axis=1).set_index('x')

# Returns DF row with index closest to "x"
def p(x, df):
    return df.iloc[np.abs(df.index - x).argmin()].values[0]

# Kullback–Leibler Divergence
def kld(x, q):
    min = x.index.min()
    max = x.index.max()

    total = 0
    for n in [n/1000 for n in range(math.floor(min*1000), math.floor(max*1000))]:
        total += p(n, x) * np.log(p(n, x)/p(n, q))

    return total

kld(sim, act)

8.77008925324572

In [274]:
mcmc.summary()


                mean       std    median      5.0%     95.0%     n_eff     r_hat
        pi      0.78      0.10      0.80      0.62      0.95  23378.24      1.00

Number of divergences: 0


In [284]:
px.line(mcmc.get_samples()['pi'][5000:])