Bayes Rule Book:

https://www.bayesrulesbook.com/chapter-7.html

Materials from the Bayes Rule github:

https://github.com/bayes-rules/bayesrules

# Imports

In [1]:
import math
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from scipy.stats import norm

import pyro
import torch as t
from pyro.distributions import Normal
from pyro.infer import MCMC
from pyro.infer.mcmc.nuts import HMC

# MCMC Normal - Normal with Pyro

- $Y|\mu \sim N(\mu, 0.75^2)$
- $~~~~\mu \sim N(0, 1)$

In [76]:
def model():
    mu = pyro.sample('mu', Normal(0, 1))
    pyro.sample("obs", Normal(mu, .75))

conditioned_model = pyro.condition(model, data={'obs': t.tensor([6.25])})

In [77]:
kernel = HMC(conditioned_model, step_size=0.9, num_steps=4)
mcmc   = MCMC(kernel, num_samples=2000, warmup_steps=500)
mcmc.run()
mcmc.summary()

Sample: 100%|██████████| 2500/2500 [00:05, 417.72it/s, step size=9.04e-01, acc. prob=0.987]



                mean       std    median      5.0%     95.0%     n_eff     r_hat
        mu      4.00      0.56      4.01      3.09      4.91  13737.80      1.00

Number of divergences: 0


We included the "actual" based on the conjugate relationship:

$\mu | Y=6.25 \sim N(4, 0.6^2)$

In [81]:
data = mcmc.get_samples()['mu'].detach().numpy()
fig = ff.create_distplot([data, np.random.normal(4, 0.6, len(data))], ['simulated', 'actual'], show_rug=False, bin_size=.05)
fig

In [79]:
px.line(mcmc.get_samples()['mu'])

In [80]:
# Grab lines out of distplots to use as (x, y) vals
sim = pd.concat([pd.Series(fig['data'][2].x, name='x'), pd.Series(fig['data'][2].y, name='y')], axis=1).set_index('x')
act = pd.concat([pd.Series(fig['data'][3].x, name='x'), pd.Series(fig['data'][3].y, name='y')], axis=1).set_index('x')

# Returns DF row with index closest to "x"
def p(x, df):
    return df.iloc[np.abs(df.index - x).argmin()].values[0]

# Kullback–Leibler Divergence
def kld(x, q):
    min = x.index.min()
    max = x.index.max()

    total = 0
    for n in [n/1000 for n in range(math.floor(min*1000), math.floor(max*1000))]:
        total += p(n, x) * np.log(p(n, x)/p(n, q))

    return total

kld(sim, act)

27.10564322558096

# Metropolis-Hastings from scratch

- $Y|\mu \sim N(\mu, 0.75^2)$
- $~~~~\mu \sim N(0, 1)$

In [83]:
def step(mu, y, steps=1000, mus=[]):

    # Step 1: Propose new mu
    n = norm(loc=mu, scale=1)
    proposed_mu = n.rvs() # 1 sample

    # Step 2: Compare proposed posterior v current posterior
    proposed_posterior = norm(loc=proposed_mu, scale=0.75).pdf(y) * norm(loc=0,scale=1).pdf(proposed_mu)
    current_posterior  = norm(loc=mu,          scale=0.75).pdf(y) * norm(loc=0,scale=1).pdf(mu)

    alpha = proposed_posterior / current_posterior

    # Step 3: Acceptance Determination
    # If alpha is greater than 1 or random - but since 1 is always greater than the random,, we can simplify
    if alpha > np.random.uniform(low=0.0, high=1.0):
        mu = proposed_mu

    mus.append(mu)

    # Step 4: Loop or return
    if steps>0:
        return step(mu, y, steps=steps-1, mus=mus)
    return mus

init_mu = norm(loc=0,scale=1).rvs()
obs = 6.25
mus = step(init_mu, obs, steps=2500)

In [84]:
ff.create_distplot([mus[500:]], ['sim'], show_rug=False, bin_size=.05)

In [85]:
px.line(mus[500:])

In [86]:
f'mean: {np.array(mus[500:]).mean().round(2)} std: {np.array(mus[500:]).std().round(2)}'

'mean: 3.96 std: 0.63'

Recall, we were hoping for a result of :

$N(4, 0.6^2)$

So this is pretty close.

# Quiz: Beta - Binomial MCMC

Start:

- $Y|\pi \sim Bin(2, \pi)$
- $~~~~\pi \sim Beta(2,3)$

Find: 
- $\pi|(Y=1)$


## From scratch

In [82]:
from scipy.stats import norm, beta, binom

In [125]:
def step(p, y, steps=1000, ps=[]):
    # Step 1: propose new p (using the same beta dist instead of an updating one)
    a, b = [1,1]
    p_proposed = beta(a,b).rvs()

    # Step 2: Compare posteriors
    # L(π|Y=1) * f(π)
    post_proposed = binom(2, p_proposed).pmf(y) * beta(2,3).pdf(p_proposed)
    post_current  = binom(2, p).pmf(y)          * beta(2,3).pdf(p)

    # With asymmetric Beta-Binomial proposal q(π|π')/q(π'|π) != 1, so we include here
    q_prop    = beta(a,b).pdf(p_proposed)
    q_current = beta(a,b).pdf(p)

    # Step 3: Decision
    alpha = (post_proposed / post_current) * (q_current/q_prop)
    if alpha > np.random.uniform(0.0, 1.0):
        p = p_proposed

    ps.append(p)

    if steps > 0:
        return step(p, y, steps=steps-1, ps=ps)

    return ps

init_p = beta(1,1).rvs()
obs = 1.
ps = step(init_p, obs, steps=2500)

In [126]:
ff.create_distplot([ps[250:], np.random.beta(3,4,size=1000)], ['Simulated', 'Conjugate'], show_rug=False, bin_size=0.05)

In [127]:
f'mean: {np.array(ps[250:]).mean().round(2)} std: {np.array(ps[250:]).std().round(2)}'

'mean: 0.43 std: 0.18'

## Pyro

In [121]:
from pyro.distributions import Beta as PBeta, Binomial as PBinomial

In [122]:
def model():
    p = pyro.sample('p', PBeta(2,3))
    pyro.sample('obs', PBinomial(2, p), obs=t.tensor([1.]))

kernel = HMC(model, step_size=0.9, num_steps=4)
mcmc   = MCMC(kernel, num_samples=2000, warmup_steps=500)
mcmc.run()
mcmc.summary()

Sample: 100%|██████████| 2500/2500 [00:12, 197.97it/s, step size=8.86e-01, acc. prob=0.926]


                mean       std    median      5.0%     95.0%     n_eff     r_hat
         p      0.42      0.17      0.41      0.14      0.69   4392.38      1.00

Number of divergences: 0





In [124]:
data = mcmc.get_samples()['p'].detach().numpy()
ff.create_distplot([data, ps[250:], np.random.beta(3,4,size=1000)], ['Pyro MCMC', 'Scratch MCMC', 'Conjugate'], show_rug=False, bin_size=0.05)