# Imports & Config
---

In [1]:
import sys
if 'google.colab' in sys.modules and 'beanmachine' not in sys.modules:
    !pip install beanmachine
import json
from datetime import date, timedelta

import arviz as az
import beanmachine.ppl as bm
import numpy as np
import torch
import torch.distributions as dist
from bokeh.embed import json_item
from bokeh.io import output_notebook, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.models.formatters import NumeralTickFormatter
from bokeh.plotting import figure, gridplot
from torch import tensor

In [2]:
# Half-width subtracted from / added to each integer x value to get bar edges.
OFFSET = 0.5
# Hex colour palette kept with the other plotting constants.
COLORS = ["#2a2eec", "#fa7c17", "#328c06", "#c10c90"]
# Seed for torch's global random number generator.
SEED = 42

# Seed once, up front, so a Restart & Run All reproduces the same random draws
# from anything that samples through torch's global generator.
torch.manual_seed(SEED)

# Route ArviZ plots through Bokeh and render Bokeh output inside the notebook.
az.rcParams["plot.backend"] = "bokeh"
az.rcParams["plot.bokeh.figure.dpi"] = 60
output_notebook()

In [3]:
def save_bokeh_json(plot, save_path, div_name):
    """Write a Bokeh plot to ``<save_path>/<div_name>.json``.

    The plot is converted with ``json_item`` using ``div_name`` as the target
    id, then dumped to disk with ``json.dump``.

    Args:
        plot: Bokeh object to serialise.
        save_path: Directory the JSON file is written into.
        div_name: Target id and file stem; should be train-case.
    """
    target = f"{save_path}/{div_name}.json"
    payload = json_item(plot, div_name)
    with open(target, "w") as handle:
        json.dump(payload, handle)

# Why Bean Machine?
---
## Generative Probabilistic Models
### Poisson Distribution

In [4]:
# Illustrative parameters; their product is the Poisson mean used below.
num_init = 2
reproduction_rate = 1.5
# Support on which the pmf is evaluated: the integers 0..9.
x = torch.tensor(np.arange(0, 10))
# Poisson probability mass at each x, recovered by exponentiating log-probs.
y = torch.exp(dist.Poisson(reproduction_rate * num_init).log_prob(x))

In [5]:
# Mean of the plotted Poisson. The legend text and the mean marker are derived
# from it so they cannot drift from the parameters the pmf was computed with.
poisson_mean = reproduction_rate * num_init

# Each bar is centred on an integer count and extends OFFSET to either side.
left = (x - OFFSET).tolist()
top = y.tolist()
right = (x + OFFSET).tolist()
bottom = np.zeros(y.shape).tolist()
source = ColumnDataSource(
    {
        "x": x.tolist(),
        "y": y.tolist(),
        "left": left,
        "top": top,
        "right": right,
        "bottom": bottom,
    }
)
p = figure(
    plot_width=500,
    plot_height=400,
    outline_line_color="black",
    title="Poisson distribution",
    y_axis_label="Probability mass",
    x_axis_label="Number of new cases",
    y_range=[0, 0.25],
)
glyphs = p.quad(
    left="left",
    top="top",
    right="right",
    bottom="bottom",
    source=source,
    fill_color="steelblue",
    fill_alpha=0.7,
    line_color="white",
    line_width=2,
    hover_fill_color="orange",
    hover_line_color="black",
    hover_fill_alpha=1,
    legend_label=f"Poisson({poisson_mean})",
)
# Hover is restricted to the bars so the mean marker does not show a tooltip.
tips = HoverTool(
    renderers=[glyphs], tooltips=[("New cases", "@x"), ("Probability", "@y{0.00}")],
)
p.add_tools(tips)
# Vertical marker at the distribution mean; it extends past y_range on purpose.
p.line(
    x=[poisson_mean, poisson_mean],
    y=[0, 1],
    line_color="black",
    line_width=2,
    legend_label="Mean value",
)
p.grid.grid_line_alpha = 0.2
p.grid.grid_line_color = "grey"
p.grid.grid_line_width = 0.3
p.yaxis.minor_tick_line_color = None
p.xaxis.minor_tick_line_color = None

In [6]:
# Display the Poisson figure.
show(p)
# Optional export of the figure as JSON (left disabled).
# save_bokeh_json(p, "./why_bean_machine/", "prior_poisson_intro")
# Remove the name `p`; later cells build new figures under the same name.
del p

### Exponential Distribution

In [7]:
# Rate parameter of the Exponential distribution evaluated below.
reproduction_rate_rate = 10.0
# Dense grid of 10000 points from just above zero up to 1.
x = torch.tensor(np.linspace(1e-6, 1, num=10000))
# Exponential density on the grid, recovered by exponentiating log-probs.
y = torch.exp(dist.Exponential(rate=reproduction_rate_rate).log_prob(x))

In [8]:
# The mean of an Exponential(rate) distribution is 1 / rate. Deriving it (and
# the legend text) keeps the annotations in sync with reproduction_rate_rate.
prior_mean = 1.0 / reproduction_rate_rate

source = ColumnDataSource({"x": x.tolist(), "y": y.tolist(),})
p = figure(
    plot_width=500,
    plot_height=400,
    outline_line_color="black",
    title="Exponential distribution",
    y_axis_label="Probability density",
    x_axis_label="Reproduction rate",
    y_range=[0, 10],
    x_range=[0, 0.5],
)
glyph = p.line(
    x="x",
    y="y",
    source=source,
    line_color="steelblue",
    line_width=2,
    legend_label=f"Exponential({reproduction_rate_rate})",
)
# Tooltip label fixed: it previously read "Reproduction rage".
tips = HoverTool(
    renderers=[glyph],
    tooltips=[("Probability density", "@y{0.00}"), ("Reproduction rate", "@x{0.00}"),],
)
p.add_tools(tips)
# Vertical marker at the prior mean; it extends past y_range on purpose.
p.line(
    x=[prior_mean, prior_mean],
    y=[0, 20],
    line_color="black",
    line_width=2,
    legend_label="Mean value",
)
p.grid.grid_line_alpha = 0.2
p.grid.grid_line_color = "grey"
p.grid.grid_line_width = 0.3
p.yaxis.minor_tick_line_color = None
p.xaxis.minor_tick_line_color = None

In [9]:
# Display the Exponential figure.
show(p)
# Optional exports of the same figure under two directories (left disabled).
# save_bokeh_json(p, "./why_bean_machine/", "prior_exponential")
# save_bokeh_json(p, "./quick_start/", "prior_exponential")
# Remove the name `p`; later cells build new figures under the same name.
del p

# Quick Start
---
## Modeling, Data, Inference

In [10]:
# Rate of the Exponential distribution returned by reproduction_rate().
reproduction_rate_rate = 10.0
# Case count passed to num_new() when the observation is built.
num_init = 1087980


@bm.random_variable
def reproduction_rate():
    """Random variable for the reproduction rate.

    Returns:
        An Exponential distribution whose rate is ``reproduction_rate_rate``.
    """
    prior = dist.Exponential(rate=reproduction_rate_rate)
    return prior


@bm.random_variable
def num_new(num_current):
    """Random variable for the number of new cases given ``num_current``.

    Returns:
        A Poisson distribution with rate ``reproduction_rate() * num_current``.
    """
    expected_new = reproduction_rate() * num_current
    return dist.Poisson(expected_new)

In [11]:
# Condition on one observed value of num_new(num_init).
observations = {num_new(num_init): tensor(238154.0)}

# Query the reproduction rate: 7000 samples requested, with 3000 adaptive
# samples on top (the progress bars below count 10000 per chain).
samples = bm.CompositionalInference().infer(
    observations=observations,
    queries=[reproduction_rate()],
    num_adaptive_samples=3000,
    num_samples=7000,
)

Samples collected:   0%|          | 0/10000 [00:00<?, ?it/s]

Samples collected:   0%|          | 0/10000 [00:00<?, ?it/s]

Samples collected:   0%|          | 0/10000 [00:00<?, ?it/s]

Samples collected:   0%|          | 0/10000 [00:00<?, ?it/s]

In [12]:
# NOTE: despite its name this value is used as the Poisson mean for the number
# of new cases (num_init times the Exponential mean 1 / reproduction_rate_rate).
mean_reproduction_rate = num_init / reproduction_rate_rate
# Window of case counts over which the pmf is evaluated.
start = 107500
stop = 109900
x = torch.tensor(np.arange(start, stop))
# Poisson probability mass at each x, recovered by exponentiating log-probs.
y = torch.exp(dist.Poisson(mean_reproduction_rate).log_prob(x))

In [13]:
# Line plot of the Poisson pmf computed in the previous cell.
source = ColumnDataSource(dict(x=x.tolist(), y=y.tolist()))
p = figure(
    title="",
    x_axis_label="Number of new cases",
    y_axis_label="Probability mass",
    x_range=[start, stop],
    y_range=[0, 0.0014],
    plot_width=500,
    plot_height=400,
    outline_line_color="black",
)
glyph = p.line(
    source=source,
    x="x",
    y="y",
    line_width=2,
    line_color="steelblue",
    legend_label=f"Poisson({mean_reproduction_rate})",
)
# Vertical marker at the Poisson mean; it extends past y_range on purpose.
p.line(
    x=np.full(2, mean_reproduction_rate),
    y=[0, 0.002],
    line_width=2,
    line_color="black",
    legend_label=f"Mean value = {mean_reproduction_rate}",
)

# Axis formatting: plain (non-scientific) y ticks, thousands separators on x.
formatter = NumeralTickFormatter(format="0,0")
p.xaxis.formatter = formatter
p.yaxis.formatter.use_scientific = False
p.xaxis.minor_tick_line_color = None
p.yaxis.minor_tick_line_color = None

# Light grey grid; legend centred along the bottom edge.
p.grid.grid_line_color = "grey"
p.grid.grid_line_alpha = 0.2
p.grid.grid_line_width = 0.3
p.legend.location = "bottom_center"

In [14]:
# Display the Poisson figure built in the previous cell.
show(p)
# Optional export of the figure as JSON (left disabled).
# save_bokeh_json(p, "./quick_start/", "prior_poisson")
# Remove the name `p`; later cells build new figures under the same name.
del p

## Analysis

In [15]:
def plot_posterior(samples):
    """Build a histogram figure of posterior reproduction-rate samples.

    Args:
        samples: One-dimensional collection of posterior draws (for example
            one chain as a torch tensor or a NumPy array). It must be
            accepted by ``np.histogram`` and provide ``.mean()``.

    Returns:
        A Bokeh figure with a 30-bin histogram of ``samples``, a vertical
        line at the sample mean, and a hover tool showing each bin's range
        and count.
    """
    # Read the argument rather than a notebook-level global, so the figure
    # always reflects exactly the data the caller passed in.
    hist, bins = np.histogram(samples, bins=30)
    posterior_mean = float(samples.mean())

    left = bins[:-1].tolist()
    top = hist.tolist()
    right = bins[1:].tolist()
    bottom = np.zeros(len(hist)).tolist()
    # Leave headroom above the tallest bar.
    ceiling = 1.2 * hist.max()
    # Bin ranges for the tooltip. Five decimals keep neighbouring edges
    # distinguishable; rounding to 0 decimals collapsed them to "0.0–0.0".
    tips = [f"{lo:.5f}–{hi:.5f}" for lo, hi in zip(bins[:-1], bins[1:])]
    source = ColumnDataSource(
        {"left": left, "top": top, "right": right, "bottom": bottom, "tip": tips,}
    )
    # NOTE(review): bar heights are raw bin counts (np.histogram is called
    # without density=True) although the y axis is labelled as a density.
    p = figure(
        plot_width=500,
        plot_height=400,
        outline_line_color="black",
        title="",
        y_axis_label="Probability density",
        x_axis_label="Reproduction rate",
        y_range=[0, ceiling],
    )
    glyph = p.quad(
        left="left",
        top="top",
        right="right",
        bottom="bottom",
        source=source,
        fill_color="steelblue",
        fill_alpha=0.7,
        line_color="white",
        line_width=2,
        hover_fill_color="orange",
        hover_line_color="black",
        hover_fill_alpha=1,
        legend_label="Posterior",
    )
    # Attach the hover tool so the "tip" column and the hover_* styling on the
    # bars are actually used, as in the other histogram in this notebook.
    p.add_tools(
        HoverTool(
            renderers=[glyph],
            tooltips=[("Reproduction rate", "@tip"), ("Count", "@top")],
        )
    )
    p.line(
        x=[posterior_mean] * 2,
        y=[0, ceiling],
        line_color="black",
        line_width=2,
        legend_label=f"Posterior mean = {round(posterior_mean, 4)}",
    )
    p.grid.grid_line_alpha = 0.2
    p.grid.grid_line_color = "grey"
    p.grid.grid_line_width = 0.3
    p.yaxis.minor_tick_line_color = None
    p.xaxis.minor_tick_line_color = None
    p.legend.location = "top_right"
    return p

In [16]:
# Take row 0 of the reproduction-rate samples and plot its histogram.
reproduction_rate_samples = samples.get_variable(reproduction_rate())[0]
p = plot_posterior(samples=reproduction_rate_samples)

In [17]:
# Display the posterior histogram.
show(p)
# Optional export of the figure as JSON (left disabled).
# save_bokeh_json(p, "./quick_start/", "posterior_rate_static")
# Remove the name `p`; later cells build new figures under the same name.
del p

# Analysis
---

In [18]:
# Three consecutive days starting 2021-01-01.
time = [date(2021, 1, 1) + timedelta(days=offset) for offset in range(3)]
# One case count per day in `time`; the first entry is num_init.
case_history = torch.tensor([float(num_init), 1381734.0, 1630446.0])

In [19]:
@bm.functional
def num_total(today):
    """Case total for ``today``.

    On or before ``time[0]`` the total is ``num_init``. For any later day it
    is ``num_new(today)`` plus the total of the previous day.
    """
    if today > time[0]:
        previous_day = today - timedelta(days=1)
        return num_new(today) + num_total(previous_day)
    return num_init


@bm.random_variable
def num_new(today):
    """Random variable for the number of new cases on ``today``.

    This replaces the earlier ``num_new(num_current)`` definition: the
    argument is now a date, and the Poisson rate is ``reproduction_rate()``
    times ``num_total`` of the previous day.
    """
    rate = reproduction_rate()
    total_yesterday = num_total(today - timedelta(days=1))
    return dist.Poisson(rate * total_yesterday)

In [20]:
# Pair every day after the first with the day-over-day change in case_history.
observations = dict(
    zip((num_new(day) for day in time[1:]), torch.diff(case_history))
)
# The first day must never appear as an observed num_new() argument.
assert all(key.arguments[0] != time[0] for key in observations)

# Query the reproduction rate with four chains: 7000 samples requested per
# chain, with 3000 adaptive samples on top.
samples = bm.CompositionalInference().infer(
    observations=observations,
    queries=[reproduction_rate()],
    num_chains=4,
    num_adaptive_samples=3000,
    num_samples=7000,
)

Samples collected:   0%|          | 0/10000 [00:00<?, ?it/s]

Samples collected:   0%|          | 0/10000 [00:00<?, ?it/s]

Samples collected:   0%|          | 0/10000 [00:00<?, ?it/s]

Samples collected:   0%|          | 0/10000 [00:00<?, ?it/s]

## Results of Inference

In [21]:
# Show the object returned by infer().
samples

<beanmachine.ppl.inference.monte_carlo_samples.MonteCarloSamples at 0x7fd5102dd130>

In [22]:
# List the random-variable identifiers stored in the samples object.
list(samples.keys())

[RVIdentifier(wrapper=<function reproduction_rate at 0x7fd510212700>, arguments=())]

In [23]:
# Fetch the reproduction-rate samples via the explicit accessor.
samples.get_variable(reproduction_rate())

tensor([[0.2195, 0.2193, 0.2194,  ..., 0.2198, 0.2198, 0.2195],
        [0.2192, 0.2196, 0.2193,  ..., 0.2198, 0.2200, 0.2190],
        [0.2195, 0.2189, 0.2194,  ..., 0.2202, 0.2201, 0.2192],
        [0.2198, 0.2199, 0.2192,  ..., 0.2189, 0.2197, 0.2192]])

In [24]:
# Fetch the reproduction-rate samples via indexing.
samples[reproduction_rate()]

tensor([[0.2195, 0.2193, 0.2194,  ..., 0.2198, 0.2198, 0.2195],
        [0.2192, 0.2196, 0.2193,  ..., 0.2198, 0.2200, 0.2190],
        [0.2195, 0.2189, 0.2194,  ..., 0.2202, 0.2201, 0.2192],
        [0.2198, 0.2199, 0.2192,  ..., 0.2189, 0.2197, 0.2192]])

In [25]:
# Select chain 0; the result is displayed as another samples object.
chain = samples.get_chain(chain=0)
chain

<beanmachine.ppl.inference.monte_carlo_samples.MonteCarloSamples at 0x7fd50ded3730>

In [26]:
# Reproduction-rate samples of the selected chain.
chain[reproduction_rate()]

tensor([0.2194, 0.2194, 0.2192,  ..., 0.2196, 0.2203, 0.2198])

In [27]:
# Row 0 of the reproduction-rate samples, skipping its first 100 entries.
reproduction_rate_samples = samples.get_variable(reproduction_rate())[0, 100:]
p = plot_posterior(samples=reproduction_rate_samples)

In [28]:
# Display the posterior histogram.
show(p)
# Optional export of the figure as JSON (left disabled).
# save_bokeh_json(p, "./analysis/", "posterior_rate_dynamic")
# Remove the name `p` now that the figure has been shown.
del p

## Diagnostics

In [29]:
# Set the ArviZ HDI probability to 0.89 (the summary below reports the
# hdi_5.5% and hdi_94.5% columns).
az.rcParams["stats.hdi_prob"] = 0.89
# Summary table of the samples, rounded to 5 decimals.
az.summary(samples.to_xarray(), round_to=5)

Unnamed: 0,mean,sd,hdi_5.5%,hdi_94.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
reproduction_rate(),0.21964,0.0003,0.21916,0.22011,0.0,0.0,19172.24154,17561.02418,1.00022


In [30]:
# Plot the reproduction-rate samples with the first 100 entries of every row
# dropped, keeping only the panel at row 0, column 1 of what ArviZ returns.
trace_plots = az.plot_trace(
    {"Reproduction rate": samples[reproduction_rate()][:, 100:].numpy()},
    show=False,
    compact=False,
)[0][1]

# Size and frame of the kept panel.
trace_plots.plot_height = 400
trace_plots.plot_width = 500
trace_plots.outline_line_color = "black"

# Light grey grid.
trace_plots.grid.grid_line_color = "grey"
trace_plots.grid.grid_line_alpha = 0.2
trace_plots.grid.grid_line_width = 0.3

In [31]:
# Display the trace figure.
show(trace_plots)
# Optional export of the figure as JSON (left disabled).
# save_bokeh_json(trace_plots, "./analysis/", "mcmc_trace")

In [32]:
# Autocorrelation figures for the reproduction-rate samples (first 100
# entries of every row dropped), arranged into one Bokeh grid layout.
autocorr_plots = gridplot(
    list(
        az.plot_autocorr(
            {"Reproduction rate": samples[reproduction_rate()][:, 100:].numpy()},
            show=False,
        )
    )
)

In [33]:
# Display the autocorrelation grid.
show(autocorr_plots)
# Optional export of the grid as JSON (left disabled).
# save_bokeh_json(autocorr_plots, "./analysis/", "mcmc_autocorr")