In [1]:
import pyro
import pyro.distributions as dist
import torch
from pyro.infer import SVI, TraceEnum_ELBO, Predictive, config_enumerate
from pyro.optim import ClippedAdam
from pyro.infer.autoguide import AutoNormal, AutoDiagonalNormal, AutoDelta
from pprint import pprint

### Example 1: Mixed continuous-discrete Bayesian network

In [2]:
# @config_enumerate marks the model's discrete sample sites for enumeration. It is "required" (not strictly)
# because the model contains a discrete variable, A.
# See https://pyro.ai/examples/enumeration.html
@config_enumerate
def BN_model(A_obs=None, B_obs=None, C_obs=None, N=None):
    """Mixed continuous-discrete Bayesian network over the nodes A, B and C.

    Global latent parameters (sampled once per call):
        weights             -- Dirichlet(1, 1, 1) prior over the category probabilities of A
        beta_concentrations -- Gamma(concentration=2, rate=0.5) priors over the two parameters (> 0) of the
                               Beta distribution of B
        C_weights           -- Beta(1, 1) priors over one weight k per category of A

    Per data point (inside the 'data' plate of size N):
        A ~ Categorical(weights)
        B ~ Beta(beta_concentrations[0], beta_concentrations[1])
        C ~ Binomial(probs=B * C_weights[A]), with total_count left at its default

    Args:
        A_obs, B_obs, C_obs: optional observations of A, B and C. They must support ``len()`` and
            ``.squeeze()`` (presumably torch tensors); all supplied ones must have the same length.
        N: optional number of data points. When omitted it is taken from the first supplied observation,
            or set to 1 if nothing is observed.

    Raises:
        AssertionError: if the supplied lengths disagree with each other or with N.
    """
    # Agree on a single plate size: every supplied observation has to match N (or defines it if N is unset).
    for observed in (A_obs, B_obs, C_obs):
        if observed is None:
            continue
        if N is None:
            N = len(observed)
        else:
            assert N == len(observed)
    if N is None:
        N = 1

    # Remove singleton dimensions from whatever was observed.
    if A_obs is not None:
        A_obs = A_obs.squeeze()
    if B_obs is not None:
        B_obs = B_obs.squeeze()
    if C_obs is not None:
        C_obs = C_obs.squeeze()

    # Pyro distinguishes the "batch_shape" (shape of the samples drawn) from the "event_shape" (shape of a
    # single random variable). .to_event() declares each parameter vector below to be one multivariate
    # random variable. See https://pyro.ai/examples/tensor_shapes.html for details.

    # Prior over the weights of the categorical distribution from which A is drawn.
    weights = pyro.sample('weights', dist.Dirichlet(torch.ones(3)).to_event())

    # Prior over the parameters (> 0) of the beta distribution from which B is drawn.
    concentration_prior = dist.Gamma(concentration=torch.tensor([2., 2.]), rate=torch.tensor([0.5, 0.5]))
    beta_concentrations = pyro.sample('beta_concentrations', concentration_prior.to_event())

    # Prior over the weights k in p_C = B * k(A).
    k_prior = dist.Beta(torch.tensor([1., 1., 1.]), torch.tensor([1., 1., 1.]))
    C_weights = pyro.sample('C_weights', k_prior.to_event())

    # Without data points there is nothing left to sample.
    if not N > 0:
        return

    with pyro.plate('data', N):
        A = pyro.sample('A', dist.Categorical(weights), obs=A_obs, infer={"enumerate": "parallel"})
        B = pyro.sample('B', dist.Beta(beta_concentrations[0], beta_concentrations[1]), obs=B_obs)
        success_prob = B * C_weights[A]
        pyro.sample('C', dist.Binomial(probs=success_prob), obs=C_obs)

In [3]:
# Visualize the model (requires graphviz; untested here because graphviz was not installed)
#pyro.render_model(lambda: BN_model(N=100), render_distributions=True)

In [4]:
# Helper function
def summarize_samples(samples):
    """Compute per-site summary statistics over the leading (sample) dimension.

    Adapted from https://pyro.ai/examples/bayesian_regression.html#Model-Evaluation.

    Args:
        samples: mapping from site name to a tensor whose first dimension indexes the draws.

    Returns:
        dict mapping each floating-point site name to a dict with the keys "mean", "std", "5%" and "95%".
        Sites that are not floating point are skipped, and a message is printed for each of them.
    """
    param_stats = {}
    for name, draws in samples.items():
        if not torch.is_floating_point(draws):
            print(f'Dropping variable {name} from summary statistics since it is not a float.')
            continue

        num_draws = len(draws)
        # kthvalue counts from 1. With fewer than 20 draws int(num_draws * 0.05) is 0, which kthvalue
        # rejects, so clamp to the smallest valid rank. Larger sample sizes are unaffected.
        lower_rank = max(1, int(num_draws * 0.05))
        upper_rank = max(1, int(num_draws * 0.95))

        param_stats[name] = {
            "mean": torch.mean(draws, 0),
            "std": torch.std(draws, 0),
            "5%": draws.kthvalue(lower_rank, dim=0)[0],
            "95%": draws.kthvalue(upper_rank, dim=0)[0],
        }
    return param_stats

In [5]:
# Sample from the prior distribution, see here: https://forum.pyro.ai/t/samples-from-prior-distribution/1740/2
# Seed the random number generators first so that the samples drawn from here on (and the summaries
# printed from them) are reproducible under "Restart Kernel -> Run All".
pyro.set_rng_seed(0)
prior_samples = Predictive(BN_model, posterior_samples={}, num_samples=1000)()
print("SUMMARY: SAMPLES FROM PRIOR DISTRIBUTION\n")
pprint(summarize_samples(prior_samples))

SUMMARY: SAMPLES FROM PRIOR DISTRIBUTION

Dropping variable A from summary statistics since it is not a float.
{'B': {'5%': tensor([0.0493]),
       '95%': tensor([0.9502]),
       'mean': tensor([0.5027]),
       'std': tensor([0.2781])},
 'C': {'5%': tensor([0.]),
       '95%': tensor([1.]),
       'mean': tensor([0.2610]),
       'std': tensor([0.4394])},
 'C_weights': {'5%': tensor([[0.0520, 0.0519, 0.0496]]),
               '95%': tensor([[0.9472, 0.9503, 0.9433]]),
               'mean': tensor([[0.4884, 0.5059, 0.5031]]),
               'std': tensor([[0.2873, 0.2875, 0.2881]])},
 'beta_concentrations': {'5%': tensor([[0.7030, 0.7028]]),
                         '95%': tensor([[9.3248, 8.5537]]),
                         'mean': tensor([[3.9635, 3.7916]]),
                         'std': tensor([[2.7857, 2.5253]])},
 'weights': {'5%': tensor([[0.0299, 0.0250, 0.0338]]),
             '95%': tensor([[0.7756, 0.7545, 0.7737]]),
             'mean': tensor([[0.3442, 0.3132, 0.3426]]

In [6]:
# Fix "ground-truth" values for the global parameters and draw a synthetic data set from the model
# conditioned on them. The SVI run below then tries to recover these values.
weights = torch.tensor([0.2, 0.2, 0.6])
beta_concentrations = torch.tensor([0.5, 2.0])
C_weights = torch.tensor([0.5, 1.0, 0.2])

true_params = {
    'weights': weights,
    'beta_concentrations': beta_concentrations,
    'C_weights': C_weights,
}
BN_model_conditioned = pyro.poutine.condition(BN_model, data=true_params)

parametrized_samples = Predictive(
    BN_model_conditioned,
    posterior_samples={},
    num_samples=5000,
)()

print("SUMMARY: SAMPLES FROM CONDITIONED DISTRIBUTION\n")
pprint(summarize_samples(parametrized_samples))

SUMMARY: SAMPLES FROM CONDITIONED DISTRIBUTION

Dropping variable A from summary statistics since it is not a float.
{'B': {'5%': tensor([0.0011]),
       '95%': tensor([0.6523]),
       'mean': tensor([0.2009]),
       'std': tensor([0.2133])},
 'C': {'5%': tensor([0.]),
       '95%': tensor([1.]),
       'mean': tensor([0.0840]),
       'std': tensor([0.2774])},
 'C_weights': {'5%': tensor([[0.5000, 1.0000, 0.2000]]),
               '95%': tensor([[0.5000, 1.0000, 0.2000]]),
               'mean': tensor([[0.5000, 1.0000, 0.2000]]),
               'std': tensor([[0., 0., 0.]])},
 'beta_concentrations': {'5%': tensor([[0.5000, 2.0000]]),
                         '95%': tensor([[0.5000, 2.0000]]),
                         'mean': tensor([[0.5000, 2.0000]]),
                         'std': tensor([[0., 0.]])},
 'weights': {'5%': tensor([[0.2000, 0.2000, 0.6000]]),
             '95%': tensor([[0.2000, 0.2000, 0.6000]]),
             'mean': tensor([[0.2000, 0.2000, 0.6000]]),
           

In [7]:
# Estimate the parameters from the synthetic data using stochastic variational inference (SVI).
pyro.clear_param_store()

# The guide is built from the model with the sites A, B and C hidden, so it only covers the remaining
# (global parameter) sites. If you want MAP estimation instead (no uncertainty required), use AutoDelta
# in place of AutoNormal. See https://pyro.ai/examples/mle_map.html.
guide = AutoNormal(pyro.poutine.block(BN_model, hide=["A", "B", "C"]))

# For a more elaborate optimization scheme, e.g. with lr scheduling, see
# https://pyro.ai/examples/svi_part_iv.html
optimizer = ClippedAdam({"lr": 0.005, 'clip_norm': 1.0})
# If we didn't have a discrete variable, we'd use Trace_ELBO.
elbo = TraceEnum_ELBO(max_plate_nesting=1)
svi = SVI(model=BN_model, guide=guide, optim=optimizer, loss=elbo)

svi_data = [parametrized_samples[site] for site in ('A', 'B', 'C')]
for i in range(5000):
    loss = svi.step(*svi_data)

In [8]:
# Check the estimates: sample the global parameters from the fitted guide and compare their summary
# statistics with the values the data were generated from.
posterior_predictive = Predictive(
    BN_model,
    guide=guide,
    num_samples=1000,
    return_sites=("weights", "beta_concentrations", "C_weights"),
)
posterior_samples = posterior_predictive()

print("SUMMARY: SAMPLES FROM POSTERIOR DISTRIBUTION\n")
pprint(summarize_samples(posterior_samples))

SUMMARY: SAMPLES FROM POSTERIOR DISTRIBUTION

{'C_weights': {'5%': tensor([[0.4581, 0.7719, 0.1719]]),
               '95%': tensor([[0.6564, 0.9916, 0.2455]]),
               'mean': tensor([[0.5603, 0.9311, 0.2078]]),
               'std': tensor([[0.0598, 0.0733, 0.0224]])},
 'beta_concentrations': {'5%': tensor([[0.4933, 1.9288]]),
                         '95%': tensor([[0.5293, 2.0992]]),
                         'mean': tensor([[0.5111, 2.0118]]),
                         'std': tensor([[0.0111, 0.0519]])},
 'weights': {'5%': tensor([[0.1835, 0.1877, 0.5827]]),
             '95%': tensor([[0.2088, 0.2178, 0.6180]]),
             'mean': tensor([[0.1962, 0.2029, 0.6009]]),
             'std': tensor([[0.0078, 0.0090, 0.0109]])}}


### Example 2: Purely discrete Bayesian network

In [9]:
# @config_enumerate marks the model's discrete sample sites for enumeration. It is "required" (not strictly)
# because both variables of this model, A and B, are discrete.
# See https://pyro.ai/examples/enumeration.html
@config_enumerate
def BN_model(A_obs=None, B_obs=None, N=None):
    """Purely discrete Bayesian network A -> B.

    Global latent parameters (sampled once per call):
        A_weights -- Dirichlet(1, 1, 1) prior over the category probabilities of A
        B_weights -- Beta(1, 1) priors over one success probability k per category of A

    Per data point (inside the 'data' plate of size N):
        A ~ Categorical(A_weights)
        B ~ Binomial(probs=B_weights[A]), with total_count left at its default

    Args:
        A_obs, B_obs: optional observations of A and B. They must support ``len()`` and ``.squeeze()``
            (presumably torch tensors); if both are supplied they must have the same length.
        N: optional number of data points. When omitted it is taken from the first supplied observation,
            or set to 1 if nothing is observed.

    Raises:
        AssertionError: if the supplied lengths disagree with each other or with N.
    """
    # Agree on a single plate size: every supplied observation has to match N (or defines it if N is unset).
    for observed in (A_obs, B_obs):
        if observed is None:
            continue
        if N is None:
            N = len(observed)
        else:
            assert N == len(observed)
    if N is None:
        N = 1

    # Remove singleton dimensions from whatever was observed.
    if A_obs is not None:
        A_obs = A_obs.squeeze()
    if B_obs is not None:
        B_obs = B_obs.squeeze()

    # Pyro distinguishes the "batch_shape" (shape of the samples drawn) from the "event_shape" (shape of a
    # single random variable). .to_event() declares each parameter vector below to be one multivariate
    # random variable. See https://pyro.ai/examples/tensor_shapes.html for details.

    # Prior over the weights of the categorical distribution from which A is drawn.
    A_weights = pyro.sample('A_weights', dist.Dirichlet(torch.ones(3)).to_event())

    # Prior over the success probabilities k in p_B = k(A).
    B_weights = pyro.sample('B_weights', dist.Beta(torch.tensor([1., 1., 1.]), torch.tensor([1., 1., 1.])).to_event())

    # Only open the data plate when there is at least one data point, so that empty observations reduce
    # the model to its priors instead of opening a zero-sized plate.
    if N > 0:
        with pyro.plate('data', N):
            A = pyro.sample('A', dist.Categorical(A_weights), obs=A_obs, infer={"enumerate": "parallel"})
            pyro.sample('B', dist.Binomial(probs=B_weights[A]), obs=B_obs, infer={"enumerate": "parallel"})

In [10]:
# Fix "ground-truth" values for the global parameters and draw a synthetic data set from the model
# conditioned on them. The SVI run below then tries to recover these values.
A_weights = torch.tensor([0.2, 0.2, 0.6])
B_weights = torch.tensor([0.5, 1.0, 0.2])

true_params = {
    'A_weights': A_weights,
    'B_weights': B_weights,
}
BN_model_conditioned = pyro.poutine.condition(BN_model, data=true_params)

parametrized_samples = Predictive(
    BN_model_conditioned,
    posterior_samples={},
    num_samples=5000,
)()

print("SUMMARY: SAMPLES FROM CONDITIONED DISTRIBUTION\n")
pprint(summarize_samples(parametrized_samples))

SUMMARY: SAMPLES FROM CONDITIONED DISTRIBUTION

Dropping variable A from summary statistics since it is not a float.
{'A_weights': {'5%': tensor([[0.2000, 0.2000, 0.6000]]),
               '95%': tensor([[0.2000, 0.2000, 0.6000]]),
               'mean': tensor([[0.2000, 0.2000, 0.6000]]),
               'std': tensor([[0., 0., 0.]])},
 'B': {'5%': tensor([0.]),
       '95%': tensor([1.]),
       'mean': tensor([0.4194]),
       'std': tensor([0.4935])},
 'B_weights': {'5%': tensor([[0.5000, 1.0000, 0.2000]]),
               '95%': tensor([[0.5000, 1.0000, 0.2000]]),
               'mean': tensor([[0.5000, 1.0000, 0.2000]]),
               'std': tensor([[0., 0., 0.]])}}


In [11]:
# Estimate the parameters from the synthetic data using stochastic variational inference (SVI).
pyro.clear_param_store()

# The guide is built from the model with the sites A and B hidden, so it only covers the remaining
# (global parameter) sites. If you want MAP estimation instead (no uncertainty required), use AutoDelta
# in place of AutoNormal. See https://pyro.ai/examples/mle_map.html.
guide = AutoNormal(pyro.poutine.block(BN_model, hide=["A", "B"]))

# For a more elaborate optimization scheme, e.g. with lr scheduling, see
# https://pyro.ai/examples/svi_part_iv.html
optimizer = ClippedAdam({"lr": 0.005, 'clip_norm': 1.0})
# If we didn't have a discrete variable, we'd use Trace_ELBO.
elbo = TraceEnum_ELBO(max_plate_nesting=1)
svi = SVI(model=BN_model, guide=guide, optim=optimizer, loss=elbo)

svi_data = [parametrized_samples[site] for site in ('A', 'B')]
for i in range(5000):
    loss = svi.step(*svi_data)

In [12]:
# Check the estimates: sample the global parameters from the fitted guide and compare their summary
# statistics with the values the data were generated from.
posterior_predictive = Predictive(
    BN_model,
    guide=guide,
    num_samples=1000,
    return_sites=("A_weights", "B_weights"),
)
posterior_samples = posterior_predictive()

print("SUMMARY: SAMPLES FROM POSTERIOR DISTRIBUTION\n")
pprint(summarize_samples(posterior_samples))

SUMMARY: SAMPLES FROM POSTERIOR DISTRIBUTION

{'A_weights': {'5%': tensor([[0.1867, 0.1828, 0.5880]]),
               '95%': tensor([[0.2142, 0.2075, 0.6210]]),
               'mean': tensor([[0.1998, 0.1952, 0.6050]]),
               'std': tensor([[0.0084, 0.0074, 0.0100]])},
 'B_weights': {'5%': tensor([[0.4745, 0.9939, 0.1898]]),
               '95%': tensor([[0.5497, 0.9999, 0.2260]]),
               'mean': tensor([[0.5125, 0.9980, 0.2070]]),
               'std': tensor([[0.0229, 0.0044, 0.0111]])}}
