In [None]:
%run notebook_config

# Nested sampling

In this example, we fit the same event with the same model as in the annual parallax example, except using dynamic nested sampling as implemented in [dynesty](https://dynesty.readthedocs.io/en/latest/index.html) instead of Hamiltonian Monte Carlo.

In [None]:
import numpy as np
from matplotlib import pyplot as plt

import pymc3 as pm
import theano.tensor as T

import caustic as ca
import exoplanet as xo

# Seed NumPy's global random number generator so that reruns are repeatable
np.random.seed(42)

# Load event data
event_dir = "../../data/OB05086/"
event = ca.data.OGLEData(event_dir)

# Plot data (the trailing semicolon suppresses the cell's return-value repr)
fig, ax = plt.subplots(figsize=(10, 5))
event.plot_standardized_data(ax);

In [None]:
# Initialize a SingleLensModel object; it is used below as the `with` context
# in which all model variables are declared
parallax_model = ca.models.SingleLensModel(event)

In [None]:
# Number of light curves in the event; every per-band parameter below is
# declared with this shape
n_bands = len(event.light_curves)
# Normal distributions restricted to values >= 0 and >= 1 respectively
BoundedNormal = pm.Bound(pm.Normal, lower=0.0)
BoundedNormal_1 = pm.Bound(pm.Normal, lower=1.0)

# NOTE: `prior_transform` further down addresses the sampled parameters by
# position and labels them in the same order in which the free variables are
# declared here (Delta_F, F_base, t_0, u_0, t_eff, pi_EE, pi_EN, c_1).
# Presumably that is the order of `parallax_model.vars` -- confirm before
# reordering any of the declarations below.
with parallax_model:
    # Flux parameters
    Delta_F = BoundedNormal(
        "Delta_F",
        mu=T.zeros(n_bands),
        sd=50.0 * T.ones(n_bands),
        testval=5.0 * T.ones(n_bands),
        shape=(n_bands),
    )

    F_base = pm.Normal(
        "F_base",
        mu=T.zeros(n_bands),
        sd=0.6 * T.ones(n_bands),
        testval=T.zeros(n_bands),
        shape=(n_bands),
    )

    # Other parameters
    # t_0 is uniform over the model's [t_min, t_max] range, starting from the
    # estimate returned by ca.estimate_t0
    t_0 = pm.Uniform(
        "t_0", parallax_model.t_min, parallax_model.t_max, testval=ca.estimate_t0(event)
    )

    # In the parallax model, u_0 can be negative
    u_0 = pm.Normal("u_0", mu=0.0, sd=1.5, testval=-0.41)
    teff = BoundedNormal("t_eff", mu=0.0, sd=365.0, testval=20.0)

    # Initialize the two parallax parameters
    pi_EE = pm.Normal("pi_EE", mu=0.0, sigma=1.0, testval=0.1)
    pi_EN = pm.Normal("pi_EN", mu=0.0, sigma=1.0, testval=-0.3)

    # Deterministic transformations
    # t_eff is the sampled quantity; t_E is recovered from it as t_eff / |u_0|
    t_E = pm.Deterministic("t_E", teff / T.abs_(u_0))
    m_source, g = ca.compute_source_mag_and_blend_fraction(
        event, parallax_model, Delta_F, F_base, u_0
    )
    pm.Deterministic("m_source", m_source)
    pm.Deterministic("g", g)
    # Magnitude of the parallax vector (pi_EE, pi_EN)
    pm.Deterministic("pi_E", T.sqrt(pi_EE ** 2 + pi_EN ** 2))

    # Compute the trajectory including parallax
    trajectory = ca.trajectory.Trajectory(event, t_0, u_0, t_E, pi_EE, pi_EN)
    u = trajectory.compute_trajectory(parallax_model.t)

    # Compute the magnification
    mag = parallax_model.compute_magnification(u, u_0)

    # Compute the mean model
    mean = Delta_F * mag + F_base

    # Let's allow for rescaling of the error bars by a constant factor
    # (the lower bound of 1 means the reported uncertainties can only be
    # inflated, never shrunk)
    c_1 = BoundedNormal_1(
        "c_1",
        mu=T.ones(n_bands),
        sd=2.0 * T.ones(n_bands),
        testval=1.5 * T.ones(n_bands),
        shape=(n_bands),
    )

    # Diagonal terms of the covariance matrix
    var_F = (c_1 * parallax_model.sig_F) ** 2

    # Compute the Gaussian log_likelihood, add it as a potential term to the model
    # (the sampling cell later reads it back as `parallax_model.log_likelihood`)
    ll = parallax_model.compute_log_likelihood(parallax_model.F - mean, var_F)
    pm.Potential("log_likelihood", ll)

    # Register the same value as a named deterministic (note the trailing
    # underscore, which keeps the name distinct from the potential above)
    pm.Deterministic("log_likelihood_", ll)

In [None]:
# List the model's variables; `prior_transform` below labels its positional
# entries with these names (presumably in this same order -- confirm)
print(parallax_model.vars)

To use dynamic nested sampling, we have to rewrite the priors in the form of a prior transform function, which maps i.i.d. uniformly distributed parameters defined on the unit cube to our parameters of interest. The `ppf` function associated with probability distributions defined in `scipy.stats` does exactly that. To learn why this step is necessary, check out the [dynesty docs](https://dynesty.readthedocs.io/en/latest/quickstart.html#prior-transforms).

In [None]:
# Import the submodule explicitly: a bare `import scipy` is not guaranteed to
# make `scipy.stats` available on every SciPy version.
import scipy.stats


# (parameter, loc, scale) of the normal distribution each unit-cube coordinate
# is mapped onto, listed in the positional order of the sampled vector.
_PRIOR_NORMALS = (
    ("Delta_F_lowerbound__", 2.0, 1.0),
    ("F_base__", 0.0, 0.5),
    ("t_0_interval__", 0.1, 0.5),
    ("u_0", 0.0, 1.0),
    ("t_eff_lowerbound__", 3.0, 2.0),
    ("pi_EE", 0.0, 0.5),
    ("pi_EN", 0.0, 0.5),
    ("c_1_lowerbound", -3.0, 3.0),
)


def prior_transform(u):
    """
    Transforms the uniform random variables `u ~ Unif[0., 1.)`
    to the parameters of interest.

    Each of the first ``len(_PRIOR_NORMALS)`` coordinates is pushed through the
    inverse CDF (``ppf``) of the normal distribution listed for it in
    ``_PRIOR_NORMALS``; any further coordinates are passed through unchanged.

    Parameters
    ----------
    u : array_like
        Point in the unit cube, one coordinate per sampled parameter.

    Returns
    -------
    numpy.ndarray
        New float array holding the transformed parameters; `u` itself is
        not modified.

    Raises
    ------
    IndexError
        If `u` has fewer coordinates than ``_PRIOR_NORMALS`` has entries.
    """
    # Work on a float copy so the caller's array is left untouched and the
    # transformed values are never truncated to an integer dtype
    x = np.array(u, dtype=float)

    for i, (_name, loc, scale) in enumerate(_PRIOR_NORMALS):
        x[i] = scipy.stats.norm.ppf(u[i], loc=loc, scale=scale)

    return x

Let's run the sampler; this will take some time...

In [None]:
# This will take a long time
import dynesty
from dynesty import utils as dyfunc

# Dimensionality of the sampled space: one coordinate per parameter that
# `prior_transform` above fills in (indices 0-7).
ndim = 8

# Build the log-likelihood callable handed to dynesty from the model and its
# "log_likelihood" potential
loglike = ca.utils.get_log_likelihood_function(
    parallax_model, parallax_model.log_likelihood
)
sampler = dynesty.DynamicNestedSampler(loglike, prior_transform, ndim, sample="rwalk")

# pfrac=1.0 weights the dynamic allocation of live points entirely towards
# the posterior rather than the evidence
sampler.run_nested(wt_kwargs={"pfrac": 1.0}, print_progress=True, nlive_init=1000)

results = sampler.results

# Resample samples such that they have equal weight
# (`samples` are the raw draws with normalized importance weights `weights`;
# `new_samples` are equal-weight posterior draws)
samples, weights = results.samples, np.exp(results.logwt - results.logz[-1])
new_samples = dyfunc.resample_equal(samples, weights)

The sampling appears to have converged according to the internal criteria specified in dynesty, with a total of ~20M likelihood calls. Let's plot the diagnostic plots.

In [None]:
from dynesty import plotting as dyplot

# Plot a summary of the run (currently disabled).
# rfig, raxes = dyplot.runplot(results)

# Plot traces and 1-D marginalized posteriors.
# The plotting helpers are given the full `results` object rather than the
# resampled draws.
tfig, taxes = dyplot.traceplot(results)

# Plot the 2-D marginalized posteriors.
cfig, caxes = dyplot.cornerplot(results)

Let's plot the posterior over the $u_0$ parameter, which we expect to be multi-modal.

In [None]:
# Plot posterior for u_0 (column 3 of the parameter vector, see
# `prior_transform`).  The raw nested-sampling draws in `samples` carry unequal
# importance weights, so an unweighted histogram of them is not the posterior;
# use the equal-weight draws in `new_samples` instead.
fig, ax = plt.subplots()

ax.hist(new_samples[:, 3], bins=200, density=True, alpha=0.5)
ax.set_xlim(-0.6, 0.6)
ax.grid()
ax.set_xlabel("$u_0$")
ax.set_ylabel("Posterior density");

Looks like `dynesty` not only managed to discover both significant modes in the posterior, but the relative height of the two modes also matches what we've observed in the [annual parallax example](annual_parallax.ipynb).

Let's plot the model to check that it makes sense, and plot the posterior trajectories from the multi-modal pdf.

In [None]:
with parallax_model:
    # Dense time grid: the same 1000 evenly spaced epochs between t_min and
    # t_max, repeated as one row per band
    t_dense = np.repeat(
        np.linspace(parallax_model.t_min, parallax_model.t_max, 1000)[np.newaxis, :],
        n_bands,
        axis=0,
    )
    t_dense_tensor = T.as_tensor_variable(t_dense)

    # Both trajectory components, evaluated on the dense grid
    u_n, u_e = trajectory.compute_trajectory(t_dense_tensor, return_components=True)

    # Magnification at the total separation sqrt(u_n^2 + u_e^2)
    mag_dense = parallax_model.compute_magnification(T.sqrt(u_n ** 2 + u_e ** 2), u_0)

    # Mean model on the dense grid, built the same way as `mean` for the data
    mean_dense = Delta_F * mag_dense + F_base

In [None]:
# Plot model
fig, ax = plt.subplots(
    2, 1, gridspec_kw={"height_ratios": [3, 1]}, figsize=(10, 8), sharex=True
)

# Pass the equal-weight posterior draws (`new_samples`): the raw nested-sampling
# draws in `samples` are importance-weighted, so curves picked from them would
# not be posterior draws.
ca.plot_model_and_residuals(
    ax, event, parallax_model, new_samples, t_dense_tensor, mean_dense, n_samples=50
)

In [None]:
# Plot trajectory
fig, ax = plt.subplots(figsize=(8, 8))

# Pass the equal-weight posterior draws (`new_samples`): the raw nested-sampling
# draws in `samples` are importance-weighted, so trajectories picked from them
# would not be posterior draws.
ca.plot_trajectory_from_samples(
    ax,
    event,
    parallax_model,
    new_samples,
    t_dense_tensor,
    u_n,
    u_e,
    n_samples=100,
    color="C0",
)

Looks good. Whereas in the parallax example with HMC we had to manually discover the different modes and had no idea about their relative importance, `dynesty` properly sampled the full pdf.