# TASK
Follow the CMAFLikelihood example directly.
However, use `ConditionalGaussian` as the choice of distribution.

Instead of fitting the likelihood and recovering the posterior (NLE), fit the posterior directly, with prior imposed from the data (NPE).

Compare the samples of the posterior with the analytical posterior.

In [None]:
# setup
%matplotlib inline
%load_ext autoreload
%autoreload 2

from functools import partial
import logging
import corner
import ultranest
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow_probability as tfp

from py21cmlikelihoods import ConditionalGaussian
from py21cmlikelihoods.utils import prepare_dataset

tfd = tfp.distributions
tfb = tfp.bijectors

# Fetch the logger named "ultranest", attach a do-nothing handler to it and
# set its threshold to INFO.
# NOTE(review): presumably meant to keep the sampler's log output under
# control in the notebook -- confirm that INFO is the intended level.
ultranest_logger = logging.getLogger("ultranest")
ultranest_logger.addHandler(logging.NullHandler())
ultranest_logger.setLevel(logging.INFO)

In [None]:
# Build the density estimator that will play the role of the posterior.
# For NPE the usual roles are swapped: the 5-dimensional observation fills the
# "parameter" (conditioning) slot and the 2-dimensional (mu_0, sigma_0) vector
# fills the "data" slot, matching how the training set is assembled below.
NDE = ConditionalGaussian(
    n_parameters=5,
    n_data=2,
    diagonal_covariance=False,
    # A list of ten entries, each equal to 50.
    n_hidden=[50] * 10,
    optimizer=tf.optimizers.Adam(1e-4),
    kernel_initializer="glorot_uniform",
    kernel_initializer_kwargs={},
    # Alternative kernel initialiser, left disabled:
    # kernel_initializer = tf.keras.initializers.RandomNormal,
    # kernel_initializer_kwargs = {"mean":0.0, "stddev": 1e-3, "seed":None},
    bias_initializer="zeros",
    bias_initializer_kwargs={},
    last_layer_bias_initializer=None,
    # Pure L2 penalties (the L1 coefficient is zero) on kernels and biases.
    kernel_regularizer=tf.keras.regularizers.L1L2(l1=0.0, l2=1e-1),
    bias_regularizer=tf.keras.regularizers.L1L2(l1=0.0, l2=1e-3),
    regularize_last_layer=False,
)

In [None]:
# constructing the training set
# Prior draws: mu_0 ~ N(0, 1) and sigma_0 ~ U(0.5, 5).
mu_0 = np.random.normal(0.0, 1.0, size=100000)
sigma_0 = np.random.uniform(0.5, 5.0, size=100000)
params = np.stack([mu_0, sigma_0], axis=-1)
# Simulated observations: channel k = 1..5 is an independent Gaussian with
# mean k**2 * mu_0 and variance k**2 * sigma_0**2 (standard deviation
# k * sigma_0). Because the covariance is diagonal, all rows are drawn in a
# single broadcast call instead of one multivariate_normal call per
# parameter pair; the resulting array has shape (100000, 5).
data = np.random.normal(
    loc=np.arange(1, 6) ** 2 * mu_0[:, np.newaxis],
    scale=np.arange(1, 6) * sigma_0[:, np.newaxis],
)

# NPE: the roles are swapped on purpose. The (mu_0, sigma_0) draws are passed
# as `data_samples` and the simulated observations as `param_samples`, so the
# estimator is trained on the parameters conditioned on an observation.
training_set = prepare_dataset(NDE, data_samples=params, param_samples=data, batch_size=100)

In [None]:
# Fit the estimator on the prepared training set for 50 epochs, with
# pretraining, saving and history saving all switched off.
NDE.train(
    epochs=50,
    dataset=training_set,
    pretrain=False,
    save=False,
    save_history=False,
    verbose=1,
)

In [None]:
# Ten mock observations from the same model as the training data, evaluated
# at mu_0 = 1 and sigma_0 = 1: mean k**2 and variance k**2 for k = 1..5.
mock_measurement = np.random.multivariate_normal(
    mean=np.arange(1, 6) ** 2,
    cov=np.diag(np.arange(1, 6) ** 2),
    size=10,
)

In [None]:
def log_gauss(x, mu, sigma):
    """Return the log-density of the normal distribution N(mu, sigma**2) at x.

    Parameters
    ----------
    x, mu : float or array
        Evaluation point(s) and mean(s); they broadcast against each other.
    sigma : float or array
        Standard deviation (not the variance).

    Returns
    -------
    float or array
        ``log N(x | mu, sigma**2)``.
    """
    # The normalisation involves the variance, i.e. log(2 * pi * sigma**2).
    # For sigma == 1 this equals the value obtained with log(2 * pi * sigma).
    return -0.5 * np.log(2 * np.pi * sigma**2) - 0.5 * (x - mu)**2 / sigma**2

def analytic_log_likelihood(d, mu_0, sigma_0):
    """Return the exact log-likelihood of a 5-channel observation.

    Channel k = 1..5 is modelled as an independent Gaussian with mean
    ``k**2 * mu_0`` and variance ``k**2 * sigma_0**2``.

    Parameters
    ----------
    d : array
        Observation; it is broadcast against an ``(n, 5)`` array of means.
    mu_0, sigma_0 : float or array of length n
        Parameter values at which the likelihood is evaluated.

    Returns
    -------
    numpy.ndarray
        Log-likelihood values, one per ``(mu_0, sigma_0)`` pair.
    """
    channel_scale = (np.arange(1, 6)**2).reshape(1, -1)
    # np.asarray lets plain Python scalars through as well as arrays.
    mu_0 = np.asarray(mu_0, dtype=float).reshape(-1, 1)
    sigma_0 = np.asarray(sigma_0, dtype=float).reshape(-1, 1)

    mean = channel_scale * mu_0
    variance = channel_scale * sigma_0**2

    # Sum the logs instead of taking the log of the product: the product of
    # five variances under- or overflows for extreme sigma_0.
    log_norm = -2.5 * np.log(2 * np.pi) - 0.5 * np.sum(np.log(variance), axis=-1)
    chi2 = np.sum((d - mean)**2 / variance, axis=-1)
    return log_norm - 0.5 * chi2

# One measurement
Recover the posteriors for one measurement, `mock_measurement[0]`

In [None]:
def ultranest_analytic_posterior(p):
    """Unnormalised analytic log-posterior for ``mock_measurement[0]``.

    ``p`` is a 2-D batch of points whose first column is used as mu_0 and
    whose second column is used as sigma_0. The result is the analytic
    log-likelihood of the first mock measurement plus the log of a unit
    normal prior on mu_0, one value per row of ``p``.
    """
    mu_0, sigma_0 = p[:, 0], p[:, 1]
    log_prior = log_gauss(0.0, mu_0, 1.0)
    log_likelihood = analytic_log_likelihood(mock_measurement[0], mu_0, sigma_0)
    return log_prior + log_likelihood

def transformation(p):
    """Map unit-interval coordinates onto the sampled parameter ranges.

    Column 0 is stretched linearly from [0, 1] to [-5, 5] and column 1 from
    [0, 1] to [0.5, 5]. The input is left untouched; a new array with the
    same shape and dtype is returned.
    """
    scaled = np.zeros(p.shape, dtype=p.dtype)
    # (offset, width) of the target interval for each of the two columns.
    for column, (offset, width) in enumerate(((-5, 10), (0.5, 4.5))):
        scaled[:, column] = offset + width * p[:, column]
    return scaled

In [None]:
# Nested-sampling run over (mu_0, sigma_0). The analytic log-posterior is
# handed to the sampler as its log-likelihood and `transformation` maps the
# unit cube to the parameter ranges; both callbacks work on batches of
# points, hence `vectorized=True`.
sampler_analytic = ultranest.ReactiveNestedSampler(
    ["mu_0", "sigma_0"],
    loglike=ultranest_analytic_posterior,
    transform=transformation,
    vectorized=True,
    draw_multiple=True,
    ndraw_min=1000,
    ndraw_max=100000,
)
result_analytic = sampler_analytic.run(min_num_live_points=1000, min_ess=1000)
sampler_analytic.print_results()

Recovering the posterior for a single mock observation, $P(\mu, \sigma | x_1)$, is trivial: one simply calls `NDE.conditional_sample`. Let's do that for the first mock measurement.

In [None]:
# Draw 100000 (mu_0, sigma_0) samples from the trained estimator, conditioned
# on the first mock measurement.
NDE_sample = NDE.conditional_sample(100000, mock_measurement[0])

In [None]:
def cornerplot(results, fig = None, color = None):
    """Draw the weighted samples stored in ``results`` as a corner plot.

    Parameters
    ----------
    results : dict
        Must provide ``results['weighted_samples']['points']`` and
        ``results['weighted_samples']['weights']``.
    fig : optional
        Figure forwarded to ``corner.corner`` so that several posteriors can
        be overplotted; ``None`` leaves the choice to ``corner.corner``.
    color : optional
        Colour forwarded to ``corner.corner``.

    Returns
    -------
    The object returned by ``corner.corner``.
    """
    weighted = results['weighted_samples']
    points = np.array(weighted['points'])
    weights = np.array(weighted['weights'])

    # Keep only the samples at which the running sum of the weights has
    # already exceeded 1e-4.
    mask = np.cumsum(weights) > 1e-4

    # Raw strings: "\m" and "\s" are not valid escape sequences, so non-raw
    # literals trigger a warning. The label text itself is unchanged.
    return corner.corner(
        points[mask, :],
        weights = weights[mask],
        fig = fig,
        color = color,
        truths = [1.0, 1.0],
        levels = (0.68, 0.95),
        plot_contour=True,
        plot_density=False,
        plot_datapoints=False,
        labels = [r"$\mu_0$", r"$\sigma_0$"],
    )

In [None]:
# Overlay the NDE posterior samples (blue) and the analytic nested-sampling
# posterior (red) for the single measurement.
fig = plt.figure(figsize = (5, 5))
fig = corner.corner(
    NDE_sample.numpy(),
    weights = np.ones(100000) / 100000,
    # Draw on the figure created above; without this the 5x5 figure stays
    # empty and corner.corner opens a second one.
    fig = fig,
    truths = [1.0, 1.0],
    levels = (0.68, 0.95),
    plot_contour=True,
    plot_density=False,
    plot_datapoints=False,
    color = "blue",
    # Raw strings avoid the invalid "\m" / "\s" escape sequences.
    labels = [r"$\mu_0$", r"$\sigma_0$"],
)
fig = cornerplot(result_analytic, fig, "red")

However, as we have multiple measurements and want to recover the full posterior $P(\mu, \sigma | x_1, x_2, \ldots, x_{10})$, we can use the following trick. The measurements are independent given $(\mu, \sigma)$, so the joint likelihood factorises and, up to a normalisation constant that does not depend on $(\mu, \sigma)$,
$$
\begin{aligned}
P(\mu, \sigma | x_1, x_2, \ldots, x_N) &= \frac{P(x_1, x_2, \ldots, x_N | \mu, \sigma) \cdot P(\mu, \sigma)}{P(x_1, x_2, \ldots, x_N)} \\
&\propto \frac{1}{P(\mu, \sigma)^{N-1}}\prod_{i=1}^N \frac{P(x_i | \mu, \sigma) \cdot P(\mu, \sigma)}{P(x_i)} \\
&= P(\mu, \sigma)^{1 - N}\prod_{i=1}^N P(\mu, \sigma | x_i)
\end{aligned}
$$

Use this trick to recover the full posterior.

In [None]:
def ultranest_NDE_posterior(p):
    """Unnormalised log-posterior given all mock measurements, from the NDE.

    Implements ``sum_i log P(theta | x_i) + (1 - N) * log P(theta)``, where
    each per-measurement log-posterior comes from ``NDE.log_prob`` and N is
    the number of rows in ``mock_measurement``.

    ``p`` is a 2-D batch of points whose first column is mu_0; the result
    holds one value per row of ``p``.
    """
    # Take N from the data rather than hard-coding it, so the correction
    # stays right if the number of mock measurements changes.
    n_measurements = len(mock_measurement)
    log_posteriors = np.array(
        [NDE.log_prob(p, mock).numpy() for mock in mock_measurement]
    )
    # Only the unit-normal prior on mu_0 is evaluated here.
    # NOTE(review): sigma_0 is drawn uniformly in the training set, so its
    # prior should only add a constant inside the sampled range -- confirm.
    log_prior = log_gauss(0.0, p[:, 0], 1.0)
    return np.sum(log_posteriors, axis = 0) + (1 - n_measurements) * log_prior

In [None]:
def ultranest_analytic_posterior(p):
    """Unnormalised analytic log-posterior given all mock measurements.

    Sums the analytic log-likelihood over every row of ``mock_measurement``
    and adds the log of a unit normal prior on mu_0.

    ``p`` is a 2-D batch of points whose first column is used as mu_0 and
    whose second column is used as sigma_0; the result holds one value per
    row of ``p``.
    """
    mu_0, sigma_0 = p[:, 0], p[:, 1]
    log_likelihood = np.sum(
        np.array([analytic_log_likelihood(mock, mu_0, sigma_0) for mock in mock_measurement]),
        axis = 0,
    )
    log_prior = log_gauss(0.0, mu_0, 1.0)
    return log_prior + log_likelihood

In [None]:
# Nested-sampling run with the analytic log-posterior that is current at this
# point in the notebook, using the same sampler settings as before.
sampler_analytic = ultranest.ReactiveNestedSampler(
    ["mu_0", "sigma_0"],
    loglike=ultranest_analytic_posterior,
    transform=transformation,
    vectorized=True,
    draw_multiple=True,
    ndraw_min=1000,
    ndraw_max=100000,
)
result_analytic = sampler_analytic.run(min_num_live_points=1000, min_ess=1000)
sampler_analytic.print_results()

In [None]:
# Nested-sampling run over (mu_0, sigma_0) with the NDE-based log-posterior
# as the sampler's log-likelihood; settings mirror the analytic run.
sampler_NDE = ultranest.ReactiveNestedSampler(
    ["mu_0", "sigma_0"],
    loglike=ultranest_NDE_posterior,
    transform=transformation,
    vectorized=True,
    draw_multiple=True,
    ndraw_min=1000,
    ndraw_max=100000,
)
result_NDE = sampler_NDE.run(min_num_live_points=1000, min_ess=1000)
sampler_NDE.print_results()

In [None]:
# Compare the two nested-sampling posteriors on one figure: the NDE-based
# result in blue and the analytic result in red. The same figure is passed to
# both calls so the second is drawn on top of the first.
fig = plt.figure(figsize = (5, 5))
fig = cornerplot(result_NDE, fig, "blue")
fig = cornerplot(result_analytic, fig, "red")