In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from   scipy import stats
from   copy import deepcopy
import os
import sys
import glob

sys.path.append('/Users/research/projects/alderaan/')
from alderaan.utils import weighted_percentile

# Generate synthetic posteriors

In [None]:
NPL = 100        # number of simulated objects
NSAMP = 3000     # posterior samples drawn per object (must be divisible by 3 for the mixture)
SEED = 42        # fixed seed so a fresh Restart & Run All reproduces the same synthetic data

# local generator: keeps the simulation reproducible without touching numpy's global RNG state
rng = np.random.RandomState(SEED)

# column order of every simulated posterior frame
COLUMNS = 'precise noisy unbounded symmetric mixed'.split()

# true underlying values, one per object, drawn from Beta(0.9, 3.0)
mu_true = stats.beta(0.9, 3.0).rvs(NPL, random_state=rng)

data = []

for i in range(NPL):
    # Collect the five sample sets in a dict and build the frame once with a FLAT
    # column index.  Passing a nested list to `columns=` (as `[... .split()]` does)
    # makes pandas build a one-level MultiIndex, so `d.symmetric` would come back
    # as a one-column DataFrame with 2-D `.values` instead of a Series.
    samples = {}

    # precise (bounded) measurements: a noisy point estimate `mu` of the true value,
    # then samples around `mu`; both draws are truncated to [0, 1]
    sd = 0.03
    mu = stats.truncnorm(-mu_true[i]/sd, (1-mu_true[i])/sd, loc=mu_true[i], scale=sd).rvs(random_state=rng)
    samples['precise'] = stats.truncnorm(-mu/sd, (1-mu)/sd, loc=mu, scale=sd).rvs(NSAMP, random_state=rng)

    # noisy (bounded) measurements: same construction with a larger scatter
    sd = 0.2
    mu = stats.truncnorm(-mu_true[i]/sd, (1-mu_true[i])/sd, loc=mu_true[i], scale=sd).rvs(random_state=rng)
    samples['noisy'] = stats.truncnorm(-mu/sd, (1-mu)/sd, loc=mu, scale=sd).rvs(NSAMP, random_state=rng)

    # unbounded (noisy) measurements: plain normals, no truncation
    sd = 0.2
    mu = stats.norm(loc=mu_true[i], scale=sd).rvs(random_state=rng)
    samples['unbounded'] = stats.norm(loc=mu, scale=sd).rvs(NSAMP, random_state=rng)

    # symmetric (noisy, unbounded) measurements: the point estimate gets a random sign
    sd = 0.2
    mu = stats.norm(loc=mu_true[i], scale=sd).rvs(random_state=rng) * rng.choice([-1, 1])
    samples['symmetric'] = stats.norm(loc=mu, scale=sd).rvs(NSAMP, random_state=rng)

    # 3-Gaussian mixture (unbounded): equal-sized blocks from three fixed components.
    # The component loop uses its own names so it cannot clobber the object index `i`.
    mix_mu = [0.1, 0.5, 0.7]
    mix_sd = [0.15, 0.3, 0.05]
    samples['mixed'] = np.concatenate(
        [stats.norm(m, s).rvs(NSAMP // len(mix_mu), random_state=rng) for m, s in zip(mix_mu, mix_sd)]
    )

    # store simulated posteriors
    d = pd.DataFrame(samples, columns=COLUMNS)
    data.append(d)

## Run the hierarchical model

In [None]:
import aesara_theano_fallback.tensor as T
from   aesara_theano_fallback import aesara as theano
from   celerite2.theano import GaussianProcess
from   celerite2.theano import terms as GPterms
import pymc3 as pm
import pymc3_ext as pmx
import corner

#### Non-parametric

In [None]:
NPL = len(data)
Nbin = 100

# regular grid of Nbin histogram bins spanning [-2, 2]
bin_edges = np.linspace(-2, 2, Nbin+1)
bin_widths = bin_edges[1:] - bin_edges[:-1]
bin_centers = 0.5*(bin_edges[:-1] + bin_edges[1:])

with pm.Model() as model:
    # hyperpriors on GP (log-amplitude and log-length-scale of a Matern-3/2 kernel)
    log_s = pm.Uniform("log_s", lower=-1, upper=5, testval=0)
    log_r = pm.Uniform("log_r", lower=-1, upper=5, testval=0)
    kernel = GPterms.Matern32Term(sigma=T.exp(log_s), rho=T.exp(log_r))

    # calculate bin heights from latent draws (one standard-normal latent per bin, scaled by exp(log_s))
    latent = pm.Normal("latent", mu=0, sd=1, shape=Nbin)
    LS = T.exp(log_s)*latent

    # GP evaluated at the bin centers; `beta` is the GP prediction given the scaled latents
    gp = GaussianProcess(kernel, mean=T.mean(LS))
    gp.compute(bin_centers, diag=T.var(LS[1:]-LS[:-1])/T.sqrt(2)*T.ones(Nbin))

    beta  = gp.predict(LS)
    # normalize so that sum(exp(ln_pdf) * bin_widths) == 1
    ln_pdf = pm.Deterministic("ln_pdf", beta - T.log(T.sum(T.exp(beta)*bin_widths)))

    # hierarchical model
    X = [None]*NPL
    C = [None]*NPL
    Z = T.zeros(NPL, dtype='float')

    for i, d in enumerate(data):
        # Map every sample to the bin that contains it.  np.ravel makes this work
        # whether `d.symmetric` is a Series or a one-column DataFrame.
        inds = np.digitize(np.ravel(d.symmetric.values), bin_edges[1:], right=True)
        # digitize returns Nbin for samples above the last edge, which would index
        # past the end of ln_pdf; clamp them into the last bin, mirroring how samples
        # below the first edge already fall into bin 0.
        inds = np.clip(inds, 0, Nbin-1)

        # log-sum-exp of ln_pdf over this object's samples, shifted by the max for stability
        X[i] = ln_pdf[inds]
        C[i] = T.max(X[i])
        Z = T.set_subtensor(Z[i], C[i] + T.log(T.sum(T.exp(X[i]-C[i]))))

    # likelihood
    pm.Potential("ln_like", T.sum(Z))

In [None]:
# Sampler settings gathered in one place so they are easy to find and tune.
sampler_kwargs = dict(
    tune=5000,
    draws=1000,
    chains=2,
    target_accept=0.9,
    return_inferencedata=True,
)

# Draw from the posterior of the model defined in the previous cell.
with model:
    trace = pmx.sample(**sampler_kwargs)

In [None]:
# Posterior draws of the binned density: exponentiate ln_pdf and collapse all
# leading dimensions so each row is one posterior draw over the bins.
x = np.exp(trace.posterior.ln_pdf.values)
x = x.reshape(-1, x.shape[-1])

# Pool the per-object samples of the column the model was fit to (`symmetric`),
# read from `data` directly instead of relying on a loop variable left over from
# another cell.  Keep this column in sync with the one used in the model cell.
pooled = np.concatenate([np.ravel(frame.symmetric.values) for frame in data])

fig, ax = plt.subplots()

# density=True puts the histogram on the same scale as the PDFs; raw counts would
# dwarf the curves and flatten them against the x-axis.
ax.hist(pooled, bins=20, density=True, color='lightgrey', label='pooled samples')

# posterior median and 16th-84th percentile band of the inferred density
ax.fill_between(bin_centers, np.percentile(x, 16, axis=0), np.percentile(x, 84, axis=0), color='r', alpha=0.3)
ax.plot(bin_centers, np.median(x, axis=0), color='r', lw=2, label='posterior')

# True population density: a Beta(0.9, 3.0) value with a random sign has density
# beta.pdf(|e|)/2, so mirror the PDF about zero rather than leaving e < 0 at zero.
ax.plot(bin_centers, stats.beta(0.9, 3.0).pdf(np.abs(bin_centers))/2, 'k--', label='truth')

ax.set_ylim(0, None)
ax.set_yticks([])
ax.set_xlabel("e", fontsize=24)
ax.set_ylabel("P(e)", fontsize=24)
ax.legend(fontsize=16)
plt.show()

In [None]:
# Corner plot of the two GP hyperparameters; the figure handle is assigned to `_`
# so the cell does not echo its repr.
hyper_names = ['log_s', 'log_r']
_ = corner.corner(trace, var_names=hyper_names)