# Evaluating the KLD from posterior samples of cosmological parameters

_Alex I. Malz (GCCL@RUB)_

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle as pkl
from scipy import stats as sps
import sys

We will begin with samples of $(w, \Omega_{m})$ pairs, where one set of samples is defined as the reference sample corresponding to a best-case scenario of a 100% pure SN Ia data set.

In [None]:
# # replace with reading in the data
# def measure(n, w_bar, w_sig, Omm_bar,Omm_sig):
#     "Measurement model, return two coupled measurements."
#     w = np.random.normal(loc=w_bar, scale=w_sig, size=n)
#     Omm = np.random.normal(loc=Omm_bar, scale=Omm_sig, size=n)
#     return w, Omm

def measure(path, cols):
    """
    Reads selected entries from a pickled container of posterior samples.

    Parameters
    ----------
    path: str
        location of the pickle file to read
    cols: iterable
        keys to look up in the unpickled object, in the order they should be
        returned

    Returns
    -------
    samples: list
        one entry per element of `cols`, each being the value stored under
        that key in the unpickled object
    """
    # NOTE(review): unpickling can execute arbitrary code, so only point this
    # at chain files from a trusted source.
    # The context manager guarantees the file handle is closed after loading.
    with open(path, 'rb') as handle:
        alldims = pkl.load(handle)
    return [alldims[col] for col in cols]

In [None]:
# Posterior chains for the reference case (the perfect classifier).
refpath = (
    '/media/RESSPECT/data/PLAsTiCC/SALT2mu_posteriors/'
    'perfect_classifier/chains_plasticc_perfect.pkl'
)
# Posterior chains for the case being compared against the reference.
comppath = (
    '/media/RESSPECT/data/PLAsTiCC/SALT2mu_posteriors/'
    'static/DDF/train_10/batch_10/UncSampling/chains/'
    'chains_loop_99.pkl'
)

In [None]:
# w_ref, Omm_ref = measure(1000, -1., 0.1, 0.5, 0.1)
# w_comp, Omm_comp = measure(1000, -1.1, 0.2, 0.25, 0.05)

# Pull the 'w' and 'om' entries out of each chain file, reference first and
# comparison second.
w_ref, Omm_ref = measure(refpath, ['w', 'om'])
w_comp, Omm_comp = measure(comppath, ['w', 'om'])

[`chippr`](https://github.com/aimalz/chippr/) contains code for calculating the KLD of PDFs evaluated on a grid, so we start by fitting a 2D KDE to the samples.
The PDFs must be $>0$ over the entire range of the grid for the KLD to be finite, so we make a grid based on the reference sample's range.

In [None]:
# Build the evaluation grid over the range spanned by the reference samples.
ngrid_x = 100
ngrid_y = 100
xmin = w_ref.min()
xmax = w_ref.max()
ymin = Omm_ref.min()
ymax = Omm_ref.max()

# An imaginary step tells np.mgrid how many points to place between the two
# endpoints (both included), so the grid size follows ngrid_x / ngrid_y.
w_grid, Omm_grid = np.mgrid[xmin:xmax:ngrid_x * 1j, ymin:ymax:ngrid_y * 1j]
# n points with both endpoints included span n - 1 intervals.
dw = (xmax - xmin) / (ngrid_x - 1)
dOmm = (ymax - ymin) / (ngrid_y - 1)
# TODO: use meshgrid instead of mgrid

In [None]:
# Floor applied before taking logs: twice the smallest positive normalized
# float, so the logarithm of a floored value is always finite.
eps = 2. * sys.float_info.min

def safe_log(arr, threshold=eps):
    """
    Takes the natural logarithm of an array that might contain zeros.

    The input is never modified; values are floored on a float copy.

    Parameters
    ----------
    arr: ndarray, float
        array of values to be logged
    threshold: float, optional
        small, positive value that replaces every entry below it (zeros and
        negative numbers in particular)

    Returns
    -------
    logged: ndarray
        logged values, with small value replacing un-loggable values
    """
    # Convert to float first: assigning a tiny threshold into an integer
    # array would truncate it to 0 and give log(0) = -inf.
    # np.maximum returns a new array, so the caller's data is left untouched.
    floored = np.maximum(np.asarray(arr, dtype=float), threshold)
    logged = np.log(floored)
    return logged

def make_kde(Xgrid, Ygrid, Xsamps, Ysamps):
    """
    Fits a 2D Gaussian KDE to paired samples and evaluates its log on a grid.

    Parameters
    ----------
    Xgrid: ndarray, float
        grid coordinates along the first dimension
    Ygrid: ndarray, float
        grid coordinates along the second dimension, with as many elements
        as `Xgrid`
    Xsamps: array-like, float
        sample values in the first dimension
    Ysamps: array-like, float
        sample values in the second dimension, paired with `Xsamps`

    Returns
    -------
    log_density: ndarray, float
        `safe_log` of the KDE evaluated at every grid point, with the same
        shape as `Xgrid`
    """
    # gaussian_kde expects one row per dimension, for samples and for the
    # evaluation points alike.
    samples = np.vstack([Xsamps, Ysamps])
    grid_points = np.vstack([Xgrid.ravel(), Ygrid.ravel()])
    density = sps.gaussian_kde(samples)(grid_points)
    # Restore the grid layout before logging.
    log_density = safe_log(density.T.reshape(Xgrid.shape))
    return log_density
# TODO: normalize up here before log!

In [None]:
# Log-density of the reference samples on the shared grid, shown as an image
# for a quick visual check.
kde_ref = make_kde(Xgrid=w_grid, Ygrid=Omm_grid, Xsamps=w_ref, Ysamps=Omm_ref)
plt.imshow(kde_ref)

In [None]:
# replace with reading in other sets of posteriors
# Log-density of the comparison samples, evaluated on the same grid as the
# reference so the two can be compared point by point.
kde_comp = make_kde(Xgrid=w_grid, Ygrid=Omm_grid, Xsamps=w_comp, Ysamps=Omm_comp)
plt.imshow(kde_comp)

Now that we have the 2D PDFs, let's define the KLD.

In [None]:
# stolen from chippr
def calculate_kld(lpe, lqe, dx, vb=True):
    """
    Calculates the Kullback-Leibler Divergence between two N-dimensional PDFs
    evaluated on a shared, regular grid (irregular grids are not supported)

    The inputs do not need to be normalized: each is divided by its own
    integral over the grid, approximated by a simple Riemann sum.

    Parameters
    ----------
    lpe: numpy.ndarray, float
        log-probability distribution evaluated on a grid whose distance from `q`
        will be calculated.
    lqe: numpy.ndarray, float
        log-probability distribution evaluated on a grid whose distance to `p` will
        be calculated.
    dx: numpy.ndarray, float
        separation of grid values in each dimension
    vb: boolean
        currently unused; kept so existing calls keep working

    Returns
    -------
    Dpq: float
        the value of the Kullback-Leibler Divergence from `q` to `p`

    Raises
    ------
    ValueError
        if `lpe` and `lqe` do not have the same shape
    """
    lpe = np.asarray(lpe, dtype=float)
    lqe = np.asarray(lqe, dtype=float)
    if lpe.shape != lqe.shape:
        raise ValueError(
            'lpe and lqe must be evaluated on the same grid, got shapes '
            '{} and {}'.format(lpe.shape, lqe.shape))
    # Volume of a single grid cell
    cell = np.prod(dx)
    # Normalize the evaluations (very approximately!) by simple summation.
    # The normalizer has to be the integral over the WHOLE grid; dividing
    # each cell by its own value times the cell volume would flatten every
    # input to the same constant and make the divergence identically zero.
    log_p_norm = np.log(np.sum(np.exp(lpe)) * cell)
    log_q_norm = np.log(np.sum(np.exp(lqe)) * cell)
    # Compute the log of the normalized PDFs, floored so that cells with
    # zero probability stay finite instead of producing nan/inf
    log_floor = np.log(np.finfo(float).tiny)
    logp = np.maximum(lpe - log_p_norm, log_floor)
    logq = np.maximum(lqe - log_q_norm, log_floor)
    pn = np.exp(lpe - log_p_norm)
    # Calculate the KLD from q to p as the integral of p * log(p / q)
    Dpq = np.sum(pn * (logp - logq)) * cell
    return Dpq

Now we can evaluate it for our reference sample and a comparison sample.

In [None]:
# KLD from the comparison posterior to the reference posterior, using the grid
# spacing in each dimension.
calculate_kld(lpe=kde_ref, lqe=kde_comp, dx=np.array([dw, dOmm]))