Skip to content

Commit

Permalink
Added code to compute insertion indices (#117)
Browse files Browse the repository at this point in the history
* Added code to compute insertion indices

* Added code for p-value calculations to utils

* Added a test doing it explicitly on a nested sampling run
  • Loading branch information
williamjameshandley committed Jul 31, 2020
1 parent ae3ceb4 commit 2351380
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 4 deletions.
7 changes: 6 additions & 1 deletion anesthetic/samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
fastkde_contour_plot_2d,
kde_contour_plot_2d, hist_plot_2d)
from anesthetic.read.samplereader import SampleReader
from anesthetic.utils import compute_nlive, is_int, logsumexp
from anesthetic.utils import (compute_nlive, compute_insertion_indexes,
is_int, logsumexp)
from anesthetic.gui.plot import RunPlotter
from anesthetic.weighted_pandas import WeightedDataFrame, WeightedSeries

Expand Down Expand Up @@ -652,6 +653,10 @@ def _compute_nlive(self, logL_birth):
self.tex['nlive'] = r'$n_{\rm live}$'
self.beta = self._beta

def _compute_insertion_indexes(self):
self['insertion'] = compute_insertion_indexes(self.logL.values,
self.logL_birth.values)

_metadata = MCMCSamples._metadata + ['_beta']

@property
Expand Down
92 changes: 92 additions & 0 deletions anesthetic/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pandas
from scipy import special
from scipy.interpolate import interp1d
from scipy.stats import kstwobign
from matplotlib.tri import Triangulation


Expand Down Expand Up @@ -189,6 +190,30 @@ def compute_nlive(death, birth):
return nlive.values


def compute_insertion_indexes(death, birth):
"""Compute the live point insertion index for each point.
For more detail, see https://arxiv.org/abs/2006.03371
Parameters
----------
death, birth : array-like
list of birth and death contours
Returns
-------
indexes: np.array
live point index at which each live point was inserted
"""
indexes = np.zeros_like(birth, dtype=int)
for i, (b, d) in enumerate(zip(birth, death)):
i_live = (death > b) & (birth <= b)
live = death[i_live]
live.sort()
indexes[i] = np.searchsorted(live, d)
return indexes


def unique(a):
"""Find unique elements, retaining order."""
b = []
Expand Down Expand Up @@ -372,3 +397,70 @@ def match_contour_to_contourf(contours, vmin, vmax):
vmin = (c0 * c2 - c1 ** 2 + 2 * vmin * (c1 - c0)) / (c2 - c0)
vmax = (c0 * c2 - c1 ** 2 + 2 * vmax * (c1 - c0)) / (c2 - c0)
return vmin, vmax


def insertion_p_value(indexes, nlive, batch=0):
"""Compute the p-value from insertion indexes, assuming constant nlive.
Note that this function doesn't use scipy.stats.kstest as the latter
assumes continuous distributions.
For more detail, see https://arxiv.org/abs/2006.03371
For a rolling test, you should provide the optional parameter batch!=0. In
this case the test computes the p value on consecutive batches of size
nlive * batch, selects the smallest one and adjusts for multiple
comparisons using a Bonferroni correction.
Parameters
----------
indexes: array-like
list of insertion indexes, sorted by death contour
nlive: int
number of live points
batch: float
batch size in units of nlive for a rolling p-value
Returns
-------
ks_result: dict
Kolmogorov-Smirnov test results:
D: Kolmogorov-Smirnov statistic
sample_size: sample size
p-value: p-value
# if batch != 0
iterations: bounds of batch with minimum p-value
nbatches: the number of batches in total
uncorrected p-value: p-value without Bonferroni correction
"""
if batch == 0:
bins = np.arange(-0.5, nlive + 0.5, 1.)
empirical_pmf = np.histogram(indexes, bins=bins, density=True)[0]
empirical_cmf = np.cumsum(empirical_pmf)
uniform_cmf = np.arange(1., nlive + 1., 1.) / nlive

D = abs(empirical_cmf - uniform_cmf).max()
sample_size = len(indexes)
K = D * np.sqrt(sample_size)

ks_result = {}
ks_result["D"] = D
ks_result["sample_size"] = sample_size
ks_result["p-value"] = kstwobign.sf(K)
return ks_result
else:
batch = int(batch * nlive)
batches = [indexes[i:i + batch] for i in range(0, len(indexes), batch)]
ks_results = [insertion_p_value(c, nlive) for c in batches]
ks_result = min(ks_results, key=lambda t: t["p-value"])
index = ks_results.index(ks_result)

ks_result["iterations"] = (index * batch, (index + 1) * batch)
ks_result["nbatches"] = n = len(batches)
ks_result["uncorrected p-value"] = p = ks_result["p-value"]
ks_result["p-value"] = 1. - (1. - p)**n
if ks_result["p-value"] == 0.:
ks_result["p-value"] = p * n
return ks_result
24 changes: 22 additions & 2 deletions tests/test_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
from matplotlib.patches import Rectangle
from anesthetic import MCMCSamples, NestedSamples, make_1d_axes, make_2d_axes
from anesthetic.samples import merge_nested_samples
from numpy.testing import assert_array_equal, assert_array_almost_equal
from numpy.testing import (assert_array_equal, assert_array_almost_equal,
assert_array_less)
from matplotlib.colors import to_hex
from scipy.stats import ks_2samp
from scipy.stats import ks_2samp, kstest
try:
import montepython # noqa: F401
except ImportError:
Expand Down Expand Up @@ -606,3 +607,22 @@ def test_contour_plot_2d_nan():
# Check this error is removed in the case of zero weights
ns._weight[:10] = 0
ns.plot_2d(['x0', 'x1'])


def test_compute_insertion():
np.random.seed(3)
ns = NestedSamples(root='./tests/example_data/pc')
assert 'insertion' not in ns
ns._compute_insertion_indexes()
assert 'insertion' in ns

nlive = ns.nlive.mode()[0]
assert_array_less(ns.insertion, nlive)

u = ns.insertion.values/nlive
assert kstest(u[nlive:-nlive], 'uniform').pvalue > 0.05

pvalues = [kstest(u[i:i+nlive], 'uniform').pvalue
for i in range(nlive, len(ns)-2*nlive, nlive)]

assert kstest(pvalues, 'uniform').pvalue > 0.05
53 changes: 52 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
import numpy as np
from scipy import special as sp
from numpy.testing import assert_array_equal
from anesthetic import NestedSamples
from anesthetic.utils import (nest_level, compute_nlive, unique, is_int,
logsumexp, sample_compression_1d,
triangular_sample_compression_2d)
triangular_sample_compression_2d,
insertion_p_value)


def test_nest_level():
Expand Down Expand Up @@ -78,6 +80,7 @@ def test_is_int():


def test_logsumexpinf():
np.random.seed(0)
a = np.random.rand(10)
b = np.random.rand(10)
assert logsumexp(-np.inf, b=[-np.inf]) == -np.inf
Expand All @@ -91,3 +94,51 @@ def test_logsumexpinf():
RuntimeWarning)
assert np.isnan(sp.logsumexp(a, b=b))
assert np.isfinite(logsumexp(a, b=b))


def test_insertion_p_value():
np.random.seed(3)
nlive = 500
ndead = nlive*20
indexes = np.random.randint(0, nlive, ndead)
ks_results = insertion_p_value(indexes, nlive)
assert 'D' in ks_results
assert 'p-value' in ks_results
assert 'sample_size' in ks_results

assert 'iterations' not in ks_results
assert 'nbatches' not in ks_results
assert 'p_value_uncorrected' not in ks_results

assert ks_results['p-value'] > 0.05
assert ks_results['sample_size'] == ndead

ks_results = insertion_p_value(indexes, nlive, 1)
assert 'D' in ks_results
assert 'p-value' in ks_results
assert 'sample_size' in ks_results
assert 'iterations' in ks_results
assert 'nbatches' in ks_results
assert 'uncorrected p-value' in ks_results

assert ks_results['p-value'] > 0.05
assert ks_results['uncorrected p-value'] < ks_results['p-value']

iterations = ks_results['iterations']
assert isinstance(iterations, tuple)
assert len(iterations) == 2
assert iterations[1] - iterations[0] == nlive
assert ks_results['nbatches'] == 20


def test_p_values_from_sample():
np.random.seed(3)
ns = NestedSamples(root='./tests/example_data/pc')
ns._compute_insertion_indexes()
nlive = len(ns.live_points())

ks_results = insertion_p_value(ns.insertion[nlive:-nlive], nlive)
assert ks_results['p-value'] > 0.05

ks_results = insertion_p_value(ns.insertion[nlive:-nlive], nlive, batch=1)
assert ks_results['p-value'] > 0.05

0 comments on commit 2351380

Please sign in to comment.