set values <-1e150 to -1e150 to fix overflow issues which happen beca… (

#71) * set values <-1e150 to -1e150 to fix overflow issues which happen because of cobaya's logzero=1e300 definition * add logzero kwarg, set logL<=logzero to -numpy.inf, add logsumexpinf() function to handle -inf input to logsumexp * test if logL_birth isinstance(int), special MultiNest case for old MN data * add test for utils function logsumexpinf * make logsumexpinf docstring conform with convention=numpy * catch expected warning in test_logsumexpinf * rename utils.logsumexpinf to utils.logsumexp and replace scipy's logsumexp completely with the utils.py version * add doc entry for logzero in NestedSamples * remove unnecessary abbreviation for scipy's * remove unnecessary newline * make utils.logsumexp match signature of scipy.special.logsumexp * add simple arg test to test_build_mcmc for using logzero kwarg in NestedSamples * add test for effect of logzero conversion, move logzero conversion from root reading branch (if root is not None) to kwarg branch (else) where it acts on both * add test for logzero not changing anything and for logzero not affecting rest of logL * change logzero default from -numpy.inf to -1e30 (as in CosmoMC and MontePython) * move logzero check up in hierarchy from NestedSamples to MCMCSamples * add tests for using logzero kwarg with MCMCSamples * correct indent for PEP8 Co-authored-by: lukas hergt <lh561@cam.ac.uk>
handley-lab · Apr 1, 2020 · 6e953fd · 6e953fd
1 parent 2626a52
commit 6e953fd
Show file tree

Hide file tree

Showing 4 changed files with 86 additions and 4 deletions.
diff --git a/anesthetic/samples.py b/anesthetic/samples.py
@@ -6,13 +6,12 @@
 import os
 import numpy
 import pandas
-from scipy.special import logsumexp
 from anesthetic.plot import (make_1d_axes, make_2d_axes, fastkde_plot_1d,
                              kde_plot_1d, hist_plot_1d, scatter_plot_2d,
                              fastkde_contour_plot_2d,
                              kde_contour_plot_2d, hist_plot_2d)
 from anesthetic.read.samplereader import SampleReader
-from anesthetic.utils import compute_nlive, is_int
+from anesthetic.utils import compute_nlive, is_int, logsumexp
 from anesthetic.gui.plot import RunPlotter
 from anesthetic.weighted_pandas import WeightedDataFrame, WeightedSeries
 
@@ -54,6 +53,11 @@ class MCMCSamples(WeightedDataFrame):
     label: str
         Legend label
 
+    logzero: float
+        The threshold for `log(0)` values assigned to rejected sample points.
+        Anything equal or below this value is set to `-numpy.inf`.
+        default: -1e30
+
     """
 
     def __init__(self, *args, **kwargs):
@@ -75,7 +79,10 @@ def __init__(self, *args, **kwargs):
                           tex=tex, limits=limits, *args, **kwargs)
             self.root = root
         else:
+            logzero = kwargs.pop('logzero', -1e30)
             logL = kwargs.pop('logL', None)
+            if logL is not None:
+                logL = numpy.where(logL <= logzero, -numpy.inf, logL)
             self.tex = kwargs.pop('tex', {})
             self.limits = kwargs.pop('limits', {})
             self.label = kwargs.pop('label', None)
@@ -389,6 +396,11 @@ class NestedSamples(MCMCSamples):
     beta: float
         thermodynamic temperature
 
+    logzero: float
+        The threshold for `log(0)` values assigned to rejected sample points.
+        Anything equal or below this value is set to `-numpy.inf`.
+        default: -1e30
+
     """
 
     def __init__(self, *args, **kwargs):
@@ -405,9 +417,15 @@ def __init__(self, *args, **kwargs):
                           tex=tex, limits=limits, *args, **kwargs)
             self.root = root
         else:
+            logzero = kwargs.pop('logzero', -1e30)
             self._beta = kwargs.pop('beta', 1.)
             logL_birth = kwargs.pop('logL_birth', None)
-            super(NestedSamples, self).__init__(*args, **kwargs)
+            if not isinstance(logL_birth, int) and logL_birth is not None:
+                logL_birth = numpy.where(logL_birth <= logzero,
+                                         -numpy.inf, logL_birth)
+
+            super(NestedSamples, self).__init__(logzero=logzero,
+                                                *args, **kwargs)
             if logL_birth is not None:
                 self._compute_nlive(logL_birth)
 

diff --git a/anesthetic/utils.py b/anesthetic/utils.py
@@ -1,10 +1,32 @@
 """Data-processing utility functions."""
 import numpy
 import pandas
+from scipy import special
 from scipy.interpolate import interp1d
 from matplotlib.tri import Triangulation
 
 
+def logsumexp(a, axis=None, b=None, keepdims=False, return_sign=False):
+    r"""Compute the log of the sum of exponentials of input elements.
+
+    This function has the same call signature as `scipy.special.logsumexp`
+    and mirrors scipy's behaviour except for `-numpy.inf` input. If a and b
+    are both -inf then scipy's function will output `nan` whereas here we use:
+
+    .. math::
+
+        \lim_{x \to -\infty} x \exp(x) = 0
+
+    Thus, if a=-inf in `log(sum(b * exp(a))` then we can set b=0 such that
+    that term is ignored in the sum.
+    """
+    if b is None:
+        b = numpy.ones_like(a)
+    b = numpy.where(a == -numpy.inf, 0, b)
+    return special.logsumexp(a, axis=axis, b=b, keepdims=keepdims,
+                             return_sign=return_sign)
+
+
 def channel_capacity(w):
     r"""Channel capacity (effective sample size).
 

diff --git a/tests/test_samples.py b/tests/test_samples.py
@@ -59,6 +59,29 @@ def test_build_mcmc():
     for p in params:
         assert(mcmc.limits[p] == limits[p])
 
+    ns = NestedSamples(data=samples, logL=logL, w=w)
+    assert(len(ns) == nsamps)
+    assert(numpy.all(numpy.isfinite(ns.logL)))
+    logL[:10] = -1e300
+    w[:10] = 0.
+    mcmc = MCMCSamples(data=samples, logL=logL, w=w, logzero=-1e29)
+    ns = NestedSamples(data=samples, logL=logL, w=w, logzero=-1e29)
+    assert_array_equal(mcmc.columns, numpy.array([0, 1, 2, 'logL', 'weight'],
+                                                 dtype=object))
+    assert_array_equal(ns.columns, numpy.array([0, 1, 2, 'logL', 'weight'],
+                                               dtype=object))
+    assert(numpy.all(mcmc.logL[:10] == -numpy.inf))
+    assert(numpy.all(ns.logL[:10] == -numpy.inf))
+    assert(numpy.all(mcmc.logL[10:] == logL[10:]))
+    assert(numpy.all(ns.logL[10:] == logL[10:]))
+
+    mcmc = MCMCSamples(data=samples, logL=logL, w=w, logzero=-1e301)
+    ns = NestedSamples(data=samples, logL=logL, w=w, logzero=-1e301)
+    assert(numpy.all(numpy.isfinite(mcmc.logL)))
+    assert(numpy.all(numpy.isfinite(ns.logL)))
+    assert(numpy.all(mcmc.logL == logL))
+    assert(numpy.all(ns.logL == logL))
+
     assert(mcmc.root is None)
 
 

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -1,7 +1,10 @@
+import warnings
 import numpy
+from scipy import special as sp
 from numpy.testing import assert_array_equal
 from anesthetic.utils import (nest_level, compute_nlive, unique, is_int,
-                              triangular_sample_compression_2d)
+                              triangular_sample_compression_2d,
+                              logsumexp)
 
 
 def test_nest_level():
@@ -60,3 +63,19 @@ def test_is_int():
     assert is_int(numpy.int64(1))
     assert not is_int(1.)
     assert not is_int(numpy.float64(1.))
+
+
+def test_logsumexpinf():
+    a = numpy.random.rand(10)
+    b = numpy.random.rand(10)
+    assert logsumexp(-numpy.inf, b=[-numpy.inf]) == -numpy.inf
+    assert logsumexp(a, b=b) == sp.logsumexp(a, b=b)
+    a[0] = -numpy.inf
+    assert logsumexp(a, b=b) == sp.logsumexp(a, b=b)
+    b[0] = -numpy.inf
+    with warnings.catch_warnings():
+        warnings.filterwarnings('ignore',
+                                'invalid value encountered in multiply',
+                                RuntimeWarning)
+        assert numpy.isnan(sp.logsumexp(a, b=b))
+    assert numpy.isfinite(logsumexp(a, b=b))