Skip to content

Commit

Permalink
Improvements for 1D kde plotting (quantiles and compression) (#99)
Browse files Browse the repository at this point in the history
* add option to not compress the data for 1d kde plots

* revert ncompress_1d

* use uniform subsample for 1d compression

* discard zero weights in 1d kde in case there are nans, analogous to 2d

* Fix for 1d compression

* copy only if necessary and sort inplace in sample_compression_1d

* add optional kwarg 'percentile' to allow manually adjusting plot ranges in termes of percentiles

* make latest change pep conform

* rename percentile to q, add docstring for q, fix bug in conversion from sigma to q

* change quantiles also for fastkde; move quantile comprehension into function quantile_plot_interval

* add test for kwarg q in 1d kde plots; add tests for quantile_plot_interval

* add docstring for quantile_plot_interval

Co-authored-by: Will Handley <wh260@cam.ac.uk>
  • Loading branch information
Lukas Hergt and williamjameshandley committed Jun 24, 2020
1 parent 4f392d9 commit 8afa048
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 49 deletions.
29 changes: 27 additions & 2 deletions anesthetic/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,13 +268,15 @@ def fastkde_plot_1d(ax, data, *args, **kwargs):

xmin = kwargs.pop('xmin', None)
xmax = kwargs.pop('xmax', None)
q = kwargs.pop('q', '5sigma')
q = quantile_plot_interval(q=q)

try:
x, p = fastkde_1d(data, xmin, xmax)
except NameError:
raise ImportError("You need to install fastkde to use fastkde")
p /= p.max()
i = (x < quantile(x, 0.99, p)) & (x > quantile(x, 0.01, p)) | (p > 0.1)
i = ((x > quantile(x, q[0], p)) & (x < quantile(x, q[1], p)))

ans = ax.plot(x[i], p[i], *args, **kwargs)
ax.set_xlim(*check_bounds(x[i], xmin, xmax), auto=True)
Expand Down Expand Up @@ -323,11 +325,18 @@ def kde_plot_1d(ax, data, *args, **kwargs):
xmax = kwargs.pop('xmax', None)
weights = kwargs.pop('weights', None)
ncompress = kwargs.pop('ncompress', 1000)
q = kwargs.pop('q', '5sigma')
q = quantile_plot_interval(q=q)

if weights is not None:
data = data[weights != 0]
weights = weights[weights != 0]

x, w = sample_compression_1d(data, weights, ncompress)
kde = gaussian_kde(x, weights=w)
p = kde(x)
p /= p.max()
i = ((x < quantile(x, 0.999, w)) & (x > quantile(x, 0.001, w))) | (p > 0.1)
i = ((x > quantile(x, q[0], w)) & (x < quantile(x, q[1], w)))
if xmin is not None:
i = i & (x > xmin)
if xmax is not None:
Expand Down Expand Up @@ -792,3 +801,19 @@ def set_ylim(self, bottom=None, top=None, emit=True, auto=False,
emit=emit, auto=auto,
**kwargs)
ax.__class__ = DiagonalAxes


def quantile_plot_interval(q):
"""Interpret quantile q input to quantile plot range tuple."""
if isinstance(q, str):
sigmas = {'1sigma': 0.682689492137086,
'2sigma': 0.954499736103642,
'3sigma': 0.997300203936740,
'4sigma': 0.999936657516334,
'5sigma': 0.999999426696856}
q = (1 - sigmas[q]) / 2
if isinstance(q, float) or isinstance(q, int):
if q > 0.5:
q = 1 - q
q = (q, 1-q)
return q
11 changes: 11 additions & 0 deletions anesthetic/samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,17 @@ def plot(self, ax, paramname_x, paramname_y=None, *args, **kwargs):
Number of samples to use in plotting routines.
optional, Default dynamically chosen
q: str, float, (float, float)
Plot the `q` inner posterior quantiles in 1d 'kde' plots. To plot
the full range, set `q=0` or `q=1`.
* if str: any of {'1sigma', '2sigma', '3sigma', '4sigma', '5sigma'}
Plot within mean +/- #sigma of posterior.
* if float: Plot within the symmetric confidence interval
`(1-q, q)` or `(q, 1-q)`.
* if tuple: Plot within the (possibly asymmetric) confidence
interval `q`.
optional, (Default: '5sigma')
Returns
-------
fig: matplotlib.figure.Figure
Expand Down
25 changes: 10 additions & 15 deletions anesthetic/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,28 +335,23 @@ def sample_compression_1d(x, w=None, n=1000):
x, w, array-like
Compressed samples and weights
"""
x = pandas.Series(x)
x = np.array(x)
if w is None:
w = pandas.Series(index=x.index, data=np.ones_like(x))
w = np.ones_like(x)
w = np.array(w)

# Select inner samples for triangulation
if sum(w != 0) < n:
i = w.index
if len(x) > n:
x_ = np.random.choice(x, size=n, replace=False)
else:
i = np.random.choice(w.index, size=n, replace=False, p=w/w.sum())

# Define sub-samples
x_ = np.sort(x[i])
x_ = x.copy()
x_.sort()

# Compress mass onto these subsamples
j1 = np.digitize(x, x_) - 1
k1 = (j1 > -1) & (j1 < n)
j2 = np.digitize(x, x_, right=True) - 1
k2 = (j2 > -1) & (j2 < n)

centers = (x_[1:] + x_[:-1])/2
j = np.digitize(x, centers)
w_ = np.zeros_like(x_)
np.add.at(w_, j1[k1], w[k1])
np.add.at(w_, j2[k2], w[k2])
np.add.at(w_, j, w)

return x_, w_

Expand Down
92 changes: 63 additions & 29 deletions tests/test_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from anesthetic.plot import (make_1d_axes, make_2d_axes, kde_plot_1d,
fastkde_plot_1d, hist_plot_1d, hist_plot_2d,
fastkde_contour_plot_2d, kde_contour_plot_2d,
scatter_plot_2d)
scatter_plot_2d, quantile_plot_interval)
from numpy.testing import assert_array_equal

from matplotlib.contour import QuadContourSet
Expand Down Expand Up @@ -187,40 +187,49 @@ def test_2d_axes_limits():
assert(axes[z][y].get_ylim() == (c, d))


def test_kde_plot_1d():
@pytest.mark.parametrize('plot_1d', [kde_plot_1d, fastkde_plot_1d])
def test_kde_plot_1d(plot_1d):
fig, ax = plt.subplots()
np.random.seed(0)
data = np.random.randn(1000)

for plot_1d in [kde_plot_1d, fastkde_plot_1d]:
try:
# Check height
line, = plot_1d(ax, data)
assert(isinstance(line, Line2D))
assert(line.get_ydata().max() <= 1)

# Check arguments are passed onward to underlying function
line, = plot_1d(ax, data, color='r')
assert(line.get_color() == 'r')

# Check xmin
xmin = -0.5
line, = plot_1d(ax, data, xmin=xmin)
assert((line.get_xdata() >= xmin).all())
try:
# Check height
line, = plot_1d(ax, data)
assert(isinstance(line, Line2D))
assert(line.get_ydata().max() <= 1)

# Check arguments are passed onward to underlying function
line, = plot_1d(ax, data, color='r')
assert(line.get_color() == 'r')

# Check xmin
xmin = -0.5
line, = plot_1d(ax, data, xmin=xmin)
assert((line.get_xdata() >= xmin).all())

# Check xmax
xmax = 0.5
line, = plot_1d(ax, data, xmax=xmax)
assert((line.get_xdata() <= xmax).all())

# Check xmin and xmax
line, = plot_1d(ax, data, xmin=xmin, xmax=xmax)
assert((line.get_xdata() <= xmax).all())
assert((line.get_xdata() >= xmin).all())
plt.close("all")

# Check xmax
xmax = 0.5
line, = plot_1d(ax, data, xmax=xmax)
assert((line.get_xdata() <= xmax).all())
# Check q
plot_1d(ax, data, q='1sigma')
plot_1d(ax, data, q=0)
plot_1d(ax, data, q=1)
plot_1d(ax, data, q=0.1)
plot_1d(ax, data, q=0.9)
plot_1d(ax, data, q=(0.1, 0.9))

# Check xmin and xmax
line, = plot_1d(ax, data, xmin=xmin, xmax=xmax)
assert((line.get_xdata() <= xmax).all())
assert((line.get_xdata() >= xmin).all())
plt.close("all")
except ImportError:
if 'fastkde' not in sys.modules:
pass
except ImportError:
if 'fastkde' not in sys.modules:
pass


def test_hist_plot_1d():
Expand Down Expand Up @@ -400,3 +409,28 @@ def test_scatter_plot_2d():
scatter_plot_2d(ax, data_x, data_y, ymax=ymax)
assert(ax.get_ylim()[1] <= ymax)
plt.close()


@pytest.mark.parametrize('sigmas', [('1sigma', 0.682689492137086),
('2sigma', 0.954499736103642),
('3sigma', 0.997300203936740),
('4sigma', 0.999936657516334),
('5sigma', 0.999999426696856)])
def test_quantile_plot_interval_str(sigmas):
q1, q2 = quantile_plot_interval(q=sigmas[0])
assert q1 == 0.5 - sigmas[1] / 2
assert q2 == 0.5 + sigmas[1] / 2


@pytest.mark.parametrize('floats', [0, 1, 0.1, 0.9])
def test_quantile_plot_interval_float(floats):
q1, q2 = quantile_plot_interval(q=floats)
assert q1 == min(floats, 1 - floats)
assert q2 == max(floats, 1 - floats)


@pytest.mark.parametrize('q1, q2', [(0, 1), (0.1, 0.9), (0, 0.9), (0.1, 1)])
def test_quantile_plot_interval_tuple(q1, q2):
_q1, _q2 = quantile_plot_interval(q=(q1, q2))
assert _q1 == q1
assert _q2 == q2
2 changes: 1 addition & 1 deletion tests/test_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,7 +600,7 @@ def test_contour_plot_2d_nan():
ns = NestedSamples(root='./tests/example_data/pc')

ns.loc[:9, 'x0'] = np.nan
with pytest.raises((np.linalg.LinAlgError, RuntimeError)):
with pytest.raises((np.linalg.LinAlgError, RuntimeError, ValueError)):
ns.plot_2d(['x0', 'x1'])

# Check this error is removed in the case of zero weights
Expand Down
16 changes: 14 additions & 2 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from scipy import special as sp
from numpy.testing import assert_array_equal
from anesthetic.utils import (nest_level, compute_nlive, unique, is_int,
triangular_sample_compression_2d,
logsumexp)
logsumexp, sample_compression_1d,
triangular_sample_compression_2d)


def test_nest_level():
Expand Down Expand Up @@ -58,6 +58,18 @@ def test_triangular_sample_compression_2d():
assert np.isclose(sum(W), sum(w), rtol=1e-1)


def test_sample_compression_1d():
np.random.seed(0)
N = 10000
x_ = np.random.rand(N)
w_ = np.random.rand(N)
n = 1000
x, w = sample_compression_1d(x_, w_, n)
assert len(x) == n
assert len(w) == n
assert np.isclose(w.sum(), w_.sum())


def test_is_int():
assert is_int(1)
assert is_int(np.int64(1))
Expand Down

0 comments on commit 8afa048

Please sign in to comment.