-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add a new module: pycraf.mc containing MC helpers
- Loading branch information
Showing
7 changed files
with
289 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
.. pycraf-mc: | ||
********************************* | ||
Monte-Carlo helpers (`pycraf.mc`) | ||
********************************* | ||
|
||
.. currentmodule:: pycraf.mc | ||
|
||
Introduction | ||
============ | ||
|
||
The `~pycraf.mc` subpackage | ||
|
||
|
||
See Also | ||
======== | ||
|
||
- `Kernel density estimation (KDE) <https://jakevdp.github.io/blog/2013/12/01/kernel-density-estimation/>`__, blog by Jake Vanderplas. | ||
- `Scipy gaussian_kde <https://docs.scipy.org/doc/scipy-0.15.1/reference/generated/scipy.stats.gaussian_kde.html#scipy.stats.gaussian_kde>`__ | ||
|
||
Reference/API | ||
============= | ||
|
||
.. automodapi:: pycraf.mc | ||
:no-inheritance-diagram: | ||
:include-all-objects: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
|
||
''' | ||
This subpackage contains various little convenience functions to | ||
help with Monte-Carlo type compatiblity studies. | ||
''' | ||
|
||
from .sampler import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
|
||
import numpy as np | ||
|
||
|
||
__all__ = ['HistogramSampler'] | ||
|
||
|
||
class HistogramSampler(object): | ||
''' | ||
Sampler to get random values obeying a discrete(!) density distribution. | ||
With this class, one can use discrete densities (think of them as | ||
binned entities, aka histograms) to get random samples that follow | ||
the same (binned) distribution as the histogram. For simplicity | ||
the returned values are the N-dim indices that need to be fed | ||
into the histogram bin-ranges if one wants to convert them | ||
to physical values (see examples, below). | ||
Parameters | ||
---------- | ||
histvals : N-D `~numpy.ndarray` | ||
Discrete density distribution. (This is the histogram array, which | ||
one would get out of `~numpy.histogram` functions.) | ||
Returns | ||
------- | ||
hist_sampler : `~pycraf.mc.HistogramSampler` | ||
A `~pycraf.mc.HistogramSampler` instance. | ||
Examples | ||
-------- | ||
A trivial one-dimensional use case:: | ||
>>> import numpy as np | ||
>>> from astropy.utils.misc import NumpyRNGContext | ||
>>> from pycraf import mc | ||
>>> with NumpyRNGContext(1): | ||
... x = np.random.normal(0, 1, 100) | ||
>>> hist, bins = np.histogram(x, bins=16, range=(-4, 4)) | ||
>>> mid_points = (bins[1:] + bins[:-1]) / 2 | ||
>>> my_sampler = mc.HistogramSampler(hist) | ||
>>> with NumpyRNGContext(1): | ||
... indices = my_sampler.sample(10) | ||
>>> print(indices) | ||
[7 9 3 7 6 5 6 7 7 8] | ||
>>> print(mid_points[indices]) | ||
[-0.25 0.75 -2.25 -0.25 -0.75 -1.25 -0.75 -0.25 -0.25 0.25] | ||
Works equally simple in 2D:: | ||
>>> with NumpyRNGContext(1): | ||
... x = np.random.normal(0, 1, 1000) | ||
... y = np.random.normal(2, 2, 1000) | ||
>>> hist2d, xbins, ybins = np.histogram2d( | ||
... x, y, bins=(16, 16), range=((-4, 4), (-6, 10)) | ||
... ) | ||
>>> xmids = (xbins[1:] + xbins[:-1]) / 2 | ||
>>> ymids = (ybins[1:] + ybins[:-1]) / 2 | ||
>>> my_sampler = mc.HistogramSampler(hist2d) | ||
>>> with NumpyRNGContext(1): | ||
... indices = my_sampler.sample(10) | ||
>>> print(list(zip(*indices))) | ||
[(7, 8), (9, 6), (1, 7), (7, 5), (6, 4), | ||
(5, 7), (6, 7), (7, 6), (7, 8), (8, 6)] | ||
>>> print(list(zip(xmids[indices[0]], ymids[indices[1]]))) | ||
[(-0.25, 2.5), (0.75, 0.5), (-3.25, 1.5), (-0.25, -0.5), | ||
(-0.75, -1.5), (-1.25, 1.5), (-0.75, 1.5), (-0.25, 0.5), | ||
(-0.25, 2.5), (0.25, 0.5)] | ||
It is also easily possible to apply weights. Just assume | ||
that one bin was observed exceptionally frequent:: | ||
>>> weights = np.ones_like(x) | ||
>>> weights[500] = 1000 | ||
>>> hist2d, xbins, ybins = np.histogram2d( | ||
... x, y, bins=(16, 16), range=((-4, 4), (-6, 10)), | ||
... weights=weights | ||
... ) | ||
>>> my_sampler = mc.HistogramSampler(hist2d) | ||
>>> with NumpyRNGContext(1): | ||
... indices = my_sampler.sample(10) | ||
>>> print(list(zip(xmids[indices[0]], ymids[indices[1]]))) | ||
[(-1.75, 4.5), (-0.25, 3.5), (-3.25, 1.5), (-1.75, 4.5), | ||
(-1.75, 4.5), (-1.75, 4.5), (-1.75, 4.5), (-1.75, 4.5), | ||
(-1.75, 4.5), (-1.25, 0.5)] | ||
As can be seen, the value `((1.25, -1.5))` is now exceptionally | ||
often sampled from the distribution. | ||
As discussed in the notes, for some use-cases a KDE might | ||
be the better tool: | ||
>>> from scipy.stats import gaussian_kde | ||
>>> kernel = gaussian_kde((x, y)) | ||
>>> with NumpyRNGContext(1): | ||
... values = kernel.resample(10) | ||
>>> print('\\n'.join( | ||
... '({:9.6f}, {:9.6f})'.format(*t) | ||
... for t in zip(values[0], values[1]) | ||
... )) | ||
(-1.470808, 0.730811) | ||
( 0.088397, 3.407584) | ||
... | ||
(-2.097790, -0.251477) | ||
( 0.261941, -0.936229) | ||
Notes | ||
----- | ||
Even if you have continuous data to start with, using the histogram | ||
approach obviously works, as well (as is demonstrated in the examples). | ||
However, one looses the "continuous" property in the process. | ||
The resulting samples will always be just be able to work | ||
out the bin, but no continuous quantity can be reconstructed. | ||
For many use cases this is probably fine, but in others one | ||
might be better of by using Kernel Density Estimation. | ||
There is a function for this in `~scipy.stats` | ||
(`~scipy.stats.gaussian_kde`), which even allows works with | ||
multi-variate data and allows one to sample from the KDE PDF | ||
(see also the examples). Unfortunately, one cannot work with | ||
weighted data. | ||
''' | ||
|
||
def __init__(self, histvals): | ||
|
||
histvals = np.atleast_1d(histvals) | ||
|
||
self._hshape = histvals.shape | ||
self._ndim = histvals.ndim | ||
# cdf is flat, will need to unravel indices later | ||
self._cdf = np.cumsum( | ||
histvals.flatten().astype(np.float64, copy=False) | ||
) | ||
self._cdf /= self._cdf[-1] | ||
|
||
def sample(self, n): | ||
''' | ||
Sample from the (discrete) density distribution. | ||
Parameters | ||
---------- | ||
n : int | ||
Number of samples to draw. | ||
Returns | ||
------- | ||
Indices : tuple of `~numpy.ndarray` | ||
The indices of the drawn samples. See | ||
`~pycraf.mc.HistogramSampler` for examples of use. | ||
''' | ||
|
||
rsamples = np.random.rand(n) | ||
rbins = np.searchsorted(self._cdf, rsamples) | ||
|
||
indices = np.unravel_index(rbins, self._hshape) | ||
|
||
if self._ndim == 1: | ||
return indices[0] | ||
else: | ||
return indices |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
|
||
import os | ||
import pytest | ||
import numpy as np | ||
from numpy.testing import assert_equal, assert_allclose | ||
from astropy.tests.helper import assert_quantity_allclose, remote_data | ||
from astropy.utils.misc import NumpyRNGContext | ||
from astropy import units as apu | ||
from ...utils import check_astro_quantities | ||
from ... import mc | ||
|
||
|
||
TOL_KWARGS = {'atol': 1.e-4, 'rtol': 1.e-4} | ||
|
||
|
||
class TestHistogramSamper(): | ||
|
||
def setup(self): | ||
|
||
with NumpyRNGContext(1): | ||
self.x = np.random.normal(0, 1, 1000) | ||
self.y = np.random.normal(2, 2, 1000) | ||
|
||
self.xbins = np.linspace(-4, 4, 17) | ||
self.ybins = np.linspace(-6, 10, 17) | ||
self.xmids = (self.xbins[1:] + self.xbins[:-1]) / 2 | ||
self.ymids = (self.ybins[1:] + self.ybins[:-1]) / 2 | ||
|
||
def test_sample1d(self): | ||
|
||
hist, _ = np.histogram(self.x, bins=self.xbins) | ||
|
||
my_sampler = mc.HistogramSampler(hist) | ||
with NumpyRNGContext(1): | ||
indices = my_sampler.sample(10) | ||
assert_equal( | ||
indices, | ||
np.array([7, 9, 1, 7, 6, 5, 6, 7, 7, 8]), | ||
) | ||
|
||
assert_allclose( | ||
self.xmids[indices], | ||
np.array([-0.25, 0.75, -3.25, -0.25, -0.75, | ||
-1.25, -0.75, -0.25, -0.25, 0.25]), | ||
) | ||
|
||
def test_sample2d(self): | ||
|
||
hist, *_ = np.histogram2d( | ||
self.x, self.y, bins=[self.xbins, self.ybins], | ||
) | ||
|
||
my_sampler = mc.HistogramSampler(hist) | ||
with NumpyRNGContext(1): | ||
indices = my_sampler.sample(10) | ||
assert_equal( | ||
indices, | ||
np.array([ | ||
[7, 9, 1, 7, 6, 5, 6, 7, 7, 8], | ||
[8, 6, 7, 5, 4, 7, 7, 6, 8, 6] | ||
]), | ||
) | ||
|
||
assert_allclose( | ||
self.xmids[indices[0]], | ||
np.array([-0.25, 0.75, -3.25, -0.25, -0.75, | ||
-1.25, -0.75, -0.25, -0.25, 0.25]), | ||
) | ||
|
||
assert_allclose( | ||
self.ymids[indices[1]], | ||
np.array([2.5, 0.5, 1.5, -0.5, -1.5, | ||
1.5, 1.5, 0.5, 2.5, 0.5]), | ||
) |