Skip to content

Commit

Permalink
add a new module: pycraf.mc containing MC helpers
Browse files Browse the repository at this point in the history
  • Loading branch information
bwinkel committed Aug 1, 2017
1 parent 3db3118 commit 232d4a3
Show file tree
Hide file tree
Showing 7 changed files with 289 additions and 2 deletions.
3 changes: 2 additions & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ Available modules
geospatial/index
satellite/index
geometry/index
mc/index

.. automodapi:: pycraf
:no-heading:
Expand All @@ -64,7 +65,7 @@ Project details
Acknowledgments
***************

This code is makes use of the excellent work provided by the
This code makes use of the excellent work provided by the
`Astropy <http://www.astropy.org/>`__ community. pycraf uses the Astropy package and also the
`Astropy Package Template <https://github.com/astropy/package-template>`__
for the packaging.
Expand Down
26 changes: 26 additions & 0 deletions docs/mc/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
.. pycraf-mc:
*********************************
Monte-Carlo helpers (`pycraf.mc`)
*********************************

.. currentmodule:: pycraf.mc

Introduction
============

The `~pycraf.mc` subpackage


See Also
========

- `Kernel density estimation (KDE) <https://jakevdp.github.io/blog/2013/12/01/kernel-density-estimation/>`__, blog by Jake Vanderplas.
- `Scipy gaussian_kde <https://docs.scipy.org/doc/scipy-0.15.1/reference/generated/scipy.stats.gaussian_kde.html#scipy.stats.gaussian_kde>`__

Reference/API
=============

.. automodapi:: pycraf.mc
:no-inheritance-diagram:
:include-all-objects:
3 changes: 2 additions & 1 deletion pycraf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
from . import conversions
from . import geometry
from . import geospatial
from . import utils
from . import mc
from . import pathprof
from . import protection
from . import satellite
from . import utils
9 changes: 9 additions & 0 deletions pycraf/mc/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

'''
This subpackage contains various little convenience functions to
help with Monte-Carlo type compatiblity studies.
'''

from .sampler import *
174 changes: 174 additions & 0 deletions pycraf/mc/sampler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np


__all__ = ['HistogramSampler']


class HistogramSampler(object):
'''
Sampler to get random values obeying a discrete(!) density distribution.
With this class, one can use discrete densities (think of them as
binned entities, aka histograms) to get random samples that follow
the same (binned) distribution as the histogram. For simplicity
the returned values are the N-dim indices that need to be fed
into the histogram bin-ranges if one wants to convert them
to physical values (see examples, below).
Parameters
----------
histvals : N-D `~numpy.ndarray`
Discrete density distribution. (This is the histogram array, which
one would get out of `~numpy.histogram` functions.)
Returns
-------
hist_sampler : `~pycraf.mc.HistogramSampler`
A `~pycraf.mc.HistogramSampler` instance.
Examples
--------
A trivial one-dimensional use case::
>>> import numpy as np
>>> from astropy.utils.misc import NumpyRNGContext
>>> from pycraf import mc
>>> with NumpyRNGContext(1):
... x = np.random.normal(0, 1, 100)
>>> hist, bins = np.histogram(x, bins=16, range=(-4, 4))
>>> mid_points = (bins[1:] + bins[:-1]) / 2
>>> my_sampler = mc.HistogramSampler(hist)
>>> with NumpyRNGContext(1):
... indices = my_sampler.sample(10)
>>> print(indices)
[7 9 3 7 6 5 6 7 7 8]
>>> print(mid_points[indices])
[-0.25 0.75 -2.25 -0.25 -0.75 -1.25 -0.75 -0.25 -0.25 0.25]
Works equally simple in 2D::
>>> with NumpyRNGContext(1):
... x = np.random.normal(0, 1, 1000)
... y = np.random.normal(2, 2, 1000)
>>> hist2d, xbins, ybins = np.histogram2d(
... x, y, bins=(16, 16), range=((-4, 4), (-6, 10))
... )
>>> xmids = (xbins[1:] + xbins[:-1]) / 2
>>> ymids = (ybins[1:] + ybins[:-1]) / 2
>>> my_sampler = mc.HistogramSampler(hist2d)
>>> with NumpyRNGContext(1):
... indices = my_sampler.sample(10)
>>> print(list(zip(*indices)))
[(7, 8), (9, 6), (1, 7), (7, 5), (6, 4),
(5, 7), (6, 7), (7, 6), (7, 8), (8, 6)]
>>> print(list(zip(xmids[indices[0]], ymids[indices[1]])))
[(-0.25, 2.5), (0.75, 0.5), (-3.25, 1.5), (-0.25, -0.5),
(-0.75, -1.5), (-1.25, 1.5), (-0.75, 1.5), (-0.25, 0.5),
(-0.25, 2.5), (0.25, 0.5)]
It is also easily possible to apply weights. Just assume
that one bin was observed exceptionally frequent::
>>> weights = np.ones_like(x)
>>> weights[500] = 1000
>>> hist2d, xbins, ybins = np.histogram2d(
... x, y, bins=(16, 16), range=((-4, 4), (-6, 10)),
... weights=weights
... )
>>> my_sampler = mc.HistogramSampler(hist2d)
>>> with NumpyRNGContext(1):
... indices = my_sampler.sample(10)
>>> print(list(zip(xmids[indices[0]], ymids[indices[1]])))
[(-1.75, 4.5), (-0.25, 3.5), (-3.25, 1.5), (-1.75, 4.5),
(-1.75, 4.5), (-1.75, 4.5), (-1.75, 4.5), (-1.75, 4.5),
(-1.75, 4.5), (-1.25, 0.5)]
As can be seen, the value `((1.25, -1.5))` is now exceptionally
often sampled from the distribution.
As discussed in the notes, for some use-cases a KDE might
be the better tool:
>>> from scipy.stats import gaussian_kde
>>> kernel = gaussian_kde((x, y))
>>> with NumpyRNGContext(1):
... values = kernel.resample(10)
>>> print('\\n'.join(
... '({:9.6f}, {:9.6f})'.format(*t)
... for t in zip(values[0], values[1])
... ))
(-1.470808, 0.730811)
( 0.088397, 3.407584)
...
(-2.097790, -0.251477)
( 0.261941, -0.936229)
Notes
-----
Even if you have continuous data to start with, using the histogram
approach obviously works, as well (as is demonstrated in the examples).
However, one looses the "continuous" property in the process.
The resulting samples will always be just be able to work
out the bin, but no continuous quantity can be reconstructed.
For many use cases this is probably fine, but in others one
might be better of by using Kernel Density Estimation.
There is a function for this in `~scipy.stats`
(`~scipy.stats.gaussian_kde`), which even allows works with
multi-variate data and allows one to sample from the KDE PDF
(see also the examples). Unfortunately, one cannot work with
weighted data.
'''

def __init__(self, histvals):

histvals = np.atleast_1d(histvals)

self._hshape = histvals.shape
self._ndim = histvals.ndim
# cdf is flat, will need to unravel indices later
self._cdf = np.cumsum(
histvals.flatten().astype(np.float64, copy=False)
)
self._cdf /= self._cdf[-1]

def sample(self, n):
'''
Sample from the (discrete) density distribution.
Parameters
----------
n : int
Number of samples to draw.
Returns
-------
Indices : tuple of `~numpy.ndarray`
The indices of the drawn samples. See
`~pycraf.mc.HistogramSampler` for examples of use.
'''

rsamples = np.random.rand(n)
rbins = np.searchsorted(self._cdf, rsamples)

indices = np.unravel_index(rbins, self._hshape)

if self._ndim == 1:
return indices[0]
else:
return indices
Empty file added pycraf/mc/tests/__init__.py
Empty file.
76 changes: 76 additions & 0 deletions pycraf/mc/tests/test_sampler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import pytest
import numpy as np
from numpy.testing import assert_equal, assert_allclose
from astropy.tests.helper import assert_quantity_allclose, remote_data
from astropy.utils.misc import NumpyRNGContext
from astropy import units as apu
from ...utils import check_astro_quantities
from ... import mc


TOL_KWARGS = {'atol': 1.e-4, 'rtol': 1.e-4}


class TestHistogramSamper():

def setup(self):

with NumpyRNGContext(1):
self.x = np.random.normal(0, 1, 1000)
self.y = np.random.normal(2, 2, 1000)

self.xbins = np.linspace(-4, 4, 17)
self.ybins = np.linspace(-6, 10, 17)
self.xmids = (self.xbins[1:] + self.xbins[:-1]) / 2
self.ymids = (self.ybins[1:] + self.ybins[:-1]) / 2

def test_sample1d(self):

hist, _ = np.histogram(self.x, bins=self.xbins)

my_sampler = mc.HistogramSampler(hist)
with NumpyRNGContext(1):
indices = my_sampler.sample(10)
assert_equal(
indices,
np.array([7, 9, 1, 7, 6, 5, 6, 7, 7, 8]),
)

assert_allclose(
self.xmids[indices],
np.array([-0.25, 0.75, -3.25, -0.25, -0.75,
-1.25, -0.75, -0.25, -0.25, 0.25]),
)

def test_sample2d(self):

hist, *_ = np.histogram2d(
self.x, self.y, bins=[self.xbins, self.ybins],
)

my_sampler = mc.HistogramSampler(hist)
with NumpyRNGContext(1):
indices = my_sampler.sample(10)
assert_equal(
indices,
np.array([
[7, 9, 1, 7, 6, 5, 6, 7, 7, 8],
[8, 6, 7, 5, 4, 7, 7, 6, 8, 6]
]),
)

assert_allclose(
self.xmids[indices[0]],
np.array([-0.25, 0.75, -3.25, -0.25, -0.75,
-1.25, -0.75, -0.25, -0.25, 0.25]),
)

assert_allclose(
self.ymids[indices[1]],
np.array([2.5, 0.5, 1.5, -0.5, -1.5,
1.5, 1.5, 0.5, 2.5, 0.5]),
)

0 comments on commit 232d4a3

Please sign in to comment.