Skip to content

Commit

Permalink
Merge pull request #70 from bashtage/relax-sign-test
Browse files Browse the repository at this point in the history
BUG: Correct handling of nans in distributions
  • Loading branch information
bashtage committed Apr 10, 2019
2 parents 5d0d8fd + b731988 commit fea5a6c
Show file tree
Hide file tree
Showing 14 changed files with 749 additions and 594 deletions.
702 changes: 351 additions & 351 deletions README.rst

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions doc/source/change-log.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
Change Log
----------

Since v1.16.2
=============
- Added broadcasting to multinomial (see
`NumPy issue 9710 <https://github.com/numpy/numpy/pull/9710>`_)

v1.16.2
=======
- Updated Xoroshiro120 to use AUthor's latest parameterization
Expand Down
1 change: 1 addition & 0 deletions randomgen/common.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ cdef enum ConstraintType:
CONS_NONE
CONS_NON_NEGATIVE
CONS_POSITIVE
CONS_POSITIVE_NOT_NAN
CONS_BOUNDED_0_1
CONS_BOUNDED_0_1_NOTNAN
CONS_BOUNDED_GT_0_1
Expand Down
42 changes: 25 additions & 17 deletions randomgen/common.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -297,52 +297,60 @@ cdef uint64_t MAXSIZE = <uint64_t>sys.maxsize

cdef int check_array_constraint(np.ndarray val, object name, constraint_type cons) except -1:
if cons == CONS_NON_NEGATIVE:
if np.any(np.signbit(val)) or np.any(np.isnan(val)):
if np.any(np.logical_and(np.logical_not(np.isnan(val)), np.signbit(val))):
raise ValueError(name + " < 0")
elif cons == CONS_POSITIVE:
if not np.all(np.greater(val, 0)):
elif cons == CONS_POSITIVE or cons == CONS_POSITIVE_NOT_NAN:
if cons == CONS_POSITIVE_NOT_NAN and np.any(np.isnan(val)):
raise ValueError(name + " must not be NaN")
elif np.any(np.less_equal(val, 0)):
raise ValueError(name + " <= 0")
elif cons == CONS_BOUNDED_0_1:
if not np.all(np.greater_equal(val, 0)) or \
not np.all(np.less_equal(val, 1)):
raise ValueError(name + " < 0 or " + name + " > 1")
raise ValueError("{0} < 0 , {0} > 1 or {0} contains NaNs".format(name))
elif cons == CONS_BOUNDED_GT_0_1:
if not np.all(np.greater(val, 0)) or not np.all(np.less_equal(val, 1)):
raise ValueError(name + " <= 0 or " + name + " > 1")
raise ValueError("{0} <= 0 , {0} > 1 or {0} contains NaNs".format(name))
elif cons == CONS_GT_1:
if not np.all(np.greater(val, 1)):
raise ValueError(name + " <= 1")
raise ValueError("{0} <= 1 or {0} contains NaNs".format(name))
elif cons == CONS_GTE_1:
if not np.all(np.greater_equal(val, 1)):
raise ValueError(name + " < 1")
raise ValueError("{0} < 1 or {0} contains NaNs".format(name))
elif cons == CONS_POISSON:
if not np.all(np.less_equal(val, POISSON_LAM_MAX)):
raise ValueError(name + " value too large")
if not np.all(np.greater_equal(val, 0.0)):
raise ValueError(name + " < 0")
raise ValueError("{0} value too large".format(name))
elif not np.all(np.greater_equal(val, 0.0)):
raise ValueError("{0} < 0 or {0} contains NaNs".format(name))

return 0


cdef int check_constraint(double val, object name, constraint_type cons) except -1:
cdef bint is_nan
if cons == CONS_NON_NEGATIVE:
if np.signbit(val) or np.isnan(val):
if not np.isnan(val) and np.signbit(val):
raise ValueError(name + " < 0")
elif cons == CONS_POSITIVE:
if not (val > 0):
elif cons == CONS_POSITIVE or cons == CONS_POSITIVE_NOT_NAN:
if cons == CONS_POSITIVE_NOT_NAN and np.isnan(val):
raise ValueError(name + " must not be NaN")
elif val <= 0:
raise ValueError(name + " <= 0")
elif cons == CONS_BOUNDED_0_1:
if not (val >= 0) or not (val <= 1):
raise ValueError(name + " < 0 or " + name + " > 1")
raise ValueError("{0} < 0 , {0} > 1 or {0} is NaN".format(name))
elif cons == CONS_BOUNDED_GT_0_1:
if not val >0 or not val <= 1:
raise ValueError("{0} <= 0 , {0} > 1 or {0} contains NaNs".format(name))
elif cons == CONS_GT_1:
if not (val > 1):
raise ValueError(name + " <= 1")
raise ValueError("{0} <= 1 or {0} is NaN".format(name))
elif cons == CONS_GTE_1:
if not (val >= 1):
raise ValueError(name + " < 1")
raise ValueError("{0} < 1 or {0} is NaN".format(name))
elif cons == CONS_POISSON:
if not (val >= 0):
raise ValueError(name + " < 0")
raise ValueError("{0} < 0 or {0} is NaN".format(name))
elif not (val <= POISSON_LAM_MAX):
raise ValueError(name + " value too large")

Expand Down
2 changes: 2 additions & 0 deletions randomgen/distributions.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,5 @@ cdef extern from "src/distributions/distributions.h":
np.npy_bool off, np.npy_bool rng, np.npy_intp cnt,
bint use_masked,
np.npy_bool *out) nogil
void random_multinomial(brng_t *brng_state, int64_t n, int64_t *mnix,
double *pix, np.npy_intp d, binomial_t *binomial) nogil
104 changes: 64 additions & 40 deletions randomgen/generator.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import randomgen.pickle

np.import_array()


cdef class RandomGenerator:
"""
RandomGenerator(brng=None)
Expand Down Expand Up @@ -908,7 +907,7 @@ cdef class RandomGenerator:
Parameters
----------
d0, d1, ..., dn : int, optional
The dimensions of the returned array, should all be positive.
The dimensions of the returned array, must be non-negative.
If no argument is given a single Python float is returned.
dtype : {str, dtype}, optional
Desired dtype of the result, either 'd' (or 'float64') or 'f'
Expand Down Expand Up @@ -958,7 +957,7 @@ cdef class RandomGenerator:
Parameters
----------
d0, d1, ..., dn : int, optional
The dimensions of the returned array, should be all positive.
The dimensions of the returned array, must be non-negative.
If no argument is given a single Python float is returned.
dtype : {str, dtype}, optional
Desired dtype of the result, either 'd' (or 'float64') or 'f'
Expand Down Expand Up @@ -1447,7 +1446,7 @@ cdef class RandomGenerator:
Samples are drawn from an F distribution with specified parameters,
`dfnum` (degrees of freedom in numerator) and `dfden` (degrees of
freedom in denominator), where both parameters should be greater than
freedom in denominator), where both parameters must be greater than
zero.
The random variate of the F distribution (also known as the
Expand All @@ -1458,9 +1457,9 @@ cdef class RandomGenerator:
Parameters
----------
dfnum : float or array_like of floats
Degrees of freedom in numerator, should be > 0.
Degrees of freedom in numerator, must be > 0.
dfden : float or array_like of float
Degrees of freedom in denominator, should be > 0.
Degrees of freedom in denominator, must be > 0.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -1541,15 +1540,15 @@ cdef class RandomGenerator:
Parameters
----------
dfnum : float or array_like of floats
Numerator degrees of freedom, should be > 0.
Numerator degrees of freedom, must be > 0.
.. versionchanged:: 1.14.0
Earlier NumPy versions required dfnum > 1.
dfden : float or array_like of floats
Denominator degrees of freedom, should be > 0.
Denominator degrees of freedom, must be > 0.
nonc : float or array_like of floats
Non-centrality parameter, the sum of the squares of the numerator
means, should be >= 0.
means, must be >= 0.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -1618,7 +1617,7 @@ cdef class RandomGenerator:
Parameters
----------
df : float or array_like of floats
Number of degrees of freedom, should be > 0.
Number of degrees of freedom, must be > 0.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -1684,12 +1683,12 @@ cdef class RandomGenerator:
Parameters
----------
df : float or array_like of floats
Degrees of freedom, should be > 0.
Degrees of freedom, must be > 0.
.. versionchanged:: 1.10.0
Earlier NumPy versions required dfnum > 1.
nonc : float or array_like of floats
Non-centrality, should be non-negative.
Non-centrality, must be non-negative.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -1830,7 +1829,7 @@ cdef class RandomGenerator:
Parameters
----------
df : float or array_like of floats
Degrees of freedom, should be > 0.
Degrees of freedom, must be > 0.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -2020,7 +2019,7 @@ cdef class RandomGenerator:
Parameters
----------
a : float or array_like of floats
Shape of the distribution. Must all be positive.
Shape of the distribution. Must be positive.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -2836,9 +2835,9 @@ cdef class RandomGenerator:
Lower limit.
mode : float or array_like of floats
The value where the peak of the distribution occurs.
The value should fulfill the condition ``left <= mode <= right``.
The value must fulfill the condition ``left <= mode <= right``.
right : float or array_like of floats
Upper limit, should be larger than `left`.
Upper limit, must be larger than `left`.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -3133,7 +3132,7 @@ cdef class RandomGenerator:
"""
return disc(&random_negative_binomial, self._brng, size, self.lock, 2, 0,
n, 'n', CONS_POSITIVE,
n, 'n', CONS_POSITIVE_NOT_NAN,
p, 'p', CONS_BOUNDED_0_1,
0.0, '', CONS_NONE)

Expand All @@ -3149,7 +3148,7 @@ cdef class RandomGenerator:
Parameters
----------
lam : float or array_like of floats
Expectation of interval, should be >= 0. A sequence of expectation
Expectation of interval, must be >= 0. A sequence of expectation
intervals must be broadcastable over the requested size.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
Expand Down Expand Up @@ -3488,7 +3487,7 @@ cdef class RandomGenerator:
Notes
-----
The probability density for the Log Series distribution is
The probability mass function for the Log Series distribution is
.. math:: P(k) = \\frac{-p^k}{k \\ln(1-p)},
Expand Down Expand Up @@ -3702,7 +3701,7 @@ cdef class RandomGenerator:
x.shape = tuple(final_shape)
return x

def multinomial(self, np.npy_intp n, object pvals, size=None):
def multinomial(self, object n, object pvals, size=None):
"""
multinomial(n, pvals, size=None)
Expand All @@ -3718,7 +3717,7 @@ cdef class RandomGenerator:
Parameters
----------
n : int
n : int or array-like of ints
Number of experiments.
pvals : sequence of floats, length p
Probabilities of each of the ``p`` different outcomes. These
Expand Down Expand Up @@ -3757,6 +3756,18 @@ cdef class RandomGenerator:
For the first run, we threw 3 times 1, 4 times 2, etc. For the second,
we threw 2 times 1, 4 times 2, etc.
Now, do one experiment throwing the dice 10 time, and 10 times again,
and another throwing the dice 20 times, and 20 times again:
>>> randomgen.generator.multinomial([[10], [20]], [1/6.]*6, size=2)
array([[[2, 4, 0, 1, 2, 1],
[1, 3, 0, 3, 1, 2]],
[[1, 4, 4, 4, 4, 3],
[3, 3, 2, 5, 5, 2]]]) # random
The first array shows the outcomes of throwing the dice 10 times, and
the second shows the outcomes from throwing the dice 20 times.
A loaded die is more likely to land on number 6:
>>> randomgen.generator.multinomial(100, [1/7.]*5 + [2/7.])
Expand All @@ -3777,19 +3788,43 @@ cdef class RandomGenerator:
array([100, 0])
"""
cdef np.npy_intp d, i, j, dn, sz
cdef np.ndarray parr "arrayObject_parr", mnarr "arrayObject_mnarr"

cdef np.npy_intp d, i, sz, offset
cdef np.ndarray parr, mnarr, on, temp_arr
cdef double *pix
cdef int64_t *mnix
cdef double Sum
cdef int64_t ni
cdef np.broadcast it

d = len(pvals)
on = <np.ndarray>np.PyArray_FROM_OTF(n, np.NPY_INT64, np.NPY_ALIGNED)
parr = <np.ndarray>np.PyArray_FROM_OTF(pvals, np.NPY_DOUBLE, np.NPY_ALIGNED)
pix = <double*>np.PyArray_DATA(parr)

if kahan_sum(pix, d-1) > (1.0 + 1e-12):
raise ValueError("sum(pvals[:-1]) > 1.0")

if np.PyArray_NDIM(on) != 0: # vector
if size is None:
it = np.PyArray_MultiIterNew1(on)
else:
temp = np.empty(size, dtype=np.int8)
temp_arr = <np.ndarray>temp
it = np.PyArray_MultiIterNew2(on, temp_arr)
shape = it.shape + (d,)
multin = np.zeros(shape, dtype=np.int64)
mnarr = <np.ndarray>multin
mnix = <int64_t*>np.PyArray_DATA(mnarr)
offset = 0
sz = it.size
with self.lock, nogil:
for i in range(sz):
ni = (<int64_t*>np.PyArray_MultiIter_DATA(it, 0))[0]
random_multinomial(self._brng, ni, &mnix[offset], pix, d, self._binomial)
offset += d
np.PyArray_MultiIter_NEXT(it)
return multin

if size is None:
shape = (d,)
else:
Expand All @@ -3802,23 +3837,12 @@ cdef class RandomGenerator:
mnarr = <np.ndarray>multin
mnix = <int64_t*>np.PyArray_DATA(mnarr)
sz = np.PyArray_SIZE(mnarr)

ni = n
offset = 0
with self.lock, nogil:
i = 0
while i < sz:
Sum = 1.0
dn = n
for j in range(d-1):
mnix[i+j] = random_binomial(self._brng, pix[j]/Sum, dn,
self._binomial)
dn = dn - mnix[i+j]
if dn <= 0:
break
Sum = Sum - pix[j]
if dn > 0:
mnix[i+d-1] = dn

i = i + d
for i in range(sz // d):
random_multinomial(self._brng, ni, &mnix[offset], pix, d, self._binomial)
offset += d

return multin

Expand Down

0 comments on commit fea5a6c

Please sign in to comment.