add cupyx.scipy.stats.entropy #4369

Merged: 6 commits, Dec 8, 2020
1 change: 1 addition & 0 deletions cupyx/scipy/stats/__init__.py
@@ -0,0 +1 @@
from cupyx.scipy.stats.distributions import entropy # NOQA
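
This one-line re-export is what makes ``entropy`` importable from the package namespace. A quick sanity check, assuming a CuPy build that includes this change:

from cupyx.scipy.stats import entropy  # same object as distributions.entropy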
59 changes: 59 additions & 0 deletions cupyx/scipy/stats/distributions.py
@@ -0,0 +1,59 @@
import math

import cupy
from cupyx.scipy import special


def _normalize(x, axis):
    """Normalize, preserving floating point precision of x."""
    x_sum = x.sum(axis=axis, keepdims=True)
    if x.dtype.kind == 'f':
        x /= x_sum
    else:
        x = x / x_sum
    return x


def entropy(pk, qk=None, base=None, axis=0):
    """Calculate the entropy of a distribution for given probability values.

    If only probabilities ``pk`` are given, the entropy is calculated as
    ``S = -sum(pk * log(pk), axis=axis)``.

    If ``qk`` is not None, then compute the Kullback-Leibler divergence
    ``S = sum(pk * log(pk / qk), axis=axis)``.

    This routine will normalize ``pk`` and ``qk`` if they don't sum to 1.

    Args:
        pk (ndarray): Defines the (discrete) distribution. ``pk[i]`` is the
            (possibly unnormalized) probability of event ``i``.
        qk (ndarray, optional): Sequence against which the relative entropy is
            computed. Should be in the same format as ``pk``.
        base (float, optional): The logarithmic base to use, defaults to ``e``
            (natural logarithm).
        axis (int, optional): The axis along which the entropy is calculated.
            Default is 0.

    Returns:
        S (cupy.ndarray): The calculated entropy.

    """
    if pk.dtype.kind == 'c' or qk is not None and qk.dtype.kind == 'c':
        raise TypeError("complex dtype not supported")

    float_type = cupy.float32 if pk.dtype.char in 'ef' else cupy.float64
    pk = pk.astype(float_type, copy=False)
    pk = _normalize(pk, axis)
    if qk is None:
        vec = special.entr(pk)
    else:
        if qk.shape != pk.shape:
            raise ValueError("qk and pk must have same shape.")
        qk = qk.astype(float_type, copy=False)
        qk = _normalize(qk, axis)
        vec = special.rel_entr(pk, qk)
    s = cupy.sum(vec, axis=axis)
    if base is not None:
        s /= math.log(base)
    return s
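
To make the new API concrete, here is a minimal usage sketch; the values in the comments are approximate and assume a working GPU-enabled CuPy install:

import cupy
from cupyx.scipy import stats

pk = cupy.asarray([0.5, 0.2, 0.3])
qk = cupy.asarray([0.1, 0.25, 0.65])

# Shannon entropy in nats (pk already sums to 1, so no renormalization).
stats.entropy(pk)            # ~1.0297

# The same entropy in bits, via the ``base`` argument.
stats.entropy(pk, base=2.0)  # ~1.4855

# Kullback-Leibler divergence D(pk || qk); zero when qk equals pk.
stats.entropy(pk, qk)        # ~0.5281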
1 change: 1 addition & 0 deletions docs/source/reference/scipy.rst
@@ -19,3 +19,4 @@ These functions cover a subset of
sparse
special
signal
stats
2 changes: 1 addition & 1 deletion docs/source/reference/statistics.rst
@@ -1,7 +1,7 @@
Statistical Functions
=====================

-.. https://docs.scipy.org/doc/scipy/reference/stats.html
+.. https://numpy.org/doc/stable/reference/routines.statistics.html

Order statistics
----------------
16 changes: 16 additions & 0 deletions docs/source/reference/stats.rst
@@ -0,0 +1,16 @@
.. module:: cupyx.scipy.stats

Statistical functions (``scipy.stats``)
=======================================

.. https://docs.scipy.org/doc/scipy/reference/stats.html


Summary statistics
------------------

.. autosummary::
:toctree: generated/
:nosignatures:

cupyx.scipy.stats.entropy
140 changes: 140 additions & 0 deletions tests/cupyx_tests/scipy_tests/stats_tests/test_distributions.py
@@ -0,0 +1,140 @@
import math
import unittest

import numpy
import pytest

import cupy
from cupy import testing
import cupyx.scipy.stats  # NOQA
from cupyx.scipy import stats
from cupyx.scipy.stats import distributions

try:
    import scipy.stats  # NOQA
except ImportError:
    pass


@testing.gpu
class TestEntropyBasic(unittest.TestCase):
    def test_entropy_positive(self):
        # See SciPy ticket gh-497
        pk = cupy.asarray([0.5, 0.2, 0.3])
        qk = cupy.asarray([0.1, 0.25, 0.65])
        eself = stats.entropy(pk, pk)
        edouble = stats.entropy(pk, qk)
        assert 0.0 == eself
        assert edouble >= 0.0

    def test_entropy_base(self):
        pk = cupy.ones(16, float)
        s = stats.entropy(pk, base=2.0)
        assert abs(s - 4.0) < 1.0e-5

        qk = cupy.ones(16, float)
        qk[:8] = 2.0
        s = stats.entropy(pk, qk)
        s2 = stats.entropy(pk, qk, base=2.0)
        assert abs(s / s2 - math.log(2.0)) < 1.0e-5

    def test_entropy_zero(self):
        # Test for SciPy PR-479
        s = stats.entropy(cupy.asarray([0, 1, 2]))
        expected = 0.63651416829481278
        assert abs(float(s) - expected) < 1e-12

    def test_entropy_2d(self):
        pk = cupy.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
        qk = cupy.asarray([[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]])
        testing.assert_array_almost_equal(
            stats.entropy(pk, qk), [0.1933259, 0.18609809]
        )

    def test_entropy_2d_zero(self):
        pk = cupy.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
        qk = cupy.asarray([[0.0, 0.1], [0.3, 0.6], [0.5, 0.3]])
        testing.assert_array_almost_equal(stats.entropy(pk, qk),
                                          [cupy.inf, 0.18609809])

        pk[0][0] = 0.0
        testing.assert_array_almost_equal(
            stats.entropy(pk, qk), [0.17403988, 0.18609809]
        )

    def test_entropy_base_2d_nondefault_axis(self):
        pk = cupy.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
        testing.assert_array_almost_equal(
            stats.entropy(pk, axis=1),
            cupy.asarray([0.63651417, 0.63651417, 0.66156324]),
        )

    def test_entropy_2d_nondefault_axis(self):
        pk = cupy.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
        qk = cupy.asarray([[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]])
        testing.assert_array_almost_equal(
            stats.entropy(pk, qk, axis=1),
            cupy.asarray([0.231049, 0.231049, 0.127706]),
        )

    def test_entropy_raises_value_error(self):
        pk = cupy.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
        qk = cupy.asarray([[0.1, 0.2], [0.6, 0.3]])
        with pytest.raises(ValueError):
            stats.entropy(pk, qk)


@testing.parameterize(*(
    testing.product({
        'shape': [(64, ), (16, 15), (14, 4, 10)],
        'base': [None, 10],
        'axis': [None, 0, -1],
        'use_qk': [False, True],
        'normalize': [False, True],
    })
))
@testing.gpu
@testing.with_requires('scipy')
class TestEntropy(unittest.TestCase):

    def _entropy(self, xp, scp, dtype, shape, use_qk, base, axis, normalize):
        pk = testing.shaped_random(shape, xp, dtype=dtype)
        is_float16 = pk.dtype.char == 'e'
        if use_qk:
            qk = testing.shaped_random(shape, xp, dtype=dtype)
        else:
            qk = None

        if normalize and pk.dtype.kind != 'c':
            # if we don't normalize pk and qk, entropy will do it internally
            norm_axis = 0 if axis is None else axis
            pk = distributions._normalize(pk, norm_axis)
            if qk is not None:
                qk = distributions._normalize(qk, norm_axis)
        res = scp.stats.entropy(pk, qk=qk, base=base, axis=axis)

        float_type = xp.float32 if pk.dtype.char in 'ef' else xp.float64
        if res.ndim > 0:
            # verify expected dtype
            assert res.dtype == float_type

        # Cast back to the floating precision of the input so that the
        # correct rtol is used by numpy_cupy_allclose
        res = xp.asarray(res, xp.float16 if is_float16 else float_type)
        return res

    @testing.for_all_dtypes(no_complex=True)
    @testing.numpy_cupy_allclose(rtol={cupy.float16: 1e-3,
                                       cupy.float32: 1e-6,
                                       'default': 1e-15},
                                 scipy_name='scp')
    def test_entropy(self, xp, scp, dtype):
        return self._entropy(xp, scp, dtype, self.shape, self.use_qk,
                             self.base, self.axis, self.normalize)

    @testing.for_complex_dtypes()
    def test_entropy_complex(self, dtype):
        for xp, scp in zip([numpy, cupy], [scipy, cupyx.scipy]):
            with pytest.raises(TypeError):
                return self._entropy(xp, scp, dtype, self.shape, self.use_qk,
                                     self.base, self.axis, self.normalize)
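
The parameterized tests rely on CuPy's ``testing.numpy_cupy_allclose`` helper to run each case against both SciPy and CuPy. The same parity check can be written by hand; a minimal sketch, assuming SciPy >= 1.4 (for the ``axis`` argument) and a GPU-enabled CuPy are installed:

import numpy
import scipy.stats

import cupy
import cupyx.scipy.stats

pk = numpy.random.rand(16, 15)
qk = numpy.random.rand(16, 15)

# CPU reference result from SciPy (inputs are normalized internally).
cpu = scipy.stats.entropy(pk, qk, axis=0)

# GPU result from the new CuPy routine on device copies of the same data.
gpu = cupyx.scipy.stats.entropy(cupy.asarray(pk), cupy.asarray(qk), axis=0)

numpy.testing.assert_allclose(cpu, cupy.asnumpy(gpu), rtol=1e-12)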