Skip to content

Commit

Permalink
Merge pull request #3765 from asi1024/use-cutensor
Browse files Browse the repository at this point in the history
Use cuTENSOR in `cupy.prod`, `cupy.max`, `cupy.min`, `cupy.ptp` and `cupy.mean`
  • Loading branch information
kmaehashi committed Sep 30, 2020
2 parents 624e669 + 28ba922 commit 34cb08d
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 10 deletions.
12 changes: 12 additions & 0 deletions cupy/_statistics/order.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ def amin(a, axis=None, out=None, keepdims=False):
Returns:
cupy.ndarray: The minimum of ``a``, along the axis if specified.
.. note::
When cuTENSOR accelerator is used, the output value might be collapsed
for reduction axes that have one or more NaN elements.
.. seealso:: :func:`numpy.amin`
"""
Expand Down Expand Up @@ -59,6 +63,10 @@ def amax(a, axis=None, out=None, keepdims=False):
Returns:
cupy.ndarray: The maximum of ``a``, along the axis if specified.
.. note::
When cuTENSOR accelerator is used, the output value might be collapsed
for reduction axes that have one or more NaN elements.
.. seealso:: :func:`numpy.amax`
"""
Expand Down Expand Up @@ -156,6 +164,10 @@ def ptp(a, axis=None, out=None, keepdims=False):
Returns:
    cupy.ndarray: The range (maximum - minimum) of ``a``, along the axis if specified.
.. note::
   When cuTENSOR accelerator is used, the output value might be collapsed
   for reduction axes that have one or more NaN elements.
.. seealso:: :func:`numpy.ptp`
"""
Expand Down
8 changes: 6 additions & 2 deletions cupy/core/_routines_math.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,16 @@ cdef ndarray _ndarray_imag_setter(ndarray self, value):

cdef ndarray _ndarray_prod(ndarray self, axis, dtype, out, keepdims):
for accelerator in _accelerator._routine_accelerators:
result = None
if accelerator == _accelerator.ACCELERATOR_CUB:
# result will be None if the reduction is not compatible with CUB
result = cub.cub_reduction(
self, cub.CUPY_CUB_PROD, axis, dtype, out, keepdims)
if result is not None:
return result
if accelerator == _accelerator.ACCELERATOR_CUTENSOR:
result = cutensor._try_reduction_routine(
self, axis, dtype, out, keepdims, cuda_cutensor.OP_MUL, 1, 0)
if result is not None:
return result
if dtype is None:
return _prod_auto_dtype(self, axis, dtype, out, keepdims)
else:
Expand Down
52 changes: 48 additions & 4 deletions cupy/core/_routines_statistics.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,26 +18,47 @@ if not cupy.cuda.runtime.is_hip:
else:
cub = None

if cupy.cuda.cutensor.available:
import cupy_backends.cuda.libs.cutensor as cuda_cutensor
from cupy import cutensor
else:
cuda_cutensor = None
cutensor = None


cdef ndarray _ndarray_max(ndarray self, axis, out, dtype, keepdims):
    """Compute the maximum of ``self``, trying enabled accelerators first.

    Each configured routine accelerator is given a chance to perform the
    reduction; the generic ``_amax`` kernel is the fallback when none of
    them produces a result.
    """
    for acc in _accelerator._routine_accelerators:
        candidate = None
        if acc == _accelerator.ACCELERATOR_CUB:
            # cub_reduction returns None when the reduction is not
            # compatible with CUB.
            candidate = cub.cub_reduction(
                self, cub.CUPY_CUB_MAX, axis, dtype, out, keepdims)
        elif acc == _accelerator.ACCELERATOR_CUTENSOR:
            if self.dtype.kind == 'c' or dtype in ('F', 'D'):
                # Complex dtype is not supported by the cuTENSOR path.
                continue
            candidate = cutensor._try_reduction_routine(
                self, axis, dtype, out, keepdims, cuda_cutensor.OP_MAX, 1, 0)
        if candidate is not None:
            return candidate
    # No accelerator handled the reduction; use the generic kernel.
    return _amax(self, axis=axis, out=out, dtype=dtype, keepdims=keepdims)


cdef ndarray _ndarray_min(ndarray self, axis, out, dtype, keepdims):
    """Compute the minimum of ``self``, trying enabled accelerators first.

    Falls back to the generic ``_amin`` reduction kernel when no
    accelerator produces a result.
    """
    for accelerator in _accelerator._routine_accelerators:
        result = None
        if accelerator == _accelerator.ACCELERATOR_CUB:
            # result will be None if the reduction is not compatible with CUB
            # NOTE(fix): arguments are (axis, dtype, out) in that order,
            # matching the cub_reduction calls in _ndarray_max and
            # _ndarray_prod; the previous call swapped ``out`` and ``dtype``.
            result = cub.cub_reduction(
                self, cub.CUPY_CUB_MIN, axis, dtype, out, keepdims)
            if result is not None:
                return result
        if accelerator == _accelerator.ACCELERATOR_CUTENSOR:
            if self.dtype.kind == 'c' or dtype in ('F', 'D'):
                # Complex dtype is not supported by the cuTENSOR path.
                continue
            result = cutensor._try_reduction_routine(
                self, axis, dtype, out, keepdims, cuda_cutensor.OP_MIN, 1, 0)
            if result is not None:
                return result
    return _amin(self, axis=axis, out=out, dtype=dtype, keepdims=keepdims)


Expand All @@ -51,6 +72,16 @@ cdef ndarray _ndarray_ptp(ndarray self, axis, out, keepdims):
result -= cub.cub_reduction(
self, cub.CUPY_CUB_MIN, axis, None, None, keepdims)
return result
if accelerator == _accelerator.ACCELERATOR_CUTENSOR:
if self.dtype.kind == 'c':
# Complex dtype is not supported
continue
maxv = cutensor._try_reduction_routine(
self, axis, None, out, keepdims, cuda_cutensor.OP_MAX, 1, 0)
if maxv is None:
continue
return cutensor._try_reduction_routine(
self, axis, None, maxv, keepdims, cuda_cutensor.OP_MIN, -1, 1)

result = _amax(self, axis=axis, out=out, keepdims=keepdims)
result -= _amin(self, axis=axis, out=None, keepdims=keepdims)
Expand Down Expand Up @@ -82,6 +113,8 @@ cdef ndarray _ndarray_argmin(ndarray self, axis, out, dtype, keepdims):


cdef ndarray _ndarray_mean(ndarray self, axis, dtype, out, keepdims):
cdef Py_ssize_t n

dtype_sum = dtype_out = dtype
if dtype is None:
if self.dtype.kind in 'iub':
Expand All @@ -103,6 +136,17 @@ cdef ndarray _ndarray_mean(ndarray self, axis, dtype, out, keepdims):
n = self.size // result.size
cupy.true_divide(result, n, out=result, casting='unsafe')
break
if accelerator == _accelerator.ACCELERATOR_CUTENSOR:
reduce_axis, _ = _reduction._get_axis(axis, self._shape.size())
n = 1
for i in reduce_axis:
n *= self._shape[i]
n = max(n, 1)
result = cutensor._try_reduction_routine(
self, axis, dtype_sum, out, keepdims,
cuda_cutensor.OP_ADD, 1.0 / n, 0)
if result is not None:
break
else:
result = _mean(
self, axis=axis, dtype=dtype_sum, out=out, keepdims=keepdims)
Expand Down
2 changes: 2 additions & 0 deletions cupy/cutensor.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,8 @@ def _try_reduction_routine(
if dtype != x.dtype:
return None

if x.ndim == 0:
return None
if x.size == 0:
return None
if not x._c_contiguous:
Expand Down
15 changes: 11 additions & 4 deletions tests/cupy_tests/core_tests/test_ndarray_reduction.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import unittest

import numpy
import pytest

import cupy
from cupy.core import _accelerator
import cupy.core._accelerator as _acc
from cupy import testing


Expand Down Expand Up @@ -61,6 +62,8 @@ def test_max_multiple_axes_keepdims(self, xp, dtype):
@testing.for_float_dtypes()
@testing.numpy_cupy_allclose()
def test_max_nan(self, xp, dtype):
    """max over an array containing NaN must match NumPy's NaN propagation."""
    # cuTENSOR reductions may collapse NaN elements (see docs note), so
    # this comparison against NumPy is skipped under that accelerator.
    if _acc.ACCELERATOR_CUTENSOR in _acc.get_routine_accelerators():
        pytest.skip()
    data = xp.array([float('nan'), 1, -1], dtype)
    return data.max()

Expand Down Expand Up @@ -127,6 +130,8 @@ def test_min_multiple_axes_keepdims(self, xp, dtype):
@testing.for_float_dtypes()
@testing.numpy_cupy_allclose()
def test_min_nan(self, xp, dtype):
    """min over an array containing NaN must match NumPy's NaN propagation."""
    # cuTENSOR reductions may collapse NaN elements (see docs note), so
    # this comparison against NumPy is skipped under that accelerator.
    if _acc.ACCELERATOR_CUTENSOR in _acc.get_routine_accelerators():
        pytest.skip()
    data = xp.array([float('nan'), 1, -1], dtype)
    return data.min()

Expand Down Expand Up @@ -197,6 +202,8 @@ def test_ptp_multiple_axes_keepdims(self, xp, dtype):
@testing.for_float_dtypes()
@testing.numpy_cupy_allclose()
def test_ptp_nan(self, xp, dtype):
    """ptp over an array containing NaN must match NumPy's NaN propagation."""
    # cuTENSOR reductions may collapse NaN elements (see docs note), so
    # this comparison against NumPy is skipped under that accelerator.
    if _acc.ACCELERATOR_CUTENSOR in _acc.get_routine_accelerators():
        pytest.skip()
    values = xp.array([float('nan'), 1, -1], dtype)
    return values.ptp()

Expand All @@ -223,11 +230,11 @@ def test_ptp_nan_imag(self, xp, dtype):
class TestCubReduction(unittest.TestCase):

def setUp(self):
    """Force the CUB accelerator for the duration of each test.

    The scraped diff merged the pre- and post-rename statements, leaving
    the snapshot/set pair duplicated; keep only the ``_acc`` version.
    """
    # Snapshot the current accelerators so tearDown can restore them.
    self.old_accelerators = _acc.get_routine_accelerators()
    _acc.set_routine_accelerators(['cub'])

def tearDown(self):
    """Restore the accelerator configuration captured in setUp.

    The scraped diff merged the pre- and post-rename statements; keep
    only the ``_acc`` version.
    """
    _acc.set_routine_accelerators(self.old_accelerators)

@testing.for_contiguous_axes()
@testing.for_all_dtypes(no_bool=True, no_float16=True)
Expand Down
5 changes: 5 additions & 0 deletions tests/cupy_tests/statistics_tests/test_order.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytest

import cupy
import cupy.core._accelerator as _acc
from cupy import testing


Expand Down Expand Up @@ -236,11 +237,15 @@ def test_ptp_axis2(self, xp, dtype):
@testing.for_float_dtypes()
@testing.numpy_cupy_allclose()
def test_ptp_nan(self, xp, dtype):
    """xp.ptp with a NaN element must match NumPy's NaN propagation."""
    # cuTENSOR reductions may collapse NaN elements (see docs note), so
    # this comparison against NumPy is skipped under that accelerator.
    if _acc.ACCELERATOR_CUTENSOR in _acc.get_routine_accelerators():
        pytest.skip()
    values = xp.array([float('nan'), 1, -1], dtype)
    return xp.ptp(values)

@testing.for_float_dtypes()
@testing.numpy_cupy_allclose()
def test_ptp_all_nan(self, xp, dtype):
    """xp.ptp over an all-NaN array must match NumPy's NaN propagation."""
    # cuTENSOR reductions may collapse NaN elements (see docs note), so
    # this comparison against NumPy is skipped under that accelerator.
    if _acc.ACCELERATOR_CUTENSOR in _acc.get_routine_accelerators():
        pytest.skip()
    values = xp.array([float('nan'), float('nan')], dtype)
    return xp.ptp(values)

0 comments on commit 34cb08d

Please sign in to comment.