diff --git a/cupy/cuda/stream.pyx b/cupy/cuda/stream.pyx
index 882e0164c73..567ef4c723a 100644
--- a/cupy/cuda/stream.pyx
+++ b/cupy/cuda/stream.pyx
@@ -276,12 +276,16 @@ class Stream(BaseStream):
         cdef intptr_t current_ptr
         if is_shutting_down():
             return
+        tls = _ThreadLocal.get()
         if self.ptr:
-            tls = _ThreadLocal.get()
             current_ptr = tls.get_current_stream_ptr()
             if self.ptr == current_ptr:
                 tls.set_current_stream(self.null)
             runtime.streamDestroy(self.ptr)
+        else:
+            current_stream = tls.get_current_stream()
+            if current_stream == self:
+                tls.set_current_stream(self.null)
         # Note that we can not release memory pool of the stream held in CPU
         # because the memory would still be used in kernels executed in GPU.
diff --git a/cupy/testing/__init__.py b/cupy/testing/__init__.py
index 060bf08d008..9f27c59c30c 100644
--- a/cupy/testing/__init__.py
+++ b/cupy/testing/__init__.py
@@ -20,6 +20,7 @@
 from cupy.testing.helper import for_all_dtypes_combination  # NOQA
 from cupy.testing.helper import for_CF_orders  # NOQA
 from cupy.testing.helper import for_complex_dtypes  # NOQA
+from cupy.testing.helper import for_contiguous_axes  # NOQA
 from cupy.testing.helper import for_dtypes  # NOQA
 from cupy.testing.helper import for_dtypes_combination  # NOQA
 from cupy.testing.helper import for_float_dtypes  # NOQA
diff --git a/cupy/testing/helper.py b/cupy/testing/helper.py
index 06d7067f02e..714609e6625 100644
--- a/cupy/testing/helper.py
+++ b/cupy/testing/helper.py
@@ -1013,6 +1013,45 @@ def for_CF_orders(name='order'):
     return for_orders([None, 'C', 'F', 'c', 'f'], name)
 
 
+def for_contiguous_axes(name='axis'):
+    '''Decorator for parametrizing tests with all possible contiguous axes.
+
+    Args:
+        name(str): Argument name to which the specified axes are passed.
+
+    .. note::
+        1. Adapted from tests/cupy_tests/fft_tests/test_fft.py.
+        2. Example: for ``shape = (1, 2, 3)``, the tested axes are
+           ``[(2,), (1, 2), (0, 1, 2)]`` for the C order, and
+           ``[(0,), (0, 1), (0, 1, 2)]`` for the F order.
+    '''
+    def decorator(impl):
+        @functools.wraps(impl)
+        def test_func(self, *args, **kw):
+            ndim = len(self.shape)
+            order = self.order
+            for i in range(ndim):
+                a = ()
+                if order in ('c', 'C'):
+                    for j in range(ndim-1, i-1, -1):
+                        a = (j,) + a
+                elif order in ('f', 'F'):
+                    for j in range(0, i+1):
+                        a = a + (j,)
+                else:
+                    raise ValueError('Please specify the array order.')
+                try:
+                    print(order, ', testing', a)
+                    kw[name] = a
+                    impl(self, *args, **kw)
+                except Exception:
+                    print(name, 'is', a, ', ndim is', ndim, ', shape is',
+                          self.shape, ', order is', order)
+                    raise
+        return test_func
+    return decorator
+
+
 def with_requires(*requirements):
     """Run a test case only when given requirements are satisfied.
diff --git a/cupyx/__init__.py b/cupyx/__init__.py
index 2bb3285b6b4..985b2d8d7f6 100644
--- a/cupyx/__init__.py
+++ b/cupyx/__init__.py
@@ -8,6 +8,7 @@
 from cupyx import linalg  # NOQA
 from cupyx import time  # NOQA
 from cupyx import scipy  # NOQA
+from cupyx import optimizing  # NOQA
 
 from cupyx._ufunc_config import errstate  # NOQA
 from cupyx._ufunc_config import geterr  # NOQA
diff --git a/cupyx/optimizing/_optimize.py b/cupyx/optimizing/_optimize.py
index af14cc6ef39..912a968bf29 100644
--- a/cupyx/optimizing/_optimize.py
+++ b/cupyx/optimizing/_optimize.py
@@ -1,7 +1,13 @@
 import contextlib
 import math
 
-import optuna
+
+try:
+    import optuna
+    _optuna_available = True
+except ImportError:
+    _optuna_available = False
+
 from cupy.core import _optimize_config
 from cupyx import time
 
@@ -68,6 +74,11 @@ def optimize(*, key=None, **config_dict):
         Optuna (https://optuna.org) installation is required.
         Currently it works for reduction operations only.
     """
+    if not _optuna_available:
+        raise RuntimeError(
+            'Optuna is required to run optimization. '
+            'See https://optuna.org/ for the installation instructions.')
+
     old_context = _optimize_config.get_current_context()
     context = _optimize_config.get_new_context(key, _optimize, config_dict)
     _optimize_config.set_current_context(context)
diff --git a/tests/cupy_tests/core_tests/test_ndarray_reduction.py b/tests/cupy_tests/core_tests/test_ndarray_reduction.py
index 3b941f4b149..fb750ffcdc2 100644
--- a/tests/cupy_tests/core_tests/test_ndarray_reduction.py
+++ b/tests/cupy_tests/core_tests/test_ndarray_reduction.py
@@ -1,5 +1,6 @@
 import unittest
 
+import cupy
 from cupy import testing
 
 
@@ -207,3 +208,36 @@ def test_ptp_nan_real(self, xp, dtype):
     def test_ptp_nan_imag(self, xp, dtype):
         a = xp.array([float('nan')*1.j, 1.j, -1.j], dtype)
         return a.ptp()
+
+
+# This class compares CUB results against NumPy's
+@testing.parameterize(*testing.product({
+    'shape': [(10,), (10, 20), (10, 20, 30), (10, 20, 30, 40)],
+    'order': ('C', 'F'),
+}))
+@testing.gpu
+@unittest.skipIf(cupy.cuda.cub_enabled is False, 'The CUB module is not built')
+class TestCUBreduction(unittest.TestCase):
+    @testing.for_contiguous_axes()
+    @testing.for_dtypes('bhilBHILfdFD')
+    @testing.numpy_cupy_allclose(rtol=1E-5)
+    def test_cub_min(self, xp, dtype, axis):
+        assert cupy.cuda.cub_enabled
+        a = testing.shaped_random(self.shape, xp, dtype)
+        if self.order in ('c', 'C'):
+            a = xp.ascontiguousarray(a)
+        elif self.order in ('f', 'F'):
+            a = xp.asfortranarray(a)
+        return a.min(axis=axis)
+
+    @testing.for_contiguous_axes()
+    @testing.for_dtypes('bhilBHILfdFD')
+    @testing.numpy_cupy_allclose(rtol=1E-5)
+    def test_cub_max(self, xp, dtype, axis):
+        assert cupy.cuda.cub_enabled
+        a = testing.shaped_random(self.shape, xp, dtype)
+        if self.order in ('c', 'C'):
+            a = xp.ascontiguousarray(a)
+        elif self.order in ('f', 'F'):
+            a = xp.asfortranarray(a)
+        return a.max(axis=axis)
diff --git a/tests/cupy_tests/cuda_tests/test_stream.py b/tests/cupy_tests/cuda_tests/test_stream.py
index 4885ea1d7d6..521e04dc054 100644
--- a/tests/cupy_tests/cuda_tests/test_stream.py
+++ b/tests/cupy_tests/cuda_tests/test_stream.py
@@ -19,9 +19,8 @@ def test_eq(self):
         self.assertEqual(null1, null2)
         self.assertNotEqual(null2, null3)
 
-    @attr.gpu
-    def test_del(self):
-        stream = cuda.Stream().use()
+    def check_del(self, null):
+        stream = cuda.Stream(null=null).use()
         stream_ptr = stream.ptr
         x = from_data.array([1, 2, 3])
         del stream
@@ -31,6 +30,14 @@ def test_del(self):
         del stream_ptr
         del x
 
+    @attr.gpu
+    def test_del(self):
+        self.check_del(null=False)
+
+    @attr.gpu
+    def test_del_null(self):
+        self.check_del(null=True)
+
     @attr.gpu
     def test_get_and_add_callback(self):
         N = 100
diff --git a/tests/cupy_tests/math_tests/test_sumprod.py b/tests/cupy_tests/math_tests/test_sumprod.py
index ca38546f39c..c586f7bab08 100644
--- a/tests/cupy_tests/math_tests/test_sumprod.py
+++ b/tests/cupy_tests/math_tests/test_sumprod.py
@@ -192,6 +192,77 @@ def test_prod_dtype(self, xp, src_dtype, dst_dtype):
         return a.prod(dtype=dst_dtype)
 
 
+# This class compares CUB results against NumPy's
+@testing.parameterize(*testing.product({
+    'shape': [(10,), (10, 20), (10, 20, 30), (10, 20, 30, 40)],
+    'order': ('C', 'F'),
+}))
+@testing.gpu
+@unittest.skipIf(cupy.cuda.cub_enabled is False, 'The CUB module is not built')
+class TestCUBreduction(unittest.TestCase):
+    @testing.for_contiguous_axes()
+    # sum supports fewer dtypes; float16 is skipped as it is less accurate
+    @testing.for_dtypes('lLfdFD')
+    @testing.numpy_cupy_allclose(rtol=1E-5)
+    def test_cub_sum(self, xp, dtype, axis):
+        assert cupy.cuda.cub_enabled
+        a = testing.shaped_random(self.shape, xp, dtype)
+        if self.order in ('c', 'C'):
+            a = xp.ascontiguousarray(a)
+        elif self.order in ('f', 'F'):
+            a = xp.asfortranarray(a)
+        return a.sum(axis=axis)
+
+    @testing.for_contiguous_axes()
+    # prod supports fewer dtypes; float16 is skipped as it is less accurate
+    @testing.for_dtypes('lLfdFD')
+    @testing.numpy_cupy_allclose(rtol=1E-5)
+    def test_cub_prod(self, xp, dtype, axis):
+        assert cupy.cuda.cub_enabled
+        a = testing.shaped_random(self.shape, xp, dtype)
+        if self.order in ('c', 'C'):
+            a = xp.ascontiguousarray(a)
+        elif self.order in ('f', 'F'):
+            a = xp.asfortranarray(a)
+        return a.prod(axis=axis)
+
+    # TODO(leofang): test axis after support is added
+    # float16 is skipped as it is less accurate
+    @testing.for_dtypes('bhilBHILfdFD')
+    @testing.numpy_cupy_allclose(rtol=1E-4)
+    def test_cub_cumsum(self, xp, dtype):
+        assert cupy.cuda.cub_enabled
+        a = testing.shaped_random(self.shape, xp, dtype)
+        if self.order in ('c', 'C'):
+            a = xp.ascontiguousarray(a)
+        elif self.order in ('f', 'F'):
+            a = xp.asfortranarray(a)
+        return a.cumsum()
+
+    # TODO(leofang): test axis after support is added
+    # float16 is skipped as it is less accurate
+    @testing.for_dtypes('bhilBHILfdFD')
+    @testing.numpy_cupy_allclose(rtol=1E-4)
+    def test_cub_cumprod(self, xp, dtype):
+        assert cupy.cuda.cub_enabled
+        a = testing.shaped_random(self.shape, xp, dtype)
+        if self.order in ('c', 'C'):
+            a = xp.ascontiguousarray(a)
+        elif self.order in ('f', 'F'):
+            a = xp.asfortranarray(a)
+        result = a.cumprod()
+        # When testing cumprod against complex arrays, the gotcha is that
+        # CuPy may produce Inf at the position where NumPy starts to give
+        # NaN, so assert_allclose would raise an error when it examines
+        # the positions of the NaNs. Since this is both algorithm and
+        # architecture dependent, we have no control over the behavior
+        # and can only work around it by manually converting Inf to NaN.
+        if dtype in (numpy.complex64, numpy.complex128):
+            pos = xp.where(xp.isinf(result))
+            result[pos] = xp.nan + 1j * xp.nan
+        return result
+
+
 @testing.parameterize(
     *testing.product({
         'shape': [(2, 3, 4), (20, 30, 40)],
diff --git a/tests/cupy_tests/sorting_tests/test_search.py b/tests/cupy_tests/sorting_tests/test_search.py
index 8235f0afdc0..87420de87dc 100644
--- a/tests/cupy_tests/sorting_tests/test_search.py
+++ b/tests/cupy_tests/sorting_tests/test_search.py
@@ -159,6 +159,38 @@ def test_argmin_zero_size_axis1(self, xp, dtype):
         return a.argmin(axis=1)
 
 
+# This class compares CUB results against NumPy's
+# TODO(leofang): test axis after support is added
+@testing.parameterize(*testing.product({
+    'shape': [(10,), (10, 20), (10, 20, 30), (10, 20, 30, 40)],
+    'order': ('C', 'F'),
+}))
+@testing.gpu
+@unittest.skipIf(cupy.cuda.cub_enabled is False, 'The CUB module is not built')
+class TestCUBreduction(unittest.TestCase):
+    @testing.for_dtypes('bhilBHILefdFD')
+    @testing.numpy_cupy_allclose(rtol=1E-5)
+    def test_cub_argmin(self, xp, dtype):
+        assert cupy.cuda.cub_enabled
+        a = testing.shaped_random(self.shape, xp, dtype)
+        if self.order == 'C':
+            a = xp.ascontiguousarray(a)
+        else:
+            a = xp.asfortranarray(a)
+        return a.argmin()
+
+    @testing.for_dtypes('bhilBHILefdFD')
+    @testing.numpy_cupy_allclose(rtol=1E-5)
+    def test_cub_argmax(self, xp, dtype):
+        assert cupy.cuda.cub_enabled
+        a = testing.shaped_random(self.shape, xp, dtype)
+        if self.order == 'C':
+            a = xp.ascontiguousarray(a)
+        else:
+            a = xp.asfortranarray(a)
+        return a.argmax()
+
+
 @testing.gpu
 @testing.parameterize(*testing.product({
     'func': ['argmin', 'argmax'],
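
For context on the `stream.pyx` hunk: the old destructor consulted the thread-local state only when `self.ptr` was non-zero, so deleting a `Stream(null=True)` (whose `ptr` is 0) while it was current left the thread-local current stream dangling. The sketch below is not part of the patch; it illustrates the intended behavior after the fix, mirroring the new `check_del` test, and assumes a GPU is available:

```python
from cupy import cuda

# Stream(null=True) wraps the null stream, so stream.ptr == 0 and the old
# destructor skipped its thread-local bookkeeping entirely.
stream = cuda.Stream(null=True).use()
assert cuda.get_current_stream() == stream
del stream
# With the new `else` branch, the current stream is reset to Stream.null
# instead of being left pointing at a destroyed object.
assert cuda.get_current_stream() == cuda.Stream.null
```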
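The new `for_contiguous_axes` helper reads `self.shape` and `self.order` from the test case, so it only composes with classes decorated by `testing.parameterize` that provide both attributes. A minimal usage sketch follows; the class name and dtype list are illustrative, everything else mirrors the tests added in this patch:

```python
import unittest

from cupy import testing


@testing.parameterize(*testing.product({
    'shape': [(10, 20, 30)],
    'order': ('C', 'F'),
}))
@testing.gpu
class TestContiguousAxesExample(unittest.TestCase):  # illustrative name
    @testing.for_contiguous_axes()  # 'C' order: (2,), (1, 2), (0, 1, 2)
    @testing.for_dtypes('fd')       # illustrative dtype subset
    @testing.numpy_cupy_allclose(rtol=1e-5)
    def test_min(self, xp, dtype, axis):
        a = testing.shaped_random(self.shape, xp, dtype)
        return a.min(axis=axis)
```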
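With the guarded import in `_optimize.py`, `import cupyx` now succeeds even when Optuna is absent, and the failure is deferred to the moment `optimize()` is actually entered. A sketch of what callers see; the fallback pattern is illustrative, not prescribed by the patch:

```python
import cupy
import cupyx.optimizing

x = cupy.arange(6, dtype=cupy.float32).reshape(2, 3)
try:
    # Entering the context tunes the reduction kernel via Optuna.
    with cupyx.optimizing.optimize():
        y = x.sum()
except RuntimeError:
    # Raised by this patch when Optuna is not installed.
    y = x.sum()
```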
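The Inf-to-NaN masking in `test_cub_cumprod` can be seen in isolation with plain NumPy; the input below is made up to force an overflow in a complex cumulative product:

```python
import numpy

# Overflowing complex products yield some mix of Inf and NaN, and exactly
# where each appears is algorithm and architecture dependent.  Masking
# every Inf position with NaN makes the two results comparable.
a = numpy.full(4, 1e200 + 0j, dtype=numpy.complex128)
result = a.cumprod()
pos = numpy.where(numpy.isinf(result))
result[pos] = numpy.nan + 1j * numpy.nan
```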