Merge pull request #27 from cupy/update-to-v1-22
Update to Chainer v1.22.0
unnonouno committed Mar 14, 2017
2 parents 07f2656 + 9e375ad commit a9ed101
Showing 38 changed files with 1,785 additions and 151 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -13,3 +13,4 @@ _readthedocs_build
chainer.egg-info/
dist/
htmlcov/
.idea/
7 changes: 6 additions & 1 deletion cupy/__init__.py
@@ -17,7 +17,7 @@
'--no-cache-dir -vvvv`.\n\n'
'original error: {}'.format(exc_info[1]))

six.reraise(RuntimeError, RuntimeError(msg), exc_info[2])
six.reraise(ImportError, ImportError(msg), exc_info[2])

__version__ = pkg_resources.get_distribution('cupy').version

@@ -144,6 +144,7 @@
from cupy.creation.ranges import arange # NOQA
from cupy.creation.ranges import linspace # NOQA
from cupy.creation.ranges import logspace # NOQA
from cupy.creation.ranges import meshgrid # NOQA

from cupy.creation.matrix import diag # NOQA
from cupy.creation.matrix import diagflat # NOQA
@@ -187,9 +188,11 @@
from cupy.manipulation.tiling import repeat # NOQA
from cupy.manipulation.tiling import tile # NOQA

from cupy.manipulation.rearrange import flip # NOQA
from cupy.manipulation.rearrange import fliplr # NOQA
from cupy.manipulation.rearrange import flipud # NOQA
from cupy.manipulation.rearrange import roll # NOQA
from cupy.manipulation.rearrange import rot90 # NOQA

# -----------------------------------------------------------------------------
# Binary operations
@@ -326,6 +329,7 @@
from cupy.math.hyperbolic import tanh # NOQA

from cupy.math.rounding import ceil # NOQA
from cupy.math.rounding import fix # NOQA
from cupy.math.rounding import floor # NOQA
from cupy.math.rounding import rint # NOQA
from cupy.math.rounding import trunc # NOQA
@@ -380,6 +384,7 @@
# -----------------------------------------------------------------------------
pad = padding.pad.pad


# -----------------------------------------------------------------------------
# Sorting, searching, and counting
# -----------------------------------------------------------------------------
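
The newly exported rearrange and rounding helpers (flip, fliplr, flipud, rot90, fix) follow their NumPy counterparts. A minimal usage sketch, assuming a CUDA device is available (illustrative, not part of the patch):

    import cupy

    a = cupy.arange(6).reshape(2, 3)
    print(cupy.flip(a, axis=0))               # rows reversed
    print(cupy.rot90(a))                      # rotated 90 degrees counter-clockwise
    print(cupy.fix(cupy.array([-1.7, 1.7])))  # rounds towards zero: [-1.  1.]
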
76 changes: 50 additions & 26 deletions cupy/core/core.pyx
@@ -1131,7 +1131,13 @@ cdef class ndarray:
mask_exists = False
for i, s in enumerate(slices):
if isinstance(s, (list, numpy.ndarray)):
is_list = isinstance(s, list)
s = array(s)
# handle the case when s is an empty list
if is_list and s.size == 0:
s = s.astype(numpy.int32)
if s.ndim > 1:
s = s[0]
slices[i] = s
if isinstance(s, ndarray):
if issubclass(s.dtype.type, numpy.integer):
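
The new branch lets an empty Python list be used as an integer index without a dtype error; a short sketch of the behaviour it enables (illustrative):

    import cupy

    a = cupy.arange(5)
    b = a[[]]          # empty list index is converted to an empty int32 index array
    print(b.shape)     # (0,)
    print(b.dtype)     # same dtype as a
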
@@ -1261,7 +1267,7 @@ cdef class ndarray:
>>> x = cupy.arange(3)
>>> x[[1, 3]] = 10
>>> x
array([10, 10, 2])
array([10, 10, 2])
.. note::
@@ -1274,8 +1280,8 @@
>>> i = cupy.arange(10000) % 2
>>> v = cupy.arange(10000).astype(numpy.float)
>>> a[i] = v
>>> a
array([9150., 9151.])
>>> a # doctest: +SKIP
array([ 9150., 9151.])
On the other hand, NumPy stores the value corresponding to the
last index among the indices referencing duplicate locations.
@@ -1286,7 +1292,7 @@
>>> v_cpu = numpy.arange(10000).astype(numpy.float)
>>> a_cpu[i_cpu] = v_cpu
>>> a_cpu
array([9998., 9999.])
array([ 9998., 9999.])
"""
_scatter_op(self, slices, value, 'update')
@@ -1742,6 +1748,9 @@ cpdef ndarray array(obj, dtype=None, bint copy=True, Py_ssize_t ndmin=0):

ndim = a._shape.size()
if ndmin > ndim:
if a is obj:
# When `copy` is False, `a` is same as `obj`.
a = a.view()
a.shape = (1,) * (ndmin - ndim) + a.shape
return a
else:
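
With the added view, `cupy.array(obj, copy=False, ndmin=...)` no longer reshapes the caller's array in place when it prepends dimensions; a sketch of the intended behaviour:

    import cupy

    x = cupy.arange(3)
    y = cupy.array(x, copy=False, ndmin=2)
    print(y.shape)   # (1, 3) -- dimensions prepended on a view
    print(x.shape)   # (3,)   -- the original array is left untouched
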
@@ -2396,6 +2405,14 @@ cpdef _prepare_mask_indexing_single(ndarray a, ndarray mask, int axis):
cdef int n_true
cdef tuple lshape, rshape, out_shape

lshape = a.shape[:axis]
rshape = a.shape[axis + mask.ndim:]

if mask.size == 0:
masked_shape = lshape + (0,) + rshape
mask_br = mask._reshape(masked_shape)
return mask_br, mask_br, masked_shape

# Get number of True in the mask to determine the shape of the array
# after masking.
if mask.size <= 2 ** 31 - 1:
@@ -2404,8 +2421,6 @@ cpdef _prepare_mask_indexing_single(ndarray a, ndarray mask, int axis):
mask_type = numpy.int64
mask_scanned = scan(mask.astype(mask_type).ravel()) # starts with 1
n_true = int(mask_scanned[-1])
lshape = a.shape[:axis]
rshape = a.shape[axis + mask.ndim:]
masked_shape = lshape + (n_true,) + rshape

# When mask covers the entire array, broadcasting is not necessary.
@@ -2432,6 +2447,8 @@ cpdef ndarray _getitem_mask_single(ndarray a, ndarray mask, int axis):
mask, mask_scanned, masked_shape = _prepare_mask_indexing_single(
a, mask, axis)
out = ndarray(masked_shape, dtype=a.dtype)
if out.size == 0:
return out
return _getitem_mask_kernel(a, mask, mask_scanned, out)
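
The early return covers boolean masks with zero elements; a sketch of the case it handles (illustrative):

    import cupy

    a = cupy.empty((0, 4))
    mask = cupy.empty((0,), dtype=bool)   # boolean mask with no elements
    print(a[mask].shape)                  # (0, 4) -- returned without launching a kernel
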


@@ -2578,24 +2595,12 @@ cpdef _scatter_op_mask_single(ndarray a, ndarray mask, v, int axis, op):
raise ValueError('provided op is not supported')


cpdef _scatter_op_multiple(ndarray a, list slices, v, op):
cdef ndarray a_interm, reduced_idx
cdef int li, ri

if op != 'update':
raise TypeError('scatter_op_multiple does not support op other than'
'update yet')

a_interm, reduced_idx, li, ri =\
_prepare_multiple_array_indexing(a, slices)
_scatter_op_single(a_interm, reduced_idx, v, li=li, ri=ri, op=op)


cpdef _scatter_op(ndarray a, slices, value, op):
cdef Py_ssize_t i, ndim, n_newaxes, n_ellipses, ellipsis, axis
cdef Py_ssize_t n_not_slice_none, mask_i
cdef Py_ssize_t ellipsis_size
cdef ndarray v, x, y
cdef ndarray v, x, y, a_interm, reduced_idx
cdef int li, ri

if not isinstance(slices, tuple):
slices = [slices]
@@ -2694,7 +2699,11 @@ cpdef _scatter_op(ndarray a, slices, value, op):
_scatter_op_single(a, adv_slices[axis], value,
li=axis, ri=axis, op=op)
return
_scatter_op_multiple(a, adv_slices, value, op)

# scatter_op with multiple integer arrays
a_interm, reduced_idx, li, ri =\
_prepare_multiple_array_indexing(a, adv_slices)
_scatter_op_single(a_interm, reduced_idx, value, li=li, ri=ri, op=op)
return

if op == 'update':
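
Inlining the multiple-array path keeps assignment through several integer index arrays on the same code path as retrieval; a usage sketch (illustrative):

    import cupy

    a = cupy.zeros((3, 3), dtype=cupy.float32)
    rows = cupy.array([0, 1, 2])
    cols = cupy.array([2, 1, 0])
    a[rows, cols] = 7.0   # scatter update through two integer index arrays
    print(a)
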
@@ -2951,9 +2960,9 @@ cpdef ndarray matmul(ndarray a, ndarray b):
.. note::
Differences to numpy or missing features:
Currently the output must be float32 (float64, comlplex64
and complex128 follow later). This means, that
numpy.result_type(a.dtype, b.dtype) have to be numpy.float32.
Currently the output must be real (float16, float32, uint8, ...),
complex64 and complex128 follow later. This means, that
numpy.result_type(a.dtype, b.dtype) have to be real.
The out array as input is currently not supported.
@@ -2974,7 +2983,11 @@ cpdef ndarray matmul(ndarray a, ndarray b):
cdef int batchCount
cdef ndarray out, ap, bp, outp

dtype = numpy.result_type(a.dtype, b.dtype)
ret_dtype = numpy.result_type(a.dtype, b.dtype)
dtype = numpy.find_common_type((ret_dtype, 'f'), ())

a = a.astype(dtype, copy=False)
b = b.astype(dtype, copy=False)

if a.ndim == 1:
a = a.reshape(1, len(a))
@@ -3106,7 +3119,12 @@ cpdef ndarray matmul(ndarray a, ndarray b):
else:
raise TypeError(dtype, a.dtype, b.dtype)

return out
if dtype == ret_dtype:
return out
else:
ret = ndarray(out_shape, ret_dtype)
elementwise_copy(out, ret)
return ret
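
With the split between `ret_dtype` and the compute `dtype`, integer operands are promoted to at least float32 for the GEMM call and the result is cast back on return; a sketch, assuming the promotion path shown above:

    import numpy
    import cupy

    a = cupy.arange(6, dtype=numpy.int8).reshape(2, 3)
    b = cupy.arange(6, dtype=numpy.int8).reshape(3, 2)
    c = cupy.matmul(a, b)
    print(c.dtype)   # int8 -- computed in float32 internally, cast back before returning
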


cdef _cuda_runtime_version = None
Expand Down Expand Up @@ -3158,6 +3176,12 @@ cpdef ndarray tensordot_core(
if out.dtype != dtype:
out = ndarray(ret_shape, dtype)

if m == 1 and n == 1:
(a.ravel() * b.ravel()).sum(out=out.reshape(()))
if out is not ret:
elementwise_copy(out, ret)
return ret

# It copies the operands if needed
if a._shape.size() != 2 or a._shape[0] != k or a._shape[1] != n:
shape.clear()
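
The new branch handles contractions whose output has a single element along both GEMM dimensions by reducing an elementwise product instead of calling GEMM; assuming a plain vector dot product reaches this path, a sketch:

    import cupy

    x = cupy.arange(4, dtype=cupy.float32)
    y = cupy.arange(4, dtype=cupy.float32)
    print(cupy.dot(x, y))   # 14.0, i.e. (x * y).sum()
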
66 changes: 65 additions & 1 deletion cupy/creation/ranges.py
@@ -126,7 +126,71 @@ def logspace(start, stop, num=50, endpoint=True, base=10.0, dtype=None):
return core.power(base, y).astype(dtype)


# TODO(okuta): Implement meshgrid
def meshgrid(*xi, **kwargs):
"""Return coordinate matrices from coordinate vectors.
Given one-dimensional coordinate arrays x1, x2, ..., xn, this function
makes N-D grids.
For one-dimensional arrays x1, x2, ..., xn with lengths ``Ni = len(xi)``,
this function returns ``(N1, N2, N3, ..., Nn)`` shaped arrays
if indexing='ij' or ``(N2, N1, N3, ..., Nn)`` shaped arrays
if indexing='xy'.
Unlike NumPy, CuPy currently only supports 1-D arrays as inputs.
Also, CuPy does not support ``sparse`` option yet.
Args:
xi (tuple of ndarrays): 1-D arrays representing the coordinates
of a grid.
indexing ({'xy', 'ij'}, optional): Cartesian ('xy', default) or
matrix ('ij') indexing of output.
copy (bool, optional): If ``False``, a view
into the original arrays are returned. Default is True.
Returns:
list of cupy.ndarray
.. seealso:: :func:`numpy.meshgrid`
"""

indexing = kwargs.pop('indexing', 'xy')
copy = bool(kwargs.pop('copy', True))
if kwargs:
raise TypeError(
'meshgrid() got an unexpected keyword argument \'{}\''.format(
list(kwargs)[0]))
if indexing not in ['xy', 'ij']:
raise ValueError('Valid values for `indexing` are \'xy\' and \'ij\'.')

for x in xi:
if x.ndim != 1:
raise ValueError('input has to be 1d')
if not isinstance(x, cupy.ndarray):
raise ValueError('input has to be cupy.ndarray')
if len(xi) <= 1:
return list(xi)

meshes = []
for i, x in enumerate(xi):
if indexing == 'xy' and i == 0:
left_none = 1
elif indexing == 'xy' and i == 1:
left_none = 0
else:
left_none = i

expand_slices = ((None,) * left_none +
(slice(None),) +
(None,) * (len(xi) - (left_none + 1)))
meshes.append(x[expand_slices])
meshes_br = list(cupy.broadcast_arrays(*meshes))

if copy:
for i in range(len(meshes_br)):
meshes_br[i] = meshes_br[i].copy()
return meshes_br


# mgrid
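
A usage sketch of the new `cupy.meshgrid`, mirroring `numpy.meshgrid` for dense 1-D inputs (the `sparse` option is not supported yet):

    import cupy

    x = cupy.arange(3)
    y = cupy.arange(2)
    X, Y = cupy.meshgrid(x, y)                    # 'xy' indexing: shapes (2, 3)
    Xi, Yi = cupy.meshgrid(x, y, indexing='ij')   # 'ij' indexing: shapes (3, 2)
    print(X.shape, Xi.shape)
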
6 changes: 6 additions & 0 deletions cupy/cuda/__init__.py
@@ -8,6 +8,12 @@
from cupy.cuda import profiler # NOQA
from cupy.cuda import stream # NOQA

try:
from cupy.cuda import cusolver # NOQA
cusolver_enabled = True
except ImportError:
cusolver_enabled = False


# import class and function
from cupy.cuda.compiler import compile_with_cache # NOQA
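
The try/except import makes cuSOLVER an optional dependency; downstream code can test the flag instead of catching ImportError itself. A sketch of the feature-check pattern this enables (illustrative):

    from cupy import cuda

    if cuda.cusolver_enabled:
        from cupy.cuda import cusolver   # safe: the module imported successfully
        # ... call cuSOLVER routines here ...
    else:
        print('cuSOLVER is not available in this build')
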
7 changes: 7 additions & 0 deletions cupy/cuda/cublas.pxd
@@ -28,6 +28,9 @@ cpdef enum:
CUBLAS_SIDE_LEFT = 0
CUBLAS_SIDE_RIGHT = 1

CUBLAS_FILL_MODE_LOWER = 0
CUBLAS_FILL_MODE_UPPER = 1


###############################################################################
# Context
@@ -95,6 +98,10 @@ cpdef sgemmBatched(size_t handle, int transa, int transb,
int m, int n, int k, float alpha, size_t Aarray, int lda,
size_t Barray, int ldb, float beta, size_t Carray, int ldc,
int batchCount)
cpdef dgemmBatched(size_t handle, int transa, int transb,
int m, int n, int k, double alpha, size_t Aarray, int lda,
size_t Barray, int ldb, double beta, size_t Carray, int ldc,
int batchCount)

###############################################################################
# BLAS extension
17 changes: 17 additions & 0 deletions cupy/cuda/cublas.pyx
@@ -73,6 +73,11 @@ cdef extern from 'cupy_cuda.h':
int n, int k, const float* alpha, const float** Aarray,
int lda, const float** Barray, int ldb, const float* beta,
float** Carray, int ldc, int batchCount) nogil
int cublasDgemmBatched(
Handle handle, Operation transa, Operation transb, int m,
int n, int k, const double* alpha, const double** Aarray,
int lda, const double** Barray, int ldb, const double* beta,
double** Carray, int ldc, int batchCount) nogil

# BLAS extension
int cublasSgeam(
@@ -346,6 +351,18 @@ cpdef sgemmBatched(
&beta, <float**>Carray, ldc, batchCount)
check_status(status)


cpdef dgemmBatched(
size_t handle, int transa, int transb, int m, int n, int k,
double alpha, size_t Aarray, int lda, size_t Barray, int ldb,
double beta, size_t Carray, int ldc, int batchCount):
with nogil:
status = cublasDgemmBatched(
<Handle>handle, <Operation>transa, <Operation>transb, m, n, k,
&alpha, <const double**>Aarray, lda, <const double**>Barray, ldb,
&beta, <double**>Carray, ldc, batchCount)
check_status(status)

###############################################################################
# BLAS extension
###############################################################################
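
The new `dgemmBatched` wrapper takes raw device pointer arrays, like `sgemmBatched`; at the Python level it is exercised through batched double-precision matrix multiplication. A sketch, assuming stacked float64 operands are routed to the batched GEMM path:

    import cupy

    a = cupy.random.rand(4, 2, 3)    # float64 by default
    b = cupy.random.rand(4, 3, 5)
    c = cupy.matmul(a, b)            # batch of 4 matrix products
    print(c.shape, c.dtype)          # (4, 2, 5) float64
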