Merge pull request #27 from cupy/update-to-v1-22
Update to Chainer v1.22.0
unnonouno committed Mar 14, 2017
2 parents 07f2656 + 9e375ad commit a9ed101
Showing 38 changed files with 1,785 additions and 151 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -13,3 +13,4 @@ _readthedocs_build
chainer.egg-info/
dist/
htmlcov/
.idea/
7 changes: 6 additions & 1 deletion cupy/__init__.py
@@ -17,7 +17,7 @@
'--no-cache-dir -vvvv`.\n\n'
'original error: {}'.format(exc_info[1]))

six.reraise(RuntimeError, RuntimeError(msg), exc_info[2])
six.reraise(ImportError, ImportError(msg), exc_info[2])

__version__ = pkg_resources.get_distribution('cupy').version

@@ -144,6 +144,7 @@
from cupy.creation.ranges import arange # NOQA
from cupy.creation.ranges import linspace # NOQA
from cupy.creation.ranges import logspace # NOQA
from cupy.creation.ranges import meshgrid # NOQA

from cupy.creation.matrix import diag # NOQA
from cupy.creation.matrix import diagflat # NOQA
@@ -187,9 +188,11 @@
from cupy.manipulation.tiling import repeat # NOQA
from cupy.manipulation.tiling import tile # NOQA

from cupy.manipulation.rearrange import flip # NOQA
from cupy.manipulation.rearrange import fliplr # NOQA
from cupy.manipulation.rearrange import flipud # NOQA
from cupy.manipulation.rearrange import roll # NOQA
from cupy.manipulation.rearrange import rot90 # NOQA

# -----------------------------------------------------------------------------
# Binary operations
@@ -326,6 +329,7 @@
from cupy.math.hyperbolic import tanh # NOQA

from cupy.math.rounding import ceil # NOQA
from cupy.math.rounding import fix # NOQA
from cupy.math.rounding import floor # NOQA
from cupy.math.rounding import rint # NOQA
from cupy.math.rounding import trunc # NOQA
@@ -380,6 +384,7 @@
# -----------------------------------------------------------------------------
pad = padding.pad.pad


# -----------------------------------------------------------------------------
# Sorting, searching, and counting
# -----------------------------------------------------------------------------
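
The newly exported rearrange and rounding helpers (flip, fliplr, flipud, rot90, fix) follow their NumPy counterparts. A minimal usage sketch, assuming a CUDA device is available (illustrative, not part of the patch):

    import cupy

    a = cupy.arange(6).reshape(2, 3)
    print(cupy.flip(a, axis=0))               # rows reversed
    print(cupy.rot90(a))                      # rotated 90 degrees counter-clockwise
    print(cupy.fix(cupy.array([-1.7, 1.7])))  # rounds towards zero: [-1.  1.]
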
76 changes: 50 additions & 26 deletions cupy/core/core.pyx
@@ -1131,7 +1131,13 @@ cdef class ndarray:
mask_exists = False
for i, s in enumerate(slices):
if isinstance(s, (list, numpy.ndarray)):
is_list = isinstance(s, list)
s = array(s)
# handle the case when s is an empty list
if is_list and s.size == 0:
s = s.astype(numpy.int32)
if s.ndim > 1:
s = s[0]
slices[i] = s
if isinstance(s, ndarray):
if issubclass(s.dtype.type, numpy.integer):
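
The new branch lets an empty Python list be used as an integer index without a dtype error; a short sketch of the behaviour it enables (illustrative):

    import cupy

    a = cupy.arange(5)
    b = a[[]]          # empty list index is converted to an empty int32 index array
    print(b.shape)     # (0,)
    print(b.dtype)     # same dtype as a
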
@@ -1261,7 +1267,7 @@ cdef class ndarray:
>>> x = cupy.arange(3)
>>> x[[1, 3]] = 10
>>> x
array([10, 10, 2])
array([10, 10, 2])
.. note::
@@ -1274,8 +1280,8 @@
>>> i = cupy.arange(10000) % 2
>>> v = cupy.arange(10000).astype(numpy.float)
>>> a[i] = v
>>> a
array([9150., 9151.])
>>> a # doctest: +SKIP
array([ 9150., 9151.])
On the other hand, NumPy stores the value corresponding to the
last index among the indices referencing duplicate locations.
@@ -1286,7 +1292,7 @@
>>> v_cpu = numpy.arange(10000).astype(numpy.float)
>>> a_cpu[i_cpu] = v_cpu
>>> a_cpu
array([9998., 9999.])
array([ 9998., 9999.])
"""
_scatter_op(self, slices, value, 'update')
@@ -1742,6 +1748,9 @@ cpdef ndarray array(obj, dtype=None, bint copy=True, Py_ssize_t ndmin=0):

ndim = a._shape.size()
if ndmin > ndim:
if a is obj:
# When `copy` is False, `a` is same as `obj`.
a = a.view()
a.shape = (1,) * (ndmin - ndim) + a.shape
return a
else:
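
With the added view, `cupy.array(obj, copy=False, ndmin=...)` no longer reshapes the caller's array in place when it prepends dimensions; a sketch of the intended behaviour:

    import cupy

    x = cupy.arange(3)
    y = cupy.array(x, copy=False, ndmin=2)
    print(y.shape)   # (1, 3) -- dimensions prepended on a view
    print(x.shape)   # (3,)   -- the original array is left untouched
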
@@ -2396,6 +2405,14 @@ cpdef _prepare_mask_indexing_single(ndarray a, ndarray mask, int axis):
cdef int n_true
cdef tuple lshape, rshape, out_shape

lshape = a.shape[:axis]
rshape = a.shape[axis + mask.ndim:]

if mask.size == 0:
masked_shape = lshape + (0,) + rshape
mask_br = mask._reshape(masked_shape)
return mask_br, mask_br, masked_shape

# Get number of True in the mask to determine the shape of the array
# after masking.
if mask.size <= 2 ** 31 - 1:
@@ -2404,8 +2421,6 @@ cpdef _prepare_mask_indexing_single(ndarray a, ndarray mask, int axis):
mask_type = numpy.int64
mask_scanned = scan(mask.astype(mask_type).ravel()) # starts with 1
n_true = int(mask_scanned[-1])
lshape = a.shape[:axis]
rshape = a.shape[axis + mask.ndim:]
masked_shape = lshape + (n_true,) + rshape

# When mask covers the entire array, broadcasting is not necessary.
@@ -2432,6 +2447,8 @@ cpdef ndarray _getitem_mask_single(ndarray a, ndarray mask, int axis):
mask, mask_scanned, masked_shape = _prepare_mask_indexing_single(
a, mask, axis)
out = ndarray(masked_shape, dtype=a.dtype)
if out.size == 0:
return out
return _getitem_mask_kernel(a, mask, mask_scanned, out)
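
The early return covers boolean masks with zero elements; a sketch of the case it handles (illustrative):

    import cupy

    a = cupy.empty((0, 4))
    mask = cupy.empty((0,), dtype=bool)   # boolean mask with no elements
    print(a[mask].shape)                  # (0, 4) -- returned without launching a kernel
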


@@ -2578,24 +2595,12 @@ cpdef _scatter_op_mask_single(ndarray a, ndarray mask, v, int axis, op):
raise ValueError('provided op is not supported')


cpdef _scatter_op_multiple(ndarray a, list slices, v, op):
cdef ndarray a_interm, reduced_idx
cdef int li, ri

if op != 'update':
raise TypeError('scatter_op_multiple does not support op other than'
'update yet')

a_interm, reduced_idx, li, ri =\
_prepare_multiple_array_indexing(a, slices)
_scatter_op_single(a_interm, reduced_idx, v, li=li, ri=ri, op=op)


cpdef _scatter_op(ndarray a, slices, value, op):
cdef Py_ssize_t i, ndim, n_newaxes, n_ellipses, ellipsis, axis
cdef Py_ssize_t n_not_slice_none, mask_i
cdef Py_ssize_t ellipsis_size
cdef ndarray v, x, y
cdef ndarray v, x, y, a_interm, reduced_idx
cdef int li, ri

if not isinstance(slices, tuple):
slices = [slices]
@@ -2694,7 +2699,11 @@ cpdef _scatter_op(ndarray a, slices, value, op):
_scatter_op_single(a, adv_slices[axis], value,
li=axis, ri=axis, op=op)
return
_scatter_op_multiple(a, adv_slices, value, op)

# scatter_op with multiple integer arrays
a_interm, reduced_idx, li, ri =\
_prepare_multiple_array_indexing(a, adv_slices)
_scatter_op_single(a_interm, reduced_idx, value, li=li, ri=ri, op=op)
return

if op == 'update':
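
Inlining the multiple-array path keeps assignment through several integer index arrays on the same code path as retrieval; a usage sketch (illustrative):

    import cupy

    a = cupy.zeros((3, 3), dtype=cupy.float32)
    rows = cupy.array([0, 1, 2])
    cols = cupy.array([2, 1, 0])
    a[rows, cols] = 7.0   # scatter update through two integer index arrays
    print(a)
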
@@ -2951,9 +2960,9 @@ cpdef ndarray matmul(ndarray a, ndarray b):
.. note::
Differences to numpy or missing features:
Currently the output must be float32 (float64, comlplex64
and complex128 follow later). This means, that
numpy.result_type(a.dtype, b.dtype) have to be numpy.float32.
Currently the output must be real (float16, float32, uint8, ...),
complex64 and complex128 follow later. This means, that
numpy.result_type(a.dtype, b.dtype) have to be real.
The out array as input is currently not supported.
@@ -2974,7 +2983,11 @@ cpdef ndarray matmul(ndarray a, ndarray b):
cdef int batchCount
cdef ndarray out, ap, bp, outp

dtype = numpy.result_type(a.dtype, b.dtype)
ret_dtype = numpy.result_type(a.dtype, b.dtype)
dtype = numpy.find_common_type((ret_dtype, 'f'), ())

a = a.astype(dtype, copy=False)
b = b.astype(dtype, copy=False)

if a.ndim == 1:
a = a.reshape(1, len(a))
@@ -3106,7 +3119,12 @@ cpdef ndarray matmul(ndarray a, ndarray b):
else:
raise TypeError(dtype, a.dtype, b.dtype)

return out
if dtype == ret_dtype:
return out
else:
ret = ndarray(out_shape, ret_dtype)
elementwise_copy(out, ret)
return ret
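
With the split between `ret_dtype` and the compute `dtype`, integer operands are promoted to at least float32 for the GEMM call and the result is cast back on return; a sketch, assuming the promotion path shown above:

    import numpy
    import cupy

    a = cupy.arange(6, dtype=numpy.int8).reshape(2, 3)
    b = cupy.arange(6, dtype=numpy.int8).reshape(3, 2)
    c = cupy.matmul(a, b)
    print(c.dtype)   # int8 -- computed in float32 internally, cast back before returning
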


cdef _cuda_runtime_version = None
Expand Down Expand Up @@ -3158,6 +3176,12 @@ cpdef ndarray tensordot_core(
if out.dtype != dtype:
out = ndarray(ret_shape, dtype)

if m == 1 and n == 1:
(a.ravel() * b.ravel()).sum(out=out.reshape(()))
if out is not ret:
elementwise_copy(out, ret)
return ret

# It copies the operands if needed
if a._shape.size() != 2 or a._shape[0] != k or a._shape[1] != n:
shape.clear()
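
The new branch handles contractions whose output has a single element along both GEMM dimensions by reducing an elementwise product instead of calling GEMM; assuming a plain vector dot product reaches this path, a sketch:

    import cupy

    x = cupy.arange(4, dtype=cupy.float32)
    y = cupy.arange(4, dtype=cupy.float32)
    print(cupy.dot(x, y))   # 14.0, i.e. (x * y).sum()
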
66 changes: 65 additions & 1 deletion cupy/creation/ranges.py
@@ -126,7 +126,71 @@ def logspace(start, stop, num=50, endpoint=True, base=10.0, dtype=None):
return core.power(base, y).astype(dtype)


# TODO(okuta): Implement meshgrid
def meshgrid(*xi, **kwargs):
"""Return coordinate matrices from coordinate vectors.
Given one-dimensional coordinate arrays x1, x2, ..., xn, this function
makes N-D grids.
For one-dimensional arrays x1, x2, ..., xn with lengths ``Ni = len(xi)``,
this function returns ``(N1, N2, N3, ..., Nn)`` shaped arrays
if indexing='ij' or ``(N2, N1, N3, ..., Nn)`` shaped arrays
if indexing='xy'.
Unlike NumPy, CuPy currently only supports 1-D arrays as inputs.
Also, CuPy does not support ``sparse`` option yet.
Args:
xi (tuple of ndarrays): 1-D arrays representing the coordinates
of a grid.
indexing ({'xy', 'ij'}, optional): Cartesian ('xy', default) or
matrix ('ij') indexing of output.
copy (bool, optional): If ``False``, a view
into the original arrays are returned. Default is True.
Returns:
list of cupy.ndarray
.. seealso:: :func:`numpy.meshgrid`
"""

indexing = kwargs.pop('indexing', 'xy')
copy = bool(kwargs.pop('copy', True))
if kwargs:
raise TypeError(
'meshgrid() got an unexpected keyword argument \'{}\''.format(
list(kwargs)[0]))
if indexing not in ['xy', 'ij']:
raise ValueError('Valid values for `indexing` are \'xy\' and \'ij\'.')

for x in xi:
if x.ndim != 1:
raise ValueError('input has to be 1d')
if not isinstance(x, cupy.ndarray):
raise ValueError('input has to be cupy.ndarray')
if len(xi) <= 1:
return list(xi)

meshes = []
for i, x in enumerate(xi):
if indexing == 'xy' and i == 0:
left_none = 1
elif indexing == 'xy' and i == 1:
left_none = 0
else:
left_none = i

expand_slices = ((None,) * left_none +
(slice(None),) +
(None,) * (len(xi) - (left_none + 1)))
meshes.append(x[expand_slices])
meshes_br = list(cupy.broadcast_arrays(*meshes))

if copy:
for i in range(len(meshes_br)):
meshes_br[i] = meshes_br[i].copy()
return meshes_br


# mgrid
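
A usage sketch of the new `cupy.meshgrid`, mirroring `numpy.meshgrid` for dense 1-D inputs (the `sparse` option is not supported yet):

    import cupy

    x = cupy.arange(3)
    y = cupy.arange(2)
    X, Y = cupy.meshgrid(x, y)                    # 'xy' indexing: shapes (2, 3)
    Xi, Yi = cupy.meshgrid(x, y, indexing='ij')   # 'ij' indexing: shapes (3, 2)
    print(X.shape, Xi.shape)
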
6 changes: 6 additions & 0 deletions cupy/cuda/__init__.py
@@ -8,6 +8,12 @@
from cupy.cuda import profiler # NOQA
from cupy.cuda import stream # NOQA

try:
from cupy.cuda import cusolver # NOQA
cusolver_enabled = True
except ImportError:
cusolver_enabled = False


# import class and function
from cupy.cuda.compiler import compile_with_cache # NOQA
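
The try/except import makes cuSOLVER an optional dependency; downstream code can test the flag instead of catching ImportError itself. A sketch of the feature-check pattern this enables (illustrative):

    from cupy import cuda

    if cuda.cusolver_enabled:
        from cupy.cuda import cusolver   # safe: the module imported successfully
        # ... call cuSOLVER routines here ...
    else:
        print('cuSOLVER is not available in this build')
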
7 changes: 7 additions & 0 deletions cupy/cuda/cublas.pxd
@@ -28,6 +28,9 @@ cpdef enum:
CUBLAS_SIDE_LEFT = 0
CUBLAS_SIDE_RIGHT = 1

CUBLAS_FILL_MODE_LOWER = 0
CUBLAS_FILL_MODE_UPPER = 1


###############################################################################
# Context
@@ -95,6 +98,10 @@ cpdef sgemmBatched(size_t handle, int transa, int transb,
int m, int n, int k, float alpha, size_t Aarray, int lda,
size_t Barray, int ldb, float beta, size_t Carray, int ldc,
int batchCount)
cpdef dgemmBatched(size_t handle, int transa, int transb,
int m, int n, int k, double alpha, size_t Aarray, int lda,
size_t Barray, int ldb, double beta, size_t Carray, int ldc,
int batchCount)

###############################################################################
# BLAS extension
17 changes: 17 additions & 0 deletions cupy/cuda/cublas.pyx
@@ -73,6 +73,11 @@ cdef extern from 'cupy_cuda.h':
int n, int k, const float* alpha, const float** Aarray,
int lda, const float** Barray, int ldb, const float* beta,
float** Carray, int ldc, int batchCount) nogil
int cublasDgemmBatched(
Handle handle, Operation transa, Operation transb, int m,
int n, int k, const double* alpha, const double** Aarray,
int lda, const double** Barray, int ldb, const double* beta,
double** Carray, int ldc, int batchCount) nogil

# BLAS extension
int cublasSgeam(
@@ -346,6 +351,18 @@ cpdef sgemmBatched(
&beta, <float**>Carray, ldc, batchCount)
check_status(status)


cpdef dgemmBatched(
size_t handle, int transa, int transb, int m, int n, int k,
double alpha, size_t Aarray, int lda, size_t Barray, int ldb,
double beta, size_t Carray, int ldc, int batchCount):
with nogil:
status = cublasDgemmBatched(
<Handle>handle, <Operation>transa, <Operation>transb, m, n, k,
&alpha, <const double**>Aarray, lda, <const double**>Barray, ldb,
&beta, <double**>Carray, ldc, batchCount)
check_status(status)

###############################################################################
# BLAS extension
###############################################################################
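
The new `dgemmBatched` wrapper takes raw device pointer arrays, like `sgemmBatched`; at the Python level it is exercised through batched double-precision matrix multiplication. A sketch, assuming stacked float64 operands are routed to the batched GEMM path:

    import cupy

    a = cupy.random.rand(4, 2, 3)    # float64 by default
    b = cupy.random.rand(4, 3, 5)
    c = cupy.matmul(a, b)            # batch of 4 matrix products
    print(c.shape, c.dtype)          # (4, 2, 5) float64
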