Skip to content

Commit

Permalink
Merge 829f4fd into 3d8d4f4
Browse files: browse the repository at this point in the history
  • Loading branch information
kmaehashi committed Apr 6, 2018
2 parents 3d8d4f4 + 829f4fd commit 4a62e06
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 0 deletions.
20 changes: 20 additions & 0 deletions cupy/cuda/cublas.pyx
Expand Up @@ -417,6 +417,7 @@ cpdef ddot(size_t handle, int n, size_t x, int incx, size_t y, int incy,

cpdef cdotu(size_t handle, int n, size_t x, int incx, size_t y, int incy,
size_t result):
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasCdotu(
<Handle>handle, n, <cuComplex*>x, incx, <cuComplex*>y, incy,
Expand All @@ -426,6 +427,7 @@ cpdef cdotu(size_t handle, int n, size_t x, int incx, size_t y, int incy,

cpdef cdotc(size_t handle, int n, size_t x, int incx, size_t y, int incy,
size_t result):
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasCdotc(
<Handle>handle, n, <cuComplex*>x, incx, <cuComplex*>y, incy,
Expand All @@ -435,6 +437,7 @@ cpdef cdotc(size_t handle, int n, size_t x, int incx, size_t y, int incy,

cpdef zdotu(size_t handle, int n, size_t x, int incx, size_t y, int incy,
size_t result):
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasZdotu(
<Handle>handle, n, <cuDoubleComplex*>x, incx,
Expand Down Expand Up @@ -496,6 +499,7 @@ cpdef cgemv(size_t handle, int trans, int m, int n, float complex alpha,
size_t y, int incy):
cdef cuComplex a = get_cu_complex(alpha)
cdef cuComplex b = get_cu_complex(beta)
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasCgemv(
<Handle>handle, <Operation>trans, m, n, &a, <cuComplex*>A, lda,
Expand All @@ -508,6 +512,7 @@ cpdef zgemv(size_t handle, int trans, int m, int n, double complex alpha,
size_t y, int incy):
cdef cuDoubleComplex a = get_cu_double_complex(alpha)
cdef cuDoubleComplex b = get_cu_double_complex(beta)
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasZgemv(
<Handle>handle, <Operation>trans, m, n, &a, <cuDoubleComplex*>A,
Expand Down Expand Up @@ -538,6 +543,7 @@ cpdef dger(size_t handle, int m, int n, double alpha, size_t x, int incx,
cpdef cgeru(size_t handle, int m, int n, float complex alpha, size_t x,
int incx, size_t y, int incy, size_t A, int lda):
cdef cuComplex a = get_cu_complex(alpha)
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasCgeru(
<Handle>handle, m, n, &a, <cuComplex*>x, incx,
Expand All @@ -548,6 +554,7 @@ cpdef cgeru(size_t handle, int m, int n, float complex alpha, size_t x,
cpdef cgerc(size_t handle, int m, int n, float complex alpha, size_t x,
int incx, size_t y, int incy, size_t A, int lda):
cdef cuComplex a = get_cu_complex(alpha)
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasCgerc(
<Handle>handle, m, n, &a, <cuComplex*>x, incx,
Expand All @@ -558,6 +565,7 @@ cpdef cgerc(size_t handle, int m, int n, float complex alpha, size_t x,
cpdef zgeru(size_t handle, int m, int n, double complex alpha, size_t x,
int incx, size_t y, int incy, size_t A, int lda):
cdef cuDoubleComplex a = get_cu_double_complex(alpha)
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasZgeru(
<Handle>handle, m, n, &a,
Expand All @@ -569,6 +577,7 @@ cpdef zgeru(size_t handle, int m, int n, double complex alpha, size_t x,
cpdef zgerc(size_t handle, int m, int n, double complex alpha, size_t x,
int incx, size_t y, int incy, size_t A, int lda):
cdef cuDoubleComplex a = get_cu_double_complex(alpha)
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasZgerc(
<Handle>handle, m, n, &a,
Expand Down Expand Up @@ -608,6 +617,7 @@ cpdef cgemm(size_t handle, int transa, int transb,
size_t B, int ldb, float complex beta, size_t C, int ldc):
cdef cuComplex a = get_cu_complex(alpha)
cdef cuComplex b = get_cu_complex(beta)
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasCgemm(
<Handle>handle, <Operation>transa, <Operation>transb, m, n, k,
Expand All @@ -621,6 +631,7 @@ cpdef zgemm(size_t handle, int transa, int transb,
size_t B, int ldb, double complex beta, size_t C, int ldc):
cdef cuDoubleComplex a = get_cu_double_complex(alpha)
cdef cuDoubleComplex b = get_cu_double_complex(beta)
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasZgemm(
<Handle>handle, <Operation>transa, <Operation>transb, m, n, k,
Expand Down Expand Up @@ -662,6 +673,7 @@ cpdef cgemmBatched(
float complex beta, size_t Carray, int ldc, int batchCount):
cdef cuComplex a = get_cu_complex(alpha)
cdef cuComplex b = get_cu_complex(beta)
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasCgemmBatched(
<Handle>handle, <Operation>transa, <Operation>transb, m, n, k,
Expand All @@ -676,6 +688,7 @@ cpdef zgemmBatched(
double complex beta, size_t Carray, int ldc, int batchCount):
cdef cuDoubleComplex a = get_cu_double_complex(alpha)
cdef cuDoubleComplex b = get_cu_double_complex(beta)
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasZgemmBatched(
<Handle>handle, <Operation>transa, <Operation>transb, m, n, k,
Expand All @@ -692,6 +705,7 @@ cpdef sgemmStridedBatched(
float beta,
size_t C, int ldc, long long strideC,
int batchCount):
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasSgemmStridedBatched(
<Handle>handle, <Operation>transa, <Operation>transb, m, n, k,
Expand All @@ -712,6 +726,7 @@ cpdef dgemmStridedBatched(
double beta,
size_t C, int ldc, long long strideC,
int batchCount):
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasDgemmStridedBatched(
<Handle>handle, <Operation>transa, <Operation>transb, m, n, k,
Expand All @@ -732,6 +747,7 @@ cpdef cgemmStridedBatched(
float complex beta,
size_t C, int ldc, long long strideC,
int batchCount):
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasCgemmStridedBatched(
<Handle>handle, <Operation>transa, <Operation>transb, m, n, k,
Expand All @@ -752,6 +768,7 @@ cpdef zgemmStridedBatched(
double complex beta,
size_t C, int ldc, long long strideC,
int batchCount):
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasZgemmStridedBatched(
<Handle>handle, <Operation>transa, <Operation>transb, m, n, k,
Expand All @@ -768,6 +785,7 @@ cpdef strsm(
size_t handle, int side, int uplo, int trans, int diag,
int m, int n, float alpha, size_t Aarray, int lda,
size_t Barray, int ldb):
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasStrsm(
<Handle>handle, <SideMode>side, <FillMode>uplo, <Operation>trans,
Expand All @@ -780,6 +798,7 @@ cpdef dtrsm(
size_t handle, int side, int uplo, int trans, int diag,
int m, int n, double alpha, size_t Aarray, int lda,
size_t Barray, int ldb):
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasDtrsm(
<Handle>handle, <SideMode>side, <FillMode>uplo, <Operation>trans,
Expand Down Expand Up @@ -867,6 +886,7 @@ cpdef gemmEx(
size_t alpha, size_t A, int Atype, int lda, size_t B,
int Btype, int ldb, size_t beta, size_t C, int Ctype,
int ldc, int computeType, int algo):
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cublasGemmEx(
<Handle>handle, <Operation>transa, <Operation>transb, m, n, k,
Expand Down
2 changes: 2 additions & 0 deletions cupy/cuda/cudnn.pyx
Expand Up @@ -1178,6 +1178,7 @@ cpdef dropoutForward(
size_t srcDesc, size_t srcData,
size_t dstDesc, size_t dstData,
size_t reserveSpace, size_t reserveSpaceSizeInBytes):
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cudnnDropoutForward(
<Handle>handle, <DropoutDescriptor>dropoutDesc,
Expand All @@ -1192,6 +1193,7 @@ cpdef dropoutBackward(
size_t dyDesc, size_t dyData,
size_t dxDesc, size_t dxData,
size_t reserveSpace, size_t reserveSpaceSizeInBytes):
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cudnnDropoutBackward(
<Handle>handle, <DropoutDescriptor>dropoutDesc,
Expand Down
2 changes: 2 additions & 0 deletions cupy/cuda/cusolver.pyx
Expand Up @@ -563,6 +563,7 @@ cpdef scsrlsvqr(size_t handle, int m, int nnz, size_t descrA, size_t csrValA,
size_t csrRowPtrA, size_t csrColIndA, size_t b, float tol,
int reorder, size_t x, size_t singularity):
cdef int status
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cusolverSpScsrlsvqr(
<SpHandle>handle, m, nnz, <const MatDescr> descrA,
Expand All @@ -575,6 +576,7 @@ cpdef dcsrlsvqr(size_t handle, int m, int nnz, size_t descrA, size_t csrValA,
size_t csrRowPtrA, size_t csrColIndA, size_t b, double tol,
int reorder, size_t x, size_t singularity):
cdef int status
setStream(handle, stream_module.get_current_stream_ptr())
with nogil:
status = cusolverSpDcsrlsvqr(
<SpHandle>handle, m, nnz, <const MatDescr> descrA,
Expand Down
12 changes: 12 additions & 0 deletions cupy/cuda/cusparse.pyx
Expand Up @@ -548,6 +548,7 @@ cpdef scsc2dense(
size_t handle, int m, int n, size_t descrA,
size_t cscSortedValA, size_t cscSortedRowIndA,
size_t cscSortedColPtrA, size_t A, int lda):
setStream(handle, stream_module.get_current_stream_ptr())
status = cusparseScsc2dense(
<Handle>handle, m, n, <MatDescr>descrA,
<const float *>cscSortedValA, <const int *>cscSortedRowIndA,
Expand All @@ -559,6 +560,7 @@ cpdef dcsc2dense(
size_t handle, int m, int n, size_t descrA,
size_t cscSortedValA, size_t cscSortedRowIndA,
size_t cscSortedColPtrA, size_t A, int lda):
setStream(handle, stream_module.get_current_stream_ptr())
status = cusparseDcsc2dense(
<Handle>handle, m, n, <MatDescr>descrA,
<const double *>cscSortedValA, <const int *>cscSortedRowIndA,
Expand Down Expand Up @@ -631,6 +633,7 @@ cpdef snnz_compress(
size_t values, size_t rowPtr, size_t nnzPerRow,
float tol):
cdef int nnz_total
setStream(handle, stream_module.get_current_stream_ptr())
status = cusparseSnnz_compress(
<Handle>handle, m, <const MatDescr>descr,
<const float *>values, <const int *>rowPtr, <int *>nnzPerRow,
Expand All @@ -644,6 +647,7 @@ cpdef dnnz_compress(
size_t values, size_t rowPtr, size_t nnzPerRow,
double tol):
cdef int nnz_total
setStream(handle, stream_module.get_current_stream_ptr())
status = cusparseDnnz_compress(
<Handle>handle, m, <const MatDescr>descr,
<const double *>values, <const int *>rowPtr, <int *>nnzPerRow,
Expand All @@ -657,6 +661,7 @@ cpdef scsr2csr_compress(
size_t inVal, size_t inColInd, size_t inRowPtr,
int inNnz, size_t nnzPerRow, size_t outVal, size_t outColInd,
size_t outRowPtr, float tol):
setStream(handle, stream_module.get_current_stream_ptr())
status = cusparseScsr2csr_compress(
<Handle>handle, m, n, <MatDescr>descrA,
<const float *>inVal, <const int *>inColInd, <const int *>inRowPtr,
Expand All @@ -670,6 +675,7 @@ cpdef dcsr2csr_compress(
size_t inVal, size_t inColInd, size_t inRowPtr,
int inNnz, size_t nnzPerRow, size_t outVal, size_t outColInd,
size_t outRowPtr, float tol):
setStream(handle, stream_module.get_current_stream_ptr())
status = cusparseDcsr2csr_compress(
<Handle>handle, m, n, <MatDescr>descrA,
<const double *>inVal, <const int *>inColInd, <const int *>inRowPtr,
Expand All @@ -682,6 +688,7 @@ cpdef sdense2csc(
size_t handle, int m, int n, size_t descrA, size_t A,
int lda, size_t nnzPerCol, size_t cscValA, size_t cscRowIndA,
size_t cscColPtrA):
setStream(handle, stream_module.get_current_stream_ptr())
status = cusparseSdense2csc(
<Handle>handle, m, n, <const MatDescr>descrA, <const float *>A,
lda, <const int *>nnzPerCol, <float *>cscValA, <int *>cscRowIndA,
Expand All @@ -693,6 +700,7 @@ cpdef ddense2csc(
size_t handle, int m, int n, size_t descrA, size_t A,
int lda, size_t nnzPerCol, size_t cscValA, size_t cscRowIndA,
size_t cscColPtrA):
setStream(handle, stream_module.get_current_stream_ptr())
status = cusparseDdense2csc(
<Handle>handle, m, n, <const MatDescr>descrA, <const double *>A,
lda, <const int *>nnzPerCol, <double *>cscValA, <int *>cscRowIndA,
Expand All @@ -704,6 +712,7 @@ cpdef sdense2csr(
size_t handle, int m, int n, size_t descrA,
size_t A, int lda, size_t nnzPerRow, size_t csrValA,
size_t csrRowPtrA, size_t csrColIndA):
setStream(handle, stream_module.get_current_stream_ptr())
status = cusparseSdense2csr(
<Handle>handle, m, n, <MatDescr>descrA,
<const float *>A, lda, <const int *>nnzPerRow, <float *>csrValA,
Expand All @@ -715,6 +724,7 @@ cpdef ddense2csr(
size_t handle, int m, int n, size_t descrA,
size_t A, int lda, size_t nnzPerRow, size_t csrValA,
size_t csrRowPtrA, size_t csrColIndA):
setStream(handle, stream_module.get_current_stream_ptr())
status = cusparseDdense2csr(
<Handle>handle, m, n, <MatDescr>descrA,
<const double *>A, lda, <const int *>nnzPerRow, <double *>csrValA,
Expand All @@ -725,6 +735,7 @@ cpdef ddense2csr(
cpdef snnz(
size_t handle, int dirA, int m, int n, size_t descrA,
size_t A, int lda, size_t nnzPerRowColumn, size_t nnzTotalDevHostPtr):
setStream(handle, stream_module.get_current_stream_ptr())
status = cusparseSnnz(
<Handle>handle, <Direction>dirA, m, n, <const MatDescr>descrA,
<const float *>A, lda, <int *>nnzPerRowColumn,
Expand All @@ -735,6 +746,7 @@ cpdef snnz(
cpdef dnnz(
size_t handle, int dirA, int m, int n, size_t descrA,
size_t A, int lda, size_t nnzPerRowColumn, size_t nnzTotalDevHostPtr):
setStream(handle, stream_module.get_current_stream_ptr())
status = cusparseDnnz(
<Handle>handle, <Direction>dirA, m, n, <const MatDescr>descrA,
<const double *>A, lda, <int *>nnzPerRowColumn,
Expand Down

0 comments on commit 4a62e06

Please sign in to comment.