diff --git a/cupy/cuda/cudnn.pxd b/cupy/cuda/cudnn.pxd index b5632cb7c61..9d7385bdd49 100644 --- a/cupy/cuda/cudnn.pxd +++ b/cupy/cuda/cudnn.pxd @@ -176,9 +176,6 @@ cpdef setTensor4dDescriptorEx(size_t tensorDesc, int dataType, cpdef setTensorNdDescriptor(size_t tensorDesc, int dataType, int nbDims, size_t dimA, size_t strideA) cpdef destroyTensorDescriptor(size_t tensorDesc) -cpdef addTensor_v2( - size_t handle, int mode, size_t alpha, size_t biasDesc, - size_t biasData, size_t beta, size_t srcDestDesc, size_t srcDestData) cpdef addTensor_v3(size_t handle, size_t alpha, size_t bDesc, size_t b, size_t beta, size_t yDesc, size_t y) @@ -188,10 +185,6 @@ cpdef addTensor_v3(size_t handle, size_t alpha, size_t bDesc, ############################################################################### cpdef size_t createFilterDescriptor() except * -cpdef setFilter4dDescriptor_v3( - size_t filterDesc, int dataType, int k, int c, int h, int w) -cpdef setFilterNdDescriptor_v3( - size_t filterDesc, int dataType, int nbDims, size_t filterDimA) cpdef setFilter4dDescriptor_v4( size_t filterDesc, int dataType, int format, int k, int c, int h, int w) cpdef setFilterNdDescriptor_v4( @@ -210,9 +203,6 @@ cpdef setConvolution2dDescriptor_v4( cpdef setConvolution2dDescriptor_v5( size_t convDesc, int pad_h, int pad_w, int u, int v, int dilation_h, int dilation_w, int mode, size_t computeType) -cpdef setConvolutionNdDescriptor_v2( - size_t convDesc, int arrayLength, size_t padA, size_t filterStrideA, - size_t dilationA, int mode) cpdef setConvolutionNdDescriptor_v3( size_t convDesc, int arrayLength, size_t padA, size_t filterStrideA, size_t dilationA, int mode, int dataType) @@ -253,10 +243,6 @@ cpdef int getConvolutionBackwardFilterAlgorithm( cpdef size_t getConvolutionBackwardFilterWorkspaceSize( size_t handle, size_t srcDesc, size_t diffDesc, size_t convDesc, size_t filterDesc, int algo) except * -cpdef convolutionBackwardFilter_v2( - size_t handle, size_t alpha, size_t srcDesc, size_t srcData, - size_t diffDesc, size_t diffData, size_t convDesc, size_t beta, - size_t gradDesc, size_t gradData) cpdef convolutionBackwardFilter_v3( size_t handle, size_t alpha, size_t srcDesc, size_t srcData, size_t diffDesc, size_t diffData, size_t convDesc, int algo, @@ -276,10 +262,6 @@ cpdef int getConvolutionBackwardDataAlgorithm( cpdef size_t getConvolutionBackwardDataWorkspaceSize( size_t handle, size_t filterDesc, size_t diffDesc, size_t convDesc, size_t gradDesc, int algo) except * -cpdef convolutionBackwardData_v2( - size_t handle, size_t alpha, size_t filterDesc, size_t filterData, - size_t diffDesc, size_t diffData, size_t convDesc, size_t beta, - size_t gradDesc, size_t gradData) cpdef convolutionBackwardData_v3( size_t handle, size_t alpha, size_t filterDesc, size_t filterData, size_t diffDesc, size_t diffData, size_t convDesc, int algo, @@ -292,13 +274,13 @@ cpdef convolutionBackwardData_v3( ############################################################################### cpdef size_t createPoolingDescriptor() except * -cpdef setPooling2dDescriptor_v3( - size_t poolingDesc, int mode, int windowHeight, int windowWidth, - int verticalPadding, int horizontalPadding, int verticalStride, - int horizontalStride) -cpdef setPoolingNdDescriptor_v3( - size_t poolingDesc, int mode, int nbDims, size_t windowDimA, - size_t paddingA, size_t strideA) +cpdef setPooling2dDescriptor_v4( + size_t poolingDesc, int mode, int maxpoolingNanOpt, int windowHeight, + int windowWidth, int verticalPadding, int horizontalPadding, + int verticalStride, int horizontalStride) +cpdef setPoolingNdDescriptor_v4( + size_t poolingDesc, int mode, int maxpoolingNanOpt, int nbDims, + size_t windowDimA, size_t paddingA, size_t strideA) cpdef destroyPoolingDescriptor(size_t poolingDesc) cpdef poolingForward( size_t handle, size_t poolingDesc, size_t alpha, size_t srcDesc, @@ -359,15 +341,14 @@ cpdef softmaxBackward( size_t handle, int algorithm, int mode, size_t alpha, size_t srcDesc, size_t srcData, size_t srcDiffDesc, size_t srcDiffData, size_t beta, size_t destDiffDesc, size_t destDiffData) -cpdef activationForward_v3( - size_t handle, int mode, size_t alpha, size_t srcDesc, size_t srcData, - size_t beta, size_t dstDesc, size_t dstData) -cpdef activationBackward_v3( - size_t handle, int mode, size_t alpha, size_t srcDesc, size_t srcData, - size_t srcDiffDesc, size_t srcDiffData, size_t destDesc, - size_t destData, size_t beta, size_t destDiffDesc, +cpdef activationForward_v4( + size_t handle, size_t activationDesc, size_t alpha, size_t srcDesc, + size_t srcData, size_t beta, size_t dstDesc, size_t dstData) +cpdef activationBackward_v4( + size_t handle, size_t activationDesc, size_t alpha, size_t srcDesc, + size_t srcData, size_t srcDiffDesc, size_t srcDiffData, + size_t destDesc, size_t destData, size_t beta, size_t destDiffDesc, size_t destDiffData) - cpdef size_t createDropoutDescriptor() except * cpdef destroyDropoutDescriptor(size_t dropoutDesc) cpdef size_t dropoutGetStatesSize(size_t handle) except * diff --git a/cupy/cuda/cudnn.pyx b/cupy/cuda/cudnn.pyx index 89bebd1d66c..261aca31b8e 100644 --- a/cupy/cuda/cudnn.pyx +++ b/cupy/cuda/cudnn.pyx @@ -39,22 +39,12 @@ cdef extern from "cupy_cudnn.h": TensorDescriptor tensorDesc, DataType dataType, int nbDims, int* dimA, int* strideA) nogil int cudnnDestroyTensorDescriptor(TensorDescriptor tensorDesc) nogil - int cudnnAddTensor_v2( - Handle handle, AddMode mode, void* alpha, - TensorDescriptor biasDesc, void* biasData, void* beta, - TensorDescriptor srcDestDesc, void* srcDestData) nogil int cudnnAddTensor_v3( Handle handle, void* alpha, TensorDescriptor bDesc, void* b, void* beta, TensorDescriptor yDesc, void* y) nogil # Filter manipulation int cudnnCreateFilterDescriptor(FilterDescriptor* filterDesc) nogil - int cudnnSetFilter4dDescriptor_v3( - FilterDescriptor filterDesc, DataType dataType, - int n, int c, int h, int w) nogil - int cudnnSetFilterNdDescriptor_v3( - FilterDescriptor filterDesc, DataType dataType, int nbDims, - int* filterDimA) nogil int cudnnSetFilter4dDescriptor_v4( FilterDescriptor filterDesc, DataType dataType, TensorFormat format, int k, int c, int h, int w) nogil @@ -75,9 +65,6 @@ cdef extern from "cupy_cudnn.h": ConvolutionDescriptor convDesc, int pad_h, int pad_w, int u, int v, int dilation_h, int dilation_w, ConvolutionMode mode, DataType computeType) nogil - int cudnnSetConvolutionNdDescriptor_v2( - ConvolutionDescriptor convDesc, int arrayLength, int* padA, - int* filterStrideA, int* dilationA, ConvolutionMode mode) nogil int cudnnSetConvolutionNdDescriptor_v3( ConvolutionDescriptor convDesc, int arrayLength, int* padA, int* filterStrideA, int* dilationA, ConvolutionMode mode, @@ -134,12 +121,6 @@ cdef extern from "cupy_cudnn.h": Handle handle, TensorDescriptor srcDesc, TensorDescriptor diffDesc, ConvolutionDescriptor convDesc, FilterDescriptor filterDesc, ConvolutionBwdFilterAlgo algo, size_t* sizeInBytes) nogil - int cudnnConvolutionBackwardFilter_v2( - Handle handle, void* alpha, - TensorDescriptor srcDesc, void* srcData, - TensorDescriptor diffDesc, void* diffData, - ConvolutionDescriptor convDesc, void* beta, - FilterDescriptor gradDesc, void* gradData) nogil int cudnnConvolutionBackwardFilter_v3( Handle handle, void* alpha, TensorDescriptor srcDesc, void* srcData, @@ -169,12 +150,6 @@ cdef extern from "cupy_cudnn.h": TensorDescriptor diffDesc, ConvolutionDescriptor convDesc, TensorDescriptor gradDesc, ConvolutionBwdDataAlgo algo, size_t* sizeInBytes) nogil - int cudnnConvolutionBackwardData_v2( - Handle handle, void* alpha, - FilterDescriptor filterDesc, void* filterData, - TensorDescriptor diffDesc, void* diffData, - ConvolutionDescriptor convDesc, void* beta, - TensorDescriptor gradDesc, void* gradData) nogil int cudnnConvolutionBackwardData_v3( Handle handle, void* alpha, FilterDescriptor filterDesc, void* filterData, @@ -185,19 +160,11 @@ cdef extern from "cupy_cudnn.h": # Pooling int cudnnCreatePoolingDescriptor(PoolingDescriptor* desc) nogil - int cudnnSetPooling2dDescriptor_v3( - PoolingDescriptor poolingDesc, PoolingMode mode, - int windowHeight, int windowWidth, - int verticalPadding, int horizontalPadding, - int verticalStride, int horizontalStride) nogil int cudnnSetPooling2dDescriptor_v4( PoolingDescriptor poolingDesc, PoolingMode mode, NanPropagation maxpoolingNanOpt, int windowHeight, int windowWidth, int verticalPadding, int horizontalPadding, int verticalStride, int horizontalStride) nogil - int cudnnSetPoolingNdDescriptor_v3( - PoolingDescriptor poolingDesc, PoolingMode mode, int nbDims, - int* windowDimA, int* paddingA, int* strideA) nogil int cudnnSetPoolingNdDescriptor_v4( PoolingDescriptor poolingDesc, PoolingMode mode, NanPropagation maxpoolingNanOpt, int nbDims, @@ -262,20 +229,10 @@ cdef extern from "cupy_cudnn.h": void* alpha, TensorDescriptor srcDesc, void* srcData, TensorDescriptor srcDiffDesc, void* srcDiffData, void* beta, TensorDescriptor destDiffDesc, void* destDiffData) nogil - int cudnnActivationForward_v3( - Handle handle, ActivationMode mode, void* alpha, - TensorDescriptor srcDesc, void* srcData, void* beta, - TensorDescriptor dstDesc, void* dstData) nogil int cudnnActivationForward_v4( Handle handle, ActivationDescriptor activationDesc, void* alpha, TensorDescriptor srcDesc, void* srcData, void* beta, TensorDescriptor dstDesc, void* dstData) nogil - int cudnnActivationBackward_v3( - Handle handle, ActivationMode mode, void* alpha, - TensorDescriptor srcDesc, void* srcData, - TensorDescriptor srcDiffDesc, void* srcDiffData, - TensorDescriptor destDesc, void* destData, void* beta, - TensorDescriptor destDiffDesc, void* destDiffData) nogil int cudnnActivationBackward_v4( Handle handle, ActivationDescriptor activationDesc, void* alpha, TensorDescriptor srcDesc, void* srcData, @@ -496,17 +453,6 @@ cpdef destroyTensorDescriptor(size_t tensorDesc): check_status(status) -cpdef addTensor_v2( - size_t handle, int mode, size_t alpha, size_t biasDesc, - size_t biasData, size_t beta, size_t srcDestDesc, size_t srcDestData): - with nogil: - status = cudnnAddTensor_v2( - handle, mode, alpha, - biasDesc, biasData, beta, - srcDestDesc, srcDestData) - check_status(status) - - cpdef addTensor_v3(size_t handle, size_t alpha, size_t bDesc, size_t b, size_t beta, size_t yDesc, size_t y): with nogil: @@ -527,21 +473,6 @@ cpdef size_t createFilterDescriptor() except *: return desc -cpdef setFilter4dDescriptor_v3( - size_t filterDesc, int dataType, int k, int c, int h, int w): - status = cudnnSetFilter4dDescriptor_v3( - filterDesc, dataType, k, c, h, w) - check_status(status) - - -cpdef setFilterNdDescriptor_v3( - size_t filterDesc, int dataType, int nbDims, size_t filterDimA): - status = cudnnSetFilterNdDescriptor_v3( - filterDesc, dataType, nbDims, - filterDimA) - check_status(status) - - cpdef setFilter4dDescriptor_v4( size_t filterDesc, int dataType, int format, int k, int c, int h, int w): @@ -608,15 +539,6 @@ cpdef setConvolution2dDescriptor_v5( check_status(status) -cpdef setConvolutionNdDescriptor_v2( - size_t convDesc, int arrayLength, size_t padA, size_t filterStrideA, - size_t dilationA, int mode): - status = cudnnSetConvolutionNdDescriptor_v2( - convDesc, arrayLength, padA, - filterStrideA, dilationA, mode) - check_status(status) - - cpdef setConvolutionNdDescriptor_v3( size_t convDesc, int arrayLength, size_t padA, size_t filterStrideA, size_t dilationA, int mode, int dataType): @@ -632,6 +554,7 @@ cpdef destroyConvolutionDescriptor(size_t convDesc): convDesc) check_status(status) + cpdef findConvolutionForwardAlgorithm( size_t handle, size_t xDesc, size_t wDesc, size_t convDesc, size_t yDesc, int requestedAlgoCount): @@ -646,6 +569,7 @@ cpdef findConvolutionForwardAlgorithm( check_status(status) return returnedAlgoCount[0], perfResults + cpdef findConvolutionForwardAlgorithmEx( size_t handle, size_t xDesc, size_t x, size_t wDesc, size_t w, size_t convDesc, size_t yDesc, size_t y, int requestedAlgoCount, @@ -663,6 +587,7 @@ cpdef findConvolutionForwardAlgorithmEx( check_status(status) return returnedAlgoCount[0], perfResults + cpdef int getConvolutionForwardAlgorithm( size_t handle, size_t srcDesc, size_t filterDesc, size_t convDesc, size_t destDesc, ConvolutionFwdPreference preference, @@ -715,6 +640,7 @@ cpdef convolutionBackwardBias( destDesc, destData) check_status(status) + cpdef findConvolutionBackwardFilterAlgorithm( size_t handle, size_t xDesc, size_t dyDesc, size_t convDesc, size_t dwDesc, int requestedAlgoCount): @@ -729,6 +655,7 @@ cpdef findConvolutionBackwardFilterAlgorithm( check_status(status) return returnedAlgoCount[0], perfResults + cpdef findConvolutionBackwardFilterAlgorithmEx( size_t handle, size_t xDesc, size_t x, size_t dyDesc, size_t dy, size_t convDesc, size_t dwDesc, size_t dw, int requestedAlgoCount, @@ -746,6 +673,7 @@ cpdef findConvolutionBackwardFilterAlgorithmEx( check_status(status) return returnedAlgoCount[0], perfResults + cpdef int getConvolutionBackwardFilterAlgorithm( size_t handle, size_t srcDesc, size_t diffDesc, size_t convDesc, size_t filterDesc, ConvolutionBwdFilterPreference preference, @@ -760,6 +688,7 @@ cpdef int getConvolutionBackwardFilterAlgorithm( check_status(status) return algo + cpdef size_t getConvolutionBackwardFilterWorkspaceSize( size_t handle, size_t srcDesc, size_t diffDesc, size_t convDesc, size_t filterDesc, int algo) except *: @@ -772,18 +701,6 @@ cpdef size_t getConvolutionBackwardFilterWorkspaceSize( check_status(status) return sizeInBytes -cpdef convolutionBackwardFilter_v2( - size_t handle, size_t alpha, size_t srcDesc, size_t srcData, - size_t diffDesc, size_t diffData, size_t convDesc, size_t beta, - size_t gradDesc, size_t gradData): - with nogil: - status = cudnnConvolutionBackwardFilter_v2( - handle, alpha, - srcDesc, srcData, - diffDesc, diffData, - convDesc, beta, - gradDesc, gradData) - check_status(status) cpdef convolutionBackwardFilter_v3( size_t handle, size_t alpha, size_t srcDesc, size_t srcData, @@ -800,6 +717,7 @@ cpdef convolutionBackwardFilter_v3( gradDesc, gradData) check_status(status) + cpdef findConvolutionBackwardDataAlgorithm( size_t handle, size_t wDesc, size_t dyDesc, size_t convDesc, size_t dxDesc, int requestedAlgoCount): @@ -814,6 +732,7 @@ cpdef findConvolutionBackwardDataAlgorithm( check_status(status) return returnedAlgoCount[0], perfResults + cpdef findConvolutionBackwardDataAlgorithmEx( size_t handle, size_t wDesc, size_t w, size_t dyDesc, size_t dy, size_t convDesc, size_t dxDesc, size_t dx, @@ -831,6 +750,7 @@ cpdef findConvolutionBackwardDataAlgorithmEx( check_status(status) return returnedAlgoCount[0], perfResults + cpdef int getConvolutionBackwardDataAlgorithm( size_t handle, size_t filterDesc, size_t diffDesc, size_t convDesc, size_t gradDesc, size_t preference, @@ -844,6 +764,7 @@ cpdef int getConvolutionBackwardDataAlgorithm( check_status(status) return algo + cpdef size_t getConvolutionBackwardDataWorkspaceSize( size_t handle, size_t filterDesc, size_t diffDesc, size_t convDesc, size_t gradDesc, int algo) except *: @@ -856,18 +777,6 @@ cpdef size_t getConvolutionBackwardDataWorkspaceSize( check_status(status) return sizeInBytes -cpdef convolutionBackwardData_v2( - size_t handle, size_t alpha, size_t filterDesc, size_t filterData, - size_t diffDesc, size_t diffData, size_t convDesc, size_t beta, - size_t gradDesc, size_t gradData): - with nogil: - status = cudnnConvolutionBackwardData_v2( - handle, alpha, - filterDesc, filterData, - diffDesc, diffData, - convDesc, beta, - gradDesc, gradData) - check_status(status) cpdef convolutionBackwardData_v3( size_t handle, size_t alpha, size_t filterDesc, size_t filterData, @@ -895,17 +804,6 @@ cpdef size_t createPoolingDescriptor() except *: return desc -cpdef setPooling2dDescriptor_v3( - size_t poolingDesc, int mode, int windowHeight, int windowWidth, - int verticalPadding, int horizontalPadding, int verticalStride, - int horizontalStride): - status = cudnnSetPooling2dDescriptor_v3( - poolingDesc, mode, - windowHeight, windowWidth, verticalPadding, horizontalPadding, - verticalStride, horizontalStride) - check_status(status) - - cpdef setPooling2dDescriptor_v4( size_t poolingDesc, int mode, int maxpoolingNanOpt, int windowHeight, int windowWidth, int verticalPadding, int horizontalPadding, @@ -917,15 +815,6 @@ cpdef setPooling2dDescriptor_v4( check_status(status) -cpdef setPoolingNdDescriptor_v3( - size_t poolingDesc, int mode, int nbDims, size_t windowDimA, - size_t paddingA, size_t strideA): - status = cudnnSetPoolingNdDescriptor_v3( - poolingDesc, mode, nbDims, - windowDimA, paddingA, strideA) - check_status(status) - - cpdef setPoolingNdDescriptor_v4( size_t poolingDesc, int mode, int maxpoolingNanOpt, int nbDims, size_t windowDimA, size_t paddingA, size_t strideA): @@ -977,6 +866,7 @@ cpdef deriveBNTensorDescriptor( mode) check_status(status) + cpdef batchNormalizationForwardTraining( size_t handle, int mode, size_t alpha, size_t beta, size_t xDesc, @@ -996,6 +886,7 @@ cpdef batchNormalizationForwardTraining( epsilon, resultSaveMean, resultSaveInvVariance) check_status(status) + cpdef batchNormalizationForwardInference( size_t handle, int mode, size_t alpha, size_t beta, size_t xDesc, @@ -1013,6 +904,7 @@ cpdef batchNormalizationForwardInference( epsilon) check_status(status) + cpdef batchNormalizationBackward( size_t handle, int mode, size_t alphaDataDiff, size_t betaDataDiff, @@ -1045,6 +937,7 @@ cpdef size_t createActivationDescriptor() except *: check_status(status) return activationDesc + cpdef setActivationDescriptor( size_t activationDesc, int mode, int reluNanOpt, double reluCeiling): status = cudnnSetActivationDescriptor( @@ -1052,11 +945,13 @@ cpdef setActivationDescriptor( reluNanOpt, reluCeiling) check_status(status) + cpdef destroyActivationDescriptor(size_t activationDesc): status = cudnnDestroyActivationDescriptor( activationDesc) check_status(status) + cpdef softmaxForward( size_t handle, int algorithm, int mode, size_t alpha, size_t srcDesc, size_t srcData, size_t beta, size_t dstDesc, size_t dstData): @@ -1081,17 +976,6 @@ cpdef softmaxBackward( check_status(status) -cpdef activationForward_v3( - size_t handle, int mode, size_t alpha, size_t srcDesc, size_t srcData, - size_t beta, size_t dstDesc, size_t dstData): - with nogil: - status = cudnnActivationForward_v3( - handle, mode, alpha, - srcDesc, srcData, beta, - dstDesc, dstData) - check_status(status) - - cpdef activationForward_v4( size_t handle, size_t activationDesc, size_t alpha, size_t srcDesc, size_t srcData, size_t beta, size_t dstDesc, size_t dstData): @@ -1103,21 +987,6 @@ cpdef activationForward_v4( check_status(status) -cpdef activationBackward_v3( - size_t handle, int mode, size_t alpha, size_t srcDesc, size_t srcData, - size_t srcDiffDesc, size_t srcDiffData, size_t destDesc, - size_t destData, size_t beta, size_t destDiffDesc, - size_t destDiffData): - with nogil: - status = cudnnActivationBackward_v3( - handle, mode, alpha, - srcDesc, srcData, - srcDiffDesc, srcDiffData, - destDesc, destData, beta, - destDiffDesc, destDiffData) - check_status(status) - - cpdef activationBackward_v4( size_t handle, size_t activationDesc, size_t alpha, size_t srcDesc, size_t srcData, size_t srcDiffDesc, size_t srcDiffData, @@ -1188,7 +1057,6 @@ cpdef setRNNDescriptor( direction, mode, dataType) check_status(status) - cpdef getRNNWorkspaceSize( size_t handle, size_t rnnDesc, int seqLength, size_t xDesc): cdef size_t sizeInBytes diff --git a/cupy/cuda/cupy_cudnn.h b/cupy/cuda/cupy_cudnn.h index d6682b2db0c..2a8cbb6d90f 100644 --- a/cupy/cuda/cupy_cudnn.h +++ b/cupy/cuda/cupy_cudnn.h @@ -155,22 +155,6 @@ cudnnStatus_t cudnnSoftmaxBackward(...) { } -} // extern "C" - -#endif // #ifndef CUPY_NO_CUDA - - -/////////////////////////////////////////////////////////////////////////////// -// Definitions are for compatibility with cuDNN v2, v3 v4 and v5. -/////////////////////////////////////////////////////////////////////////////// - -extern "C" { - - -#if defined(CUPY_NO_CUDA) || (CUDNN_VERSION < 3000) -// ***_v3 functions are not declared in cuDNN v2. -// Following definitions are for compatibility with cuDNN v3. - typedef enum {} cudnnConvolutionBwdDataAlgo_t; typedef enum {} cudnnConvolutionBwdDataPreference_t; typedef enum {} cudnnConvolutionBwdFilterAlgo_t; @@ -238,47 +222,6 @@ cudnnStatus_t cudnnSetConvolutionNdDescriptor_v3(...) { return CUDNN_STATUS_NOT_SUPPORTED; } -#endif // #if defined(CUPY_NO_CUDA) || (CUDNN_VERSION < 3000) - -#if defined(CUPY_NO_CUDA) || (CUDNN_VERSION < 3000) || (CUDNN_VERSION >= 6000) -// some ***_v3 functions are not declared in cuDNN v2 and v6. - -cudnnStatus_t cudnnSetFilter4dDescriptor_v3(...) { - return CUDNN_STATUS_NOT_SUPPORTED; -} - -cudnnStatus_t cudnnSetFilterNdDescriptor_v3(...) { - return CUDNN_STATUS_NOT_SUPPORTED; -} - -cudnnStatus_t cudnnSetPooling2dDescriptor_v3(...) { - return CUDNN_STATUS_NOT_SUPPORTED; -} - -cudnnStatus_t cudnnSetPoolingNdDescriptor_v3(...) { - return CUDNN_STATUS_NOT_SUPPORTED; -} - -cudnnStatus_t cudnnActivationForward_v3(...) { - return CUDNN_STATUS_NOT_SUPPORTED; -} - -cudnnStatus_t cudnnActivationBackward_v3(...) { - return CUDNN_STATUS_NOT_SUPPORTED; -} - -#endif // #if defined(CUPY_NO_CUDA) || (CUDNN_VERSION < 3000) || (CUDNN_VERSION >= 6000) - - -#if defined(CUPY_NO_CUDA) || (CUDNN_VERSION < 4000) -// ***_v2 functions are not declared in cuDNN v2 and v3. -// Following definitions are for compatibility with cuDNN v4. - -#define cudnnAddTensor_v2 cudnnAddTensor -#define cudnnConvolutionBackwardData_v2 cudnnConvolutionBackwardData -#define cudnnConvolutionBackwardFilter_v2 cudnnConvolutionBackwardFilter -#define cudnnSetConvolutionNdDescriptor_v2 cudnnSetConvolutionNdDescriptor - typedef enum {} cudnnBatchNormMode_t; typedef enum {} cudnnNanPropagation_t; @@ -343,20 +286,16 @@ cudnnStatus_t cudnnActivationBackward_v4(...) { return CUDNN_STATUS_NOT_SUPPORTED; } -#endif // #if defined(CUPY_NO_CUDA) || (CUDNN_VERSION < 4000) - +} // extern "C" -#if !defined(CUPY_NO_CUDA) && (CUDNN_VERSION < 5000) +#endif // #ifndef CUPY_NO_CUDA -#define cudnnActivationForward_v3 cudnnActivationForward -#define cudnnActivationBackward_v3 cudnnActivationBackward -#define cudnnSetFilter4dDescriptor_v3 cudnnSetFilter4dDescriptor -#define cudnnSetFilterNdDescriptor_v3 cudnnSetFilterNdDescriptor -#define cudnnSetPooling2dDescriptor_v3 cudnnSetPooling2dDescriptor -#define cudnnSetPoolingNdDescriptor_v3 cudnnSetPoolingNdDescriptor -#endif // #if !defined(CUPY_NO_CUDA) && (CUDNN_VERSION < 5000) +/////////////////////////////////////////////////////////////////////////////// +// Definitions are for compatibility with cuDNN v4, v5 and v6. +/////////////////////////////////////////////////////////////////////////////// +extern "C" { #if defined(CUPY_NO_CUDA) || (CUDNN_VERSION < 5000) // ***_v3 functions are not declared in cuDNN v2, v3 and v4. @@ -511,36 +450,6 @@ cudnnStatus_t cudnnSpatialTfSamplerBackward(...) { #endif // #if !defined(CUPY_NO_CUDA) && CUDNN_VERSION >= 5000 -#if defined(CUPY_NO_CUDA) || (CUDNN_VERSION >= 5000) -// ***_v2 functions are deleted in cuDNN v5. -// Following definitions are for compatibility with cuDNN v5 and higher. -// This section code is also used instead of cuDNN v2 stub. - -typedef enum {} cudnnAddMode_t; - -cudnnStatus_t cudnnSetConvolutionNdDescriptor_v2(...) { - return CUDNN_STATUS_NOT_SUPPORTED; -} - -cudnnStatus_t cudnnGetConvolutionNdDescriptor_v2(...) { - return CUDNN_STATUS_NOT_SUPPORTED; -} - -cudnnStatus_t cudnnAddTensor_v2(...) { - return CUDNN_STATUS_NOT_SUPPORTED; -} - -cudnnStatus_t cudnnConvolutionBackwardFilter_v2(...) { - return CUDNN_STATUS_NOT_SUPPORTED; -} - -cudnnStatus_t cudnnConvolutionBackwardData_v2(...) { - return CUDNN_STATUS_NOT_SUPPORTED; -} - -#endif // #if defined(CUPY_NO_CUDA) || (CUDNN_VERSION >= 5000) - - #if !defined(CUPY_NO_CUDA) && (CUDNN_VERSION < 6000) #define cudnnSetConvolution2dDescriptor_v4 cudnnSetConvolution2dDescriptor diff --git a/cupy/cudnn.py b/cupy/cudnn.py index 490b93682dd..731c9cda1c0 100644 --- a/cupy/cudnn.py +++ b/cupy/cudnn.py @@ -125,22 +125,13 @@ def create_filter_descriptor(arr, format=cudnn.CUDNN_TENSOR_NCHW): desc = Descriptor(cudnn.createFilterDescriptor(), cudnn.destroyFilterDescriptor) data_type = get_data_type(arr.dtype) - if _cudnn_version >= 4000: - if arr.ndim == 4: - cudnn.setFilter4dDescriptor_v4(desc.value, data_type, format, - *arr.shape) - else: - c_shape = _to_ctypes_array(arr.shape) - cudnn.setFilterNdDescriptor_v4(desc.value, data_type, format, - arr.ndim, c_shape.data) + if arr.ndim == 4: + cudnn.setFilter4dDescriptor_v4(desc.value, data_type, format, + *arr.shape) else: - if arr.ndim == 4: - cudnn.setFilter4dDescriptor_v3(desc.value, data_type, *arr.shape) - else: - c_shape = _to_ctypes_array(arr.shape) - cudnn.setFilterNdDescriptor_v3(desc.value, data_type, arr.ndim, - c_shape.data) - + c_shape = _to_ctypes_array(arr.shape) + cudnn.setFilterNdDescriptor_v4(desc.value, data_type, format, + arr.ndim, c_shape.data) return desc @@ -173,19 +164,14 @@ def create_convolution_descriptor(pad, stride, dtype, c_pad = _to_ctypes_array(pad) c_stride = _to_ctypes_array(stride) c_dilation = _to_ctypes_array((1,) * ndim) - if _cudnn_version >= 3000: - data_type = get_data_type(dtype) - # TODO(takagi) Temporarily use computing precision of FP32 for - # storing precision of FP16. - if dtype == numpy.float16: - data_type = cudnn.CUDNN_DATA_FLOAT - cudnn.setConvolutionNdDescriptor_v3( - desc.value, ndim, c_pad.data, c_stride.data, c_dilation.data, - mode, data_type) - else: - cudnn.setConvolutionNdDescriptor_v2( - desc.value, ndim, c_pad.data, c_stride.data, c_dilation.data, - mode) + data_type = get_data_type(dtype) + # TODO(takagi) Temporarily use computing precision of FP32 for + # storing precision of FP16. + if dtype == numpy.float16: + data_type = cudnn.CUDNN_DATA_FLOAT + cudnn.setConvolutionNdDescriptor_v3( + desc.value, ndim, c_pad.data, c_stride.data, c_dilation.data, + mode, data_type) return desc @@ -198,26 +184,16 @@ def create_pooling_descriptor(ksize, stride, pad, mode): raise ValueError('ksize, stride, and pad must be of same length') if ndim == 2: - if _cudnn_version >= 4000: - cudnn.setPooling2dDescriptor_v4( - desc.value, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, ksize[0], - ksize[1], pad[0], pad[1], stride[0], stride[1]) - else: - cudnn.setPooling2dDescriptor_v3( - desc.value, mode, ksize[0], ksize[1], pad[0], pad[1], - stride[0], stride[1]) + cudnn.setPooling2dDescriptor_v4( + desc.value, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, ksize[0], + ksize[1], pad[0], pad[1], stride[0], stride[1]) else: c_ksize = _to_ctypes_array(ksize) c_pad = _to_ctypes_array(pad) c_stride = _to_ctypes_array(stride) - if _cudnn_version >= 4000: - cudnn.setPoolingNdDescriptor_v4( - desc.value, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, ndim, - c_ksize.data, c_pad.data, c_stride.data) - else: - cudnn.setPoolingNdDescriptor_v3( - desc.value, mode, ndim, c_ksize.data, c_pad.data, - c_stride.data) + cudnn.setPoolingNdDescriptor_v4( + desc.value, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, ndim, + c_ksize.data, c_pad.data, c_stride.data) return desc @@ -239,18 +215,13 @@ def activation_forward(x, mode): handle = get_handle() x_mat = _as4darray(x) desc = create_tensor_descriptor(x_mat) - if _cudnn_version >= 4000: - act_desc = Descriptor(cudnn.createActivationDescriptor(), - cudnn.destroyActivationDescriptor) - cudnn.setActivationDescriptor( - act_desc.value, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, 0.0) - cudnn.activationForward_v4( - handle, act_desc.value, one.data, desc.value, x_mat.data.ptr, - zero.data, desc.value, y.data.ptr) - else: - cudnn.activationForward_v3( - handle, mode, one.data, desc.value, x_mat.data.ptr, - zero.data, desc.value, y.data.ptr) + act_desc = Descriptor(cudnn.createActivationDescriptor(), + cudnn.destroyActivationDescriptor) + cudnn.setActivationDescriptor( + act_desc.value, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, 0.0) + cudnn.activationForward_v4( + handle, act_desc.value, one.data, desc.value, x_mat.data.ptr, + zero.data, desc.value, y.data.ptr) return y @@ -265,20 +236,14 @@ def activation_backward(x, y, gy, mode): handle = get_handle() y_mat = _as4darray(y) desc = create_tensor_descriptor(y_mat) - if _cudnn_version >= 4000: - act_desc = Descriptor(cudnn.createActivationDescriptor(), - cudnn.destroyActivationDescriptor) - cudnn.setActivationDescriptor( - act_desc.value, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, 0.0) - cudnn.activationBackward_v4( - handle, act_desc.value, one.data, desc.value, y.data.ptr, - desc.value, gy.data.ptr, desc.value, x.data.ptr, - zero.data, desc.value, gx.data.ptr) - else: - cudnn.activationBackward_v3( - handle, mode, one.data, desc.value, y.data.ptr, - desc.value, gy.data.ptr, desc.value, x.data.ptr, - zero.data, desc.value, gx.data.ptr) + act_desc = Descriptor(cudnn.createActivationDescriptor(), + cudnn.destroyActivationDescriptor) + cudnn.setActivationDescriptor( + act_desc.value, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, 0.0) + cudnn.activationBackward_v4( + handle, act_desc.value, one.data, desc.value, y.data.ptr, + desc.value, gy.data.ptr, desc.value, x.data.ptr, + zero.data, desc.value, gx.data.ptr) return gx @@ -351,13 +316,7 @@ def create_spatial_transformer_descriptor(sampler_type, dtype, nb_dims, dim_A): return desc -if _cudnn_version >= 3000: - def add_tensor(handle, alpha, biasDesc, biasData, beta, srcDestDesc, - srcDestData): - cudnn.addTensor_v3(handle, alpha, biasDesc, - biasData, beta, srcDestDesc, srcDestData) -else: - def add_tensor(handle, alpha, biasDesc, biasData, beta, srcDestDesc, - srcDestData): - cudnn.addTensor_v2(handle, cudnn.CUDNN_ADD_SAME_C, alpha, biasDesc, - biasData, beta, srcDestDesc, srcDestData) +def add_tensor(handle, alpha, biasDesc, biasData, beta, srcDestDesc, + srcDestData): + cudnn.addTensor_v3(handle, alpha, biasDesc, + biasData, beta, srcDestDesc, srcDestData)