diff --git a/cupy/cuda/cudnn.pxd b/cupy/cuda/cudnn.pxd
index f4f6ae13f46..87f577873ae 100644
--- a/cupy/cuda/cudnn.pxd
+++ b/cupy/cuda/cudnn.pxd
@@ -116,6 +116,9 @@ cpdef enum:
     CUDNN_BATCHNORM_SPATIAL = 1
     CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2
 
+    CUDNN_CTC_LOSS_ALGO_DETERMINISTIC = 0
+    CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC = 1
+
     CUDNN_BATCHNORM_OPS_BN = 0
     CUDNN_BATCHNORM_OPS_BN_ACTIVATION = 1
     CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION = 2
@@ -522,6 +525,25 @@ cpdef dropoutBackward(
     size_t reserveSpace, size_t reserveSpaceSizeInBytes)
 
 
+###############################################################################
+# CTC
+###############################################################################
+
+cpdef size_t createCTCLossDescriptor() except? 0
+cpdef destroyCTCLossDescriptor(size_t ctcLossDesc)
+cpdef setCTCLossDescriptor(size_t ctcLossDesc, int dataType)
+cpdef getCTCLossDescriptor(size_t ctcLossDesc)
+cpdef size_t getCTCLossWorkspaceSize(
+    size_t handle, size_t probsDesc, size_t gradientsDesc,
+    size_t labels, size_t labelLengths, size_t inputLengths,
+    int algo, size_t ctcLossDesc) except? 0
+cpdef CTCLoss(
+    size_t handle, size_t probsDesc,
+    size_t probs, size_t labels, size_t labelLengths, size_t inputLengths,
+    size_t costs, size_t gradientsDesc, size_t gradients, int algo,
+    size_t ctcLossDesc, size_t workspace, size_t workSpaceSizeInBytes)
+
+
 ###############################################################################
 # RNN
 ###############################################################################
diff --git a/cupy/cuda/cudnn.pyx b/cupy/cuda/cudnn.pyx
index 722d55604a3..aa06feced36 100644
--- a/cupy/cuda/cudnn.pyx
+++ b/cupy/cuda/cudnn.pyx
@@ -74,6 +74,7 @@ cdef extern from 'cupy_cudnn.h' nogil:
     ctypedef int NanPropagation 'cudnnNanPropagation_t'
     ctypedef int PoolingMode 'cudnnPoolingMode_t'
     ctypedef int RNNInputMode 'cudnnRNNInputMode_t'
+    ctypedef int CTCLossAlgo 'cudnnCTCLossAlgo_t'
     ctypedef int RNNMode 'cudnnRNNMode_t'
     ctypedef int RNNAlgo 'cudnnRNNAlgo_t'
     ctypedef int RNNDataLayout 'cudnnRNNDataLayout_t'
@@ -95,6 +96,7 @@ cdef extern from 'cupy_cudnn.h' nogil:
     ctypedef void* FilterDescriptor 'cudnnFilterDescriptor_t'
     ctypedef void* Handle 'cudnnHandle_t'
     ctypedef void* PoolingDescriptor 'cudnnPoolingDescriptor_t'
+    ctypedef void* CTCLossDescriptor 'cudnnCTCLossDescriptor_t'
     ctypedef void* RNNDescriptor 'cudnnRNNDescriptor_t'
     ctypedef void* RNNDataDescriptor 'cudnnRNNDataDescriptor_t'
     ctypedef void* PersistentRNNPlan 'cudnnPersistentRNNPlan_t'
@@ -509,6 +511,24 @@ cdef extern from 'cupy_cudnn.h' nogil:
         TensorDescriptor dydesc, void* dy, TensorDescriptor dxdesc, void* dx,
         void* reserveSpace, size_t reserveSpaceSizeInBytes)
+    # CTC
+    int cudnnCreateCTCLossDescriptor(CTCLossDescriptor* ctcLossDesc)
+    int cudnnDestroyCTCLossDescriptor(CTCLossDescriptor ctcLossDesc)
+    int cudnnSetCTCLossDescriptor(
+        CTCLossDescriptor ctcLossDesc, DataType dataType)
+    int cudnnGetCTCLossDescriptor(
+        CTCLossDescriptor ctcLossDesc, DataType* dataType)
+    int cudnnGetCTCLossWorkspaceSize(
+        Handle handle, TensorDescriptor probsDesc,
+        TensorDescriptor gradientsDesc, int* labels,
+        int* labelLengths, int* inputLengths, CTCLossAlgo algo,
+        CTCLossDescriptor ctcLossDesc, size_t* sizeInBytes)
+    int cudnnCTCLoss(
+        Handle handle, TensorDescriptor probsDesc,
+        void* probs, int* labels, int* labelLengths, int* inputLengths,
+        void* costs, TensorDescriptor gradientsDesc, void* gradients,
+        CTCLossAlgo algo, CTCLossDescriptor ctcLossDesc,
+        void* workspace, size_t workSpaceSizeInBytes)
 
     # RNN
     int cudnnCreateRNNDescriptor(RNNDescriptor* rnnDesc)
     int cudnnDestroyRNNDescriptor(RNNDescriptor rnnDesc)
@@ -1879,6 +1899,59 @@ cpdef dropoutBackward(
     check_status(status)
 
 
+###############################################################################
+# CTC
+###############################################################################
+cpdef size_t createCTCLossDescriptor() except? 0:
+    cdef CTCLossDescriptor desc
+    status = cudnnCreateCTCLossDescriptor(&desc)
+    check_status(status)
+    return <size_t>desc
+
+cpdef destroyCTCLossDescriptor(size_t ctcLossDesc):
+    status = cudnnDestroyCTCLossDescriptor(<CTCLossDescriptor>ctcLossDesc)
+    check_status(status)
+
+cpdef setCTCLossDescriptor(size_t ctcLossDesc, int dataType):
+    status = cudnnSetCTCLossDescriptor(
+        <CTCLossDescriptor>ctcLossDesc, <DataType>dataType)
+    check_status(status)
+
+cpdef getCTCLossDescriptor(size_t ctcLossDesc):
+    cdef DataType compType
+    status = cudnnGetCTCLossDescriptor(
+        <CTCLossDescriptor>ctcLossDesc, &compType)
+    check_status(status)
+    return compType
+
+cpdef size_t getCTCLossWorkspaceSize(
+        size_t handle, size_t probsDesc, size_t gradientsDesc,
+        size_t labels, size_t labelLengths, size_t inputLengths,
+        int algo, size_t ctcLossDesc) except? 0:
+    cdef size_t sizeInBytes
+    status = cudnnGetCTCLossWorkspaceSize(
+        <Handle>handle, <TensorDescriptor>probsDesc,
+        <TensorDescriptor>gradientsDesc,
+        <int*>labels, <int*>labelLengths, <int*>inputLengths,
+        <CTCLossAlgo>algo, <CTCLossDescriptor>ctcLossDesc, &sizeInBytes)
+    check_status(status)
+    return sizeInBytes
+
+cpdef CTCLoss(
+        size_t handle, size_t probsDesc,
+        size_t probs, size_t labels, size_t labelLengths, size_t inputLengths,
+        size_t costs, size_t gradientsDesc, size_t gradients,
+        int algo, size_t ctcLossDesc,
+        size_t workspace, size_t workSpaceSizeInBytes):
+    status = cudnnCTCLoss(
+        <Handle>handle, <TensorDescriptor>probsDesc, <void*>probs,
+        <int*>labels, <int*>labelLengths, <int*>inputLengths,
+        <void*>costs, <TensorDescriptor>gradientsDesc, <void*>gradients,
+        <CTCLossAlgo>algo, <CTCLossDescriptor>ctcLossDesc,
+        <void*>workspace, workSpaceSizeInBytes)
+    check_status(status)
+
+
 ###############################################################################
 # RNN
 ###############################################################################
diff --git a/cupy/cuda/cupy_cudnn.h b/cupy/cuda/cupy_cudnn.h
index e8d4bd341c8..3991996347f 100644
--- a/cupy/cuda/cupy_cudnn.h
+++ b/cupy/cuda/cupy_cudnn.h
@@ -26,6 +26,7 @@ typedef enum {} cudnnActivationMode_t;
 typedef enum {} cudnnConvolutionFwdAlgo_t;
 typedef enum {} cudnnConvolutionFwdPreference_t;
 typedef enum {} cudnnConvolutionMode_t;
+typedef enum {} cudnnCTCLossAlgo_t;
 typedef enum {} cudnnDataType_t;
 typedef enum {} cudnnPoolingMode_t;
 typedef enum {} cudnnSoftmaxAlgorithm_t;
@@ -35,6 +36,7 @@ typedef enum {} cudnnErrQueryMode_t;
 typedef struct cudnnRuntimeTag_t cudnnRuntimeTag_t;
 
 typedef void* cudnnConvolutionDescriptor_t;
+typedef void* cudnnCTCLossDescriptor_t;
 typedef void* cudnnFilterDescriptor_t;
 typedef void* cudnnHandle_t;
 typedef void* cudnnPoolingDescriptor_t;
@@ -317,6 +319,26 @@ cudnnStatus_t cudnnActivationBackward_v4(...) {
     return CUDNN_STATUS_NOT_SUPPORTED;
 }
 
+// CTC
+cudnnStatus_t cudnnCreateCTCLossDescriptor(...) {
+    return CUDNN_STATUS_NOT_SUPPORTED;
+}
+cudnnStatus_t cudnnDestroyCTCLossDescriptor(...) {
+    return CUDNN_STATUS_NOT_SUPPORTED;
+}
+cudnnStatus_t cudnnSetCTCLossDescriptor(...) {
+    return CUDNN_STATUS_NOT_SUPPORTED;
+}
+cudnnStatus_t cudnnGetCTCLossDescriptor(...) {
+    return CUDNN_STATUS_NOT_SUPPORTED;
+}
+cudnnStatus_t cudnnGetCTCLossWorkspaceSize(...) {
+    return CUDNN_STATUS_NOT_SUPPORTED;
+}
+cudnnStatus_t cudnnCTCLoss(...) {
+    return CUDNN_STATUS_NOT_SUPPORTED;
+}
+
 typedef enum {} cudnnMathType_t;
 
 cudnnStatus_t cudnnSetConvolutionMathType(...) {
@@ -674,6 +696,28 @@ cudnnStatus_t cudnnReduceTensor(...) {
 #define cudnnSetRNNDescriptor_v5 cudnnSetRNNDescriptor
 
 typedef enum {} cudnnMathType_t;
+typedef enum {} cudnnCTCLossAlgo_t;
+typedef void* cudnnCTCLossDescriptor_t;
+
+// CTC
+cudnnStatus_t cudnnCreateCTCLossDescriptor(...) {
+    return CUDNN_STATUS_NOT_SUPPORTED;
+}
+cudnnStatus_t cudnnDestroyCTCLossDescriptor(...) {
+    return CUDNN_STATUS_NOT_SUPPORTED;
+}
+cudnnStatus_t cudnnSetCTCLossDescriptor(...) {
+    return CUDNN_STATUS_NOT_SUPPORTED;
+}
+cudnnStatus_t cudnnGetCTCLossDescriptor(...) {
+    return CUDNN_STATUS_NOT_SUPPORTED;
+}
+cudnnStatus_t cudnnGetCTCLossWorkspaceSize(...) {
+    return CUDNN_STATUS_NOT_SUPPORTED;
+}
+cudnnStatus_t cudnnCTCLoss(...) {
+    return CUDNN_STATUS_NOT_SUPPORTED;
+}
 
 cudnnStatus_t cudnnSetConvolutionMathType(...) {
     return CUDNN_STATUS_NOT_SUPPORTED;
@@ -789,7 +833,7 @@ typedef void* cudnnRNNDataDescriptor_t;
 typedef enum {} cudnnRNNDataLayout_t;
 typedef enum {} cudnnRNNPaddingMode_t;
 
-
+
 cudnnStatus_t cudnnSetRNNPaddingMode(...) {
     return CUDNN_STATUS_NOT_SUPPORTED;
 }
diff --git a/cupy/cudnn.pyx b/cupy/cudnn.pyx
index 2b7f85fa963..3c28308ca67 100644
--- a/cupy/cudnn.pyx
+++ b/cupy/cudnn.pyx
@@ -729,6 +729,44 @@ def set_dropout_descriptor(desc, handle, dropout):
     cudnn.setDropoutDescriptor(desc.value, handle, dropout, 0, 0, 0)
 
 
+def _create_ctc_loss_descriptor(data_type):
+    desc = Descriptor(cudnn.createCTCLossDescriptor(),
+                      py_cudnn.destroyCTCLossDescriptor)
+    cudnn.setCTCLossDescriptor(desc.value, data_type)
+    return desc
+
+
+def ctc_loss(core.ndarray probs, labels,
+             label_length, input_length, int algo):
+    batch_size = probs.shape[1]
+    labels_ptr = labels.ctypes.data
+    label_length_ptr = label_length.ctypes.data
+    input_length_ptr = input_length.ctypes.data
+    handle = get_handle()
+    data_type = get_data_type(probs.dtype)
+    ctc_desc = Descriptor(cudnn.createCTCLossDescriptor(),
+                          py_cudnn.destroyCTCLossDescriptor)
+    cudnn.setCTCLossDescriptor(ctc_desc.value, data_type)
+
+    gradients = core.ndarray(probs._shape, probs.dtype)
+    loss = core.ndarray((batch_size, ), 'f')
+    probs_desc = create_tensor_descriptor(probs)
+    gradients_desc = create_tensor_descriptor(gradients)
+
+    work_size = cudnn.getCTCLossWorkspaceSize(
+        handle, probs_desc.value, gradients_desc.value,
+        labels_ptr, label_length_ptr,
+        input_length_ptr, algo, ctc_desc.value)
+    workspace = core.ndarray((work_size,), 'b')
+
+    cudnn.CTCLoss(handle, probs_desc.value, probs.data.ptr,
+                  labels_ptr, label_length_ptr,
+                  input_length_ptr, loss.data.ptr, gradients_desc.value,
+                  gradients.data.ptr, algo, ctc_desc.value,
+                  workspace.data.ptr, work_size)
+    return loss, gradients
+
+
 def create_rnn_descriptor(hidden_size, num_layers, dropout_desc,
                           input_mode, direction, mode, data_type, algo=None):
     desc = Descriptor(cudnn.createRNNDescriptor(),
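
Usage sketch (not part of the patch): a minimal example of how the new cupy.cudnn.ctc_loss wrapper could be called, assuming a CUDA GPU with cuDNN 7 or later. probs is a float32 array of class probabilities laid out as (time, batch, classes) on the GPU; the flattened labels and the per-sample label/input lengths are host-side NumPy int32 arrays, since the wrapper reads them through .ctypes.data. The algorithm constant comes from the enum added to cudnn.pxd above; the shapes and label values below are illustrative only.

    import numpy as np
    import cupy
    import cupy.cudnn
    from cupy.cuda import cudnn as libcudnn

    T, batch, classes = 10, 2, 5  # time steps, batch size, alphabet size (0 = blank)

    # Class probabilities on the GPU (each (t, b) row sums to 1), shape (T, batch, classes).
    x = cupy.random.uniform(size=(T, batch, classes)).astype('f')
    probs = x / x.sum(axis=2, keepdims=True)

    # Host-side int32 metadata: flattened labels, label lengths, input lengths.
    labels = np.array([1, 2, 3, 2, 1, 4], dtype=np.int32)
    label_length = np.array([3, 3], dtype=np.int32)
    input_length = np.array([T, T], dtype=np.int32)

    loss, gradients = cupy.cudnn.ctc_loss(
        probs, labels, label_length, input_length,
        libcudnn.CUDNN_CTC_LOSS_ALGO_DETERMINISTIC)  # algo = 0

    print(loss.shape)       # (batch,) per-sample CTC costs
    print(gradients.shape)  # same shape as probs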