Merge pull request #1428 from anaruse/fix_tensorcore_autotune

Fix issue of cuDNN convolution math_type setting
cupy · Jun 29, 2018 · 5cb3958 · 5cb3958
2 parents 0645395 + 8c16b7e
commit 5cb3958
Showing 1 changed file with 35 additions and 18 deletions.
diff --git a/cupy/cudnn.pyx b/cupy/cudnn.pyx
@@ -551,15 +551,15 @@ cpdef tuple _find_algorithm_fwd(
         ret = cudnn.findConvolutionForwardAlgorithmEx_v7(
             handle, x_desc, x.data.ptr, filter_desc, W.data.ptr, conv_desc,
             y_desc, y.data.ptr, 1, workspace.ptr, max_workspace_size)
-        algo = (ret[0].algo, ret[0].memory)
+        algo = (ret[0].algo, ret[0].memory, ret[0].mathType)
         if use_tensor_core:
             if ret[0].mathType != cudnn.CUDNN_TENSOR_OP_MATH:
                 _warn_algorithm_fwd(x, W, y, conv_param)
     else:
         ret = cudnn.findConvolutionForwardAlgorithmEx(
             handle, x_desc, x.data.ptr, filter_desc, W.data.ptr, conv_desc,
             y_desc, y.data.ptr, 1, workspace.ptr, max_workspace_size)
-        algo = (ret[0]['algo'], ret[0]['memory'])
+        algo = (ret[0]['algo'], ret[0]['memory'], cudnn.CUDNN_DEFAULT_MATH)
     _algorithm_fwd[key] = algo
     return algo
 
@@ -585,7 +585,8 @@ cpdef tuple _get_algorithm_fwd(
             warnings.warn(msg)
         algo = ret[i].algo
         workspace_size = ret[i].memory
-        if ret[i].mathType != cudnn.CUDNN_TENSOR_OP_MATH:
+        math_type = ret[i].mathType
+        if math_type != cudnn.CUDNN_TENSOR_OP_MATH:
             _warn_algorithm_fwd(x, W, y, conv_param)
     else:
         algo = cudnn.getConvolutionForwardAlgorithm_v6(
@@ -594,7 +595,8 @@ cpdef tuple _get_algorithm_fwd(
             max_workspace_size)
         workspace_size = cudnn.getConvolutionForwardWorkspaceSize(
             handle, x_desc, filter_desc, conv_desc, y_desc, algo)
-    return algo, workspace_size
+        math_type = cudnn.CUDNN_DEFAULT_MATH
+    return algo, workspace_size, math_type
 
 
 cpdef _warn_algorithm_bwd_filter(
@@ -620,15 +622,15 @@ cpdef tuple _find_algorithm_bwd_filter(
         ret = cudnn.findConvolutionBackwardFilterAlgorithmEx_v7(
             handle, x_desc, x.data.ptr, dy_desc, dy.data.ptr, conv_desc,
             filter_desc, dW.data.ptr, 1, workspace.ptr, max_workspace_size)
-        algo = (ret[0].algo, ret[0].memory)
+        algo = (ret[0].algo, ret[0].memory, ret[0].mathType)
         if use_tensor_core:
             if ret[0].mathType != cudnn.CUDNN_TENSOR_OP_MATH:
                 _warn_algorithm_bwd_filter(x, dy, dW, conv_param)
     else:
         ret = cudnn.findConvolutionBackwardFilterAlgorithmEx(
             handle, x_desc, x.data.ptr, dy_desc, dy.data.ptr, conv_desc,
             filter_desc, dW.data.ptr, 1, workspace.ptr, max_workspace_size)
-        algo = (ret[0]['algo'], ret[0]['memory'])
+        algo = (ret[0]['algo'], ret[0]['memory'], cudnn.CUDNN_DEFAULT_MATH)
     _algorithm_bwd_filter[key] = algo
     return algo
 
@@ -656,7 +658,8 @@ cpdef tuple _get_algorithm_bwd_filter(
             warnings.warn(msg)
         algo = ret[i].algo
         workspace_size = ret[i].memory
-        if ret[i].mathType != cudnn.CUDNN_TENSOR_OP_MATH:
+        math_type = ret[i].mathType
+        if math_type != cudnn.CUDNN_TENSOR_OP_MATH:
             _warn_algorithm_bwd_filter(x, dy, dW, conv_param)
     else:
         algo = cudnn.getConvolutionBackwardFilterAlgorithm_v6(
@@ -665,7 +668,8 @@ cpdef tuple _get_algorithm_bwd_filter(
             max_workspace_size)
         workspace_size = cudnn.getConvolutionBackwardFilterWorkspaceSize(
             handle, x_desc, gy_desc, conv_desc, filter_desc, algo)
-    return algo, workspace_size
+        math_type = cudnn.CUDNN_DEFAULT_MATH
+    return algo, workspace_size, math_type
 
 
 cpdef _warn_algorithm_bwd_data(
@@ -691,15 +695,15 @@ cpdef tuple _find_algorithm_bwd_data(
         ret = cudnn.findConvolutionBackwardDataAlgorithmEx_v7(
             handle, filter_desc, W.data.ptr, x_desc, x.data.ptr, conv_desc,
             y_desc, y.data.ptr, 1, workspace.ptr, max_workspace_size)
-        algo = (ret[0].algo, ret[0].memory)
+        algo = (ret[0].algo, ret[0].memory, ret[0].mathType)
         if use_tensor_core:
             if ret[0].mathType != cudnn.CUDNN_TENSOR_OP_MATH:
                 _warn_algorithm_bwd_data(W, x, y, conv_param)
     else:
         ret = cudnn.findConvolutionBackwardDataAlgorithmEx(
             handle, filter_desc, W.data.ptr, x_desc, x.data.ptr, conv_desc,
             y_desc, y.data.ptr, 1, workspace.ptr, max_workspace_size)
-        algo = (ret[0]['algo'], ret[0]['memory'])
+        algo = (ret[0]['algo'], ret[0]['memory'], cudnn.CUDNN_DEFAULT_MATH)
     _algorithm_bwd_data[key] = algo
     return algo
 
@@ -726,7 +730,8 @@ cpdef tuple _get_algorithm_bwd_data(
             warnings.warn(msg)
         algo = ret[i].algo
         workspace_size = ret[i].memory
-        if ret[i].mathType != cudnn.CUDNN_TENSOR_OP_MATH:
+        math_type = ret[i].mathType
+        if math_type != cudnn.CUDNN_TENSOR_OP_MATH:
             _warn_algorithm_bwd_data(W, x, y, conv_param)
     else:
         algo = cudnn.getConvolutionBackwardDataAlgorithm_v6(
@@ -735,7 +740,8 @@ cpdef tuple _get_algorithm_bwd_data(
             max_workspace_size)
         workspace_size = cudnn.getConvolutionBackwardDataWorkspaceSize(
             handle, filter_desc, x_desc, conv_desc, y_desc, algo)
-    return algo, workspace_size
+        math_type = cudnn.CUDNN_DEFAULT_MATH
+    return algo, workspace_size, math_type
 
 
 cpdef bint _should_use_tensor_core(
@@ -804,14 +810,17 @@ def convolution_forward(
             cudnn.CUDNN_CROSS_CORRELATION, use_tensor_core)
 
         if auto_tune and _cudnn_version >= 5000:
-            algo, workspace_size = _find_algorithm_fwd(
+            algo, workspace_size, math_type = _find_algorithm_fwd(
                 x, W, y, conv_param, handle, x_desc, filter_desc,
                 conv_desc, y_desc, max_workspace_size, use_tensor_core)
         else:
-            algo, workspace_size = _get_algorithm_fwd(
+            algo, workspace_size, math_type = _get_algorithm_fwd(
                 x, W, y, conv_param, handle, x_desc, filter_desc,
                 conv_desc, y_desc, max_workspace_size, use_tensor_core)
 
+        if _cudnn_version >= 7000:
+            cudnn.setConvolutionMathType(conv_desc, math_type)
+
         workspace = memory.alloc(workspace_size)
 
         cudnn.convolutionForward(
@@ -881,16 +890,20 @@ def convolution_backward_filter(
             algo = cudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1
             workspace_size = cudnn.getConvolutionBackwardFilterWorkspaceSize(
                 handle, x_desc, gy_desc, conv_desc, filter_desc, algo)
+            math_type = cudnn.CUDNN_DEFAULT_MATH
             # TODO(okuta): check workspace size
         elif auto_tune and _cudnn_version >= 5000:
-            algo, workspace_size = _find_algorithm_bwd_filter(
+            algo, workspace_size, math_type = _find_algorithm_bwd_filter(
                 x, gy, gW, conv_param, handle, x_desc, gy_desc, conv_desc,
                 filter_desc, max_workspace_size, use_tensor_core)
         else:
-            algo, workspace_size = _get_algorithm_bwd_filter(
+            algo, workspace_size, math_type = _get_algorithm_bwd_filter(
                 x, gy, gW, conv_param, handle, x_desc, gy_desc, conv_desc,
                 filter_desc, max_workspace_size, use_tensor_core)
 
+        if _cudnn_version >= 7000:
+            cudnn.setConvolutionMathType(conv_desc, math_type)
+
         workspace = memory.alloc(workspace_size)
 
         cudnn.convolutionBackwardFilter_v3(
@@ -959,16 +972,20 @@ def convolution_backward_data(
             algo = cudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1
             workspace_size = cudnn.getConvolutionBackwardDataWorkspaceSize(
                 handle, filter_desc, x_desc, conv_desc, y_desc, algo)
+            math_type = cudnn.CUDNN_DEFAULT_MATH
             # TODO(okuta): check workspace size
         elif auto_tune and _cudnn_version >= 5000:
-            algo, workspace_size = _find_algorithm_bwd_data(
+            algo, workspace_size, math_type = _find_algorithm_bwd_data(
                 W, x, y, conv_param, handle, filter_desc, x_desc,
                 conv_desc, y_desc, max_workspace_size, use_tensor_core)
         else:
-            algo, workspace_size = _get_algorithm_bwd_data(
+            algo, workspace_size, math_type = _get_algorithm_bwd_data(
                 W, x, y, conv_param, handle, filter_desc, x_desc,
                 conv_desc, y_desc, max_workspace_size, use_tensor_core)
 
+        if _cudnn_version >= 7000:
+            cudnn.setConvolutionMathType(conv_desc, math_type)
+
         workspace = memory.alloc(workspace_size)
 
         cudnn.convolutionBackwardData_v3(