
Commit

Merge pull request #4 from neuzxy/fuse_op
Fix bugs and optimize fused_seqpool_cvm_op
qingshui committed Sep 23, 2020
2 parents 8aaccfb + fe6ffca commit 5fac7a4
Showing 3 changed files with 56 additions and 50 deletions.
13 changes: 7 additions & 6 deletions paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
@@ -300,13 +300,14 @@ void DoFusedSeqpoolCVMGrad(const paddle::platform::Place &place,
                               PADDLE_CUDA_NUM_THREADS,
                           PADDLE_CUDA_NUM_THREADS, 0, stream>>>(total_len, slot_lens,
                                                                 slot_num, key2slot);
 
-  FusedSeqpoolCVMGradKernel<<<(total_len + PADDLE_CUDA_NUM_THREADS - 1) /
-                                  PADDLE_CUDA_NUM_THREADS,
-                              PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
-      out_grads_values, out_seqpool_grads_values, in_grads_values,
-      gpu_cvm_values, lods_values, slot_lens, key2slot, total_len,
-      embedding_size, use_cvm);
+  FusedSeqpoolCVMGradKernel<<<(total_len * embedding_size +
+                               PADDLE_CUDA_NUM_THREADS - 1) /
+                                  PADDLE_CUDA_NUM_THREADS,
+                              PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
+      out_grads_values, out_seqpool_grads_values, in_grads_values,
+      gpu_cvm_values, lods_values, slot_lens, key2slot, total_len,
+      embedding_size, use_cvm);
 }
 
 void FusedSeqpoolCVMGrad(const paddle::platform::Place &place,
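The substantive fix here is the launch grid of FusedSeqpoolCVMGradKernel: the backward kernel presumably assigns one thread per (element, embedding-dimension) pair, so the grid must cover total_len * embedding_size work items; the old launch covered only total_len, under-provisioning the grid by a factor of embedding_size. A minimal sketch of the ceil-division launch arithmetic, assuming a block size of 512 for PADDLE_CUDA_NUM_THREADS (an assumption for illustration, not taken from this diff):

    PADDLE_CUDA_NUM_THREADS = 512  # assumed block size, for illustration only

    def grad_kernel_blocks(total_len, embedding_size,
                           threads=PADDLE_CUDA_NUM_THREADS):
        """Ceil-division block count so every (element, dim) pair gets a thread."""
        work_items = total_len * embedding_size
        return (work_items + threads - 1) // threads

    # With total_len=10000 and embedding_size=16, the old launch covered only
    # ceil(10000 / 512) = 20 blocks, while the fixed launch covers
    # ceil(160000 / 512) = 313 blocks.
    print(grad_kernel_blocks(10000, 16))  # 313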
50 changes: 49 additions & 1 deletion python/paddle/fluid/contrib/layers/nn.py
@@ -35,7 +35,7 @@
     'match_matrix_tensor', 'tree_conv', 'fused_embedding_seq_pool',
     'multiclass_nms2', 'search_pyramid_hash', 'shuffle_batch', 'partial_concat',
     'partial_sum', 'tdm_child', 'rank_attention', 'tdm_sampler', 'batch_fc',
-    '_pull_box_extended_sparse'
+    '_pull_box_extended_sparse', 'fused_seqpool_cvm'
 ]


@@ -1409,3 +1409,51 @@ def _pull_box_extended_sparse(input, size, extend_size=64, dtype='float32'):
     if len(outs) == 1:
         return outs[0], outs_extend[0]
     return outs, outs_extend
+
+
+def fused_seqpool_cvm(input, pool_type, cvm, pad_value=0.0, use_cvm=True):
+    """
+    **Notes: The Op only receives a list of LoDTensor as input and only
+    supports SUM pooling for now.**
+
+    Args:
+        input(Variable|list of Variable): Input is a list of LoDTensor.
+        pool_type(str): The pooling type; only SUM pooling is supported now.
+        cvm(Variable): The cvm Variable.
+        pad_value(float): The padding value of the sequence pool.
+        use_cvm(bool): Whether to apply cvm.
+
+    Returns:
+        Variable|list of Variable: The tensor variable(s) storing the sequence
+        pool and cvm of the input.
+    """
+    helper = LayerHelper('fused_seqpool_cvm', **locals())
+
+    if pool_type.upper() != 'SUM':
+        raise ValueError(
+            "fused_seqpool_cvm only supports SUM pooling now, and your type is: "
+            + pool_type)
+
+    check_type(input, 'input', list, 'fused_seqpool_cvm')
+    if isinstance(input, list):
+        for _input in input:
+            check_variable_and_dtype(_input, 'input', ['float32'],
+                                     'fused_seqpool_cvm')
+
+    dtype = helper.input_dtype()
+    inputs = helper.multiple_input()
+    outs = [
+        helper.create_variable_for_type_inference(dtype)
+        for i in range(len(inputs))
+    ]
+
+    helper.append_op(
+        type="fused_seqpool_cvm",
+        inputs={"X": inputs,
+                "CVM": cvm},
+        outputs={"Out": outs},
+        attrs={
+            "pooltype": pool_type.upper(),
+            "pad_value": pad_value,
+            "use_cvm": use_cvm
+        })
+
+    return outs
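For orientation, a sketch of how the new contrib layer might be wired into a static-graph program. The slot count, shapes, and names below are illustrative assumptions, not part of this commit:

    import paddle.fluid as fluid

    # Three hypothetical feature slots, each a float32 LoDTensor of
    # per-instance embedding vectors (the dimension 11 is arbitrary here).
    slots = [
        fluid.layers.data(
            name="slot_%d" % i, shape=[11], dtype="float32", lod_level=1)
        for i in range(3)
    ]
    # CVM statistics (e.g. show/click counts) consumed by the op.
    cvm = fluid.layers.data(name="cvm", shape=[2], dtype="float32")

    # One pooled (and, with use_cvm=True, CVM-adjusted) output per input slot.
    outs = fluid.contrib.layers.fused_seqpool_cvm(slots, "sum", cvm)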
43 changes: 0 additions & 43 deletions python/paddle/fluid/layers/sequence_lod.py
@@ -37,7 +37,6 @@
     'sequence_enumerate',
     'sequence_mask',
     'sequence_reverse',
-    'fused_seqpool_cvm'
 ]


@@ -1437,45 +1436,3 @@ def sequence_reverse(x, name=None):
         outputs={"Y": out},
         attrs=dict())
     return out
-
-@templatedoc()
-def fused_seqpool_cvm(input, pool_type, cvm, pad_value=0.0, use_cvm=True):
-    """
-    :api_attr: Static Graph
-
-    **Notes: The Op only receives a list of LoDTensor as input.**
-    """
-    assert not in_dygraph_mode(), (
-        "fused_seqpool_cvm layer is not supported in dygraph mode yet.")
-    helper = LayerHelper('fused_seqpool_cvm', **locals())
-
-    if pool_type.upper() != 'SUM':
-        raise ValueError(
-            "fused_seqpool_cvm only supports SUM pooling now, and your type is: "
-            + pool_type)
-
-    check_type(input, 'input', list, 'fused_seqpool_cvm')
-    if isinstance(input, list):
-        for _input in input:
-            check_variable_and_dtype(_input, 'input', ['float32'],
-                                     'fused_seqpool_cvm')
-
-    dtype = helper.input_dtype()
-    inputs = helper.multiple_input()
-    outs = [
-        helper.create_variable_for_type_inference(dtype)
-        for i in range(len(inputs))
-    ]
-
-    helper.append_op(
-        type="fused_seqpool_cvm",
-        inputs={"X": inputs,
-                "CVM": cvm},
-        outputs={"Out": outs},
-        attrs={
-            "pooltype": pool_type.upper(),
-            "pad_value": pad_value,
-            "use_cvm": use_cvm
-        })
-
-    return outs
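Taken together, the last two files relocate the layer rather than delete it: fused_seqpool_cvm moves from paddle.fluid.layers (sequence_lod.py) to paddle.fluid.contrib.layers (nn.py). Any pre-existing call site would therefore need the new import path; a hypothetical before/after, reusing the slots and cvm variables from the sketch above:

    # Old call site (the definition this commit removes from fluid.layers):
    # outs = fluid.layers.fused_seqpool_cvm(slots, "sum", cvm)

    # New call site:
    outs = fluid.contrib.layers.fused_seqpool_cvm(slots, "sum", cvm)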
