Skip to content

Commit

Permalink
compilation optimization for matmul_grad_kernel (PaddlePaddle#57823)
Browse files Browse the repository at this point in the history
  • Loading branch information
tianhaodongbd committed Sep 28, 2023
1 parent f34bf3c commit ce95ff5
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions paddle/phi/kernels/impl/matmul_grad_kernel_impl.h
@@ -25,6 +25,7 @@ limitations under the License. */
#include "paddle/phi/kernels/funcs/reduce_functor.h"
#include "paddle/phi/kernels/impl/dot_grad_kernel_impl.h"
#include "paddle/phi/kernels/impl/matmul_kernel_impl.h"
+ #include "paddle/phi/kernels/reduce_sum_kernel.h"

#if defined(__NVCC__) || defined(__HIPCC__)
#include "paddle/phi/kernels/gpu/reduce.h"
@@ -60,8 +61,8 @@ struct ReduceSumForMatmulGrad<GPUContext, T> {
const DenseTensor& input,
DenseTensor* output,
const std::vector<int>& reduce_dims) {
- funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
- dev_ctx, input, output, kps::IdentityFunctor<T>(), reduce_dims);
+ phi::SumKernel<T, GPUContext>(
+ dev_ctx, input, reduce_dims, input.dtype(), false, output);
}
};
#endif
Expand Down

0 comments on commit ce95ff5

Please sign in to comment.