[Cherry-pick] FFT function enhancements and bugfixes (PaddlePaddle#36537)
* update fft api path (PaddlePaddle#36219)

* update fft api path
* add sample code for ihfft2 (see the usage sketch after this message)

Co-authored-by: chenfeiyu <chenfeiyu@baidu.com>

* fix fft axis (PaddlePaddle#36321)

fix: `-1` was used instead of `0` when fft's axis is `0`

* use unified external error message for cufft api (PaddlePaddle#36114)

* fft: modify sample code result (PaddlePaddle#36325)

* dynamically load mkl as a fft backend when it is available and requested (PaddlePaddle#36414)

* add rocm support for fft api (PaddlePaddle#36415)

* move signal apis

* move fft and signal API path (#2)

* move signal apis

* move fft.py and signal.py to paddle/, fix typos

* fix relative imports from fft.py and signal.py

* fix typos in signal.py (#3)

* move signal apis

* move fft.py and signal.py to paddle/, fix typos

* fix relative imports from fft.py and signal.py

* fix typos

* disable Cache when CUFFT_VERSION >= 10200 (#4)

* move signal apis

* move fft.py and signal.py to paddle/, fix typos

* fix relative imports from fft.py and signal.py

* fix typos

* Add LRUCache for fft plans

* add LRUCache for cufft and hipfft (#5)

* move signal apis

* move fft.py and signal.py to paddle/, fix typos

* fix relative imports from fft.py and signal.py

* fix typos

* WIP: add cache

* delete move constructor and operator= for CuFFTHandle and FFTConfig

* remove log from CuFFTHandle and FFTConfig

* add lrucache for fft rocm backend

* disable LRUCache when CUFFT_VERSION >= 10200

* disable copy and move for hipFFTHandle; format code

Co-authored-by: Xiaoxu Chen <chenxx_id@163.com>

* remove debug message of cufftHandler

* roll_op: support Tensor as input for shifts (PaddlePaddle#36727)

* fix fftshift/ifftshift in static graph mode

* update roll_op version

* add more test cases for fftshift/ifftshift

Co-authored-by: zhiboniu <31800336+zhiboniu@users.noreply.github.com>
Co-authored-by: chenfeiyu <chenfeiyu@baidu.com>
Co-authored-by: LJQ❤️ <33169170+lijiaqi0612@users.noreply.github.com>
4 people committed Oct 28, 2021
1 parent 96edcea commit 11b9f5f
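
To orient readers, here is a minimal usage sketch of the Python-level APIs this commit touches, assuming paddle >= 2.2 with the `paddle.fft` module path described above; it is illustrative, not the exact sample code added for `ihfft2`:

```python
import paddle

x = paddle.randn([4, 4])

# FFT along axis 0 -- the axis handling fixed by PaddlePaddle#36321.
y = paddle.fft.fft(x, axis=0)

# ihfft2: the 2-D inverse of hfft2, mapping a real signal to a complex
# half-spectrum; this commit adds docstring sample code for it.
spec = paddle.fft.ihfft2(x)

# fftshift/ifftshift move the zero-frequency component to/from the
# center; this commit also fixes them in static graph mode.
centered = paddle.fft.fftshift(spec)
restored = paddle.fft.ifftshift(centered)
```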
Showing 29 changed files with 1,413 additions and 549 deletions.
4 changes: 2 additions & 2 deletions cmake/third_party.cmake
@@ -255,8 +255,8 @@ if(WITH_GPU)
include(external/cub) # download cub
list(APPEND third_party_deps extern_cub)
endif()
set(URL "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg.tar.gz" CACHE STRING "" FORCE)
file_download_and_uncompress(${URL} "externalError" MD5 061f3b7895aadcbe2c3ed592590f8b10) # download file externalErrorMsg.tar.gz
set(URL "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg_20210928.tar.gz" CACHE STRING "" FORCE)
file_download_and_uncompress(${URL} "externalError" MD5 a712a49384e77ca216ad866712f7cafa) # download file externalErrorMsg.tar.gz
if(WITH_TESTING)
# copy externalErrorMsg.pb, just for unittest can get error message correctly.
set(SRC_DIR ${THIRD_PARTY_PATH}/externalError/data)
16 changes: 13 additions & 3 deletions paddle/fluid/operators/CMakeLists.txt
@@ -105,10 +105,20 @@ else()
op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale)
endif()

if (WITH_GPU AND (NOT WITH_ROCM))
op_library(spectral_op SRCS spectral_op.cc spectral_op.cu DEPS dynload_cuda ${OP_HEADER_DEPS})
if (WITH_GPU OR WITH_ROCM)
if (MKL_FOUND AND WITH_ONEMKL)
op_library(spectral_op SRCS spectral_op.cc spectral_op.cu DEPS dynload_cuda dynload_mklrt ${OP_HEADER_DEPS})
target_include_directories(spectral_op PRIVATE ${MKL_INCLUDE})
else()
op_library(spectral_op SRCS spectral_op.cc spectral_op.cu DEPS dynload_cuda ${OP_HEADER_DEPS})
endif()
else()
op_library(spectral_op SRCS spectral_op.cc DEPS ${OP_HEADER_DEPS})
if (MKL_FOUND AND WITH_ONEMKL)
op_library(spectral_op SRCS spectral_op.cc DEPS dynload_mklrt ${OP_HEADER_DEPS})
target_include_directories(spectral_op PRIVATE ${MKL_INCLUDE})
else()
op_library(spectral_op SRCS spectral_op.cc DEPS ${OP_HEADER_DEPS})
endif()
endif()

op_library(lstm_op DEPS ${OP_HEADER_DEPS} lstm_compute)
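The `spectral_op` built here implements the FFT kernels and is where the cuFFT/hipFFT plan LRUCache described in the commit message lives (`spectral_op.cu` is not expanded in this view). A conceptual sketch of such a plan cache, in Python with illustrative names rather than Paddle's actual C++ types:

```python
from collections import OrderedDict

class PlanLRUCache:
    """Caches FFT plans keyed by their configuration, evicting the least
    recently used entry; per the commit message, Paddle disables this
    cache when CUFFT_VERSION >= 10200."""

    def __init__(self, capacity):
        self.capacity = capacity
        self._plans = OrderedDict()  # (shape, dtype, fft_type) -> plan

    def get_or_create(self, key, factory):
        if key in self._plans:
            self._plans.move_to_end(key)  # mark as most recently used
            return self._plans[key]
        plan = factory()  # e.g. create a cufft/hipfft plan
        self._plans[key] = plan
        if len(self._plans) > self.capacity:
            self._plans.popitem(last=False)  # evict least recently used
        return plan
```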
52 changes: 33 additions & 19 deletions paddle/fluid/operators/roll_op.cc
@@ -40,21 +40,23 @@ class RollOp : public framework::OperatorWithKernel {
auto dims = ctx->Attrs().Get<std::vector<int64_t>>("axis");
auto shifts = ctx->Attrs().Get<std::vector<int64_t>>("shifts");

if (dims.size() != 0) {
PADDLE_ENFORCE_EQ(dims.size(), shifts.size(),
platform::errors::InvalidArgument(
"When dims.size() != 0, dims.size() "
"should be equal to "
"shifts.size(). But received "
"dims.size() = %d, shifts.size() = %d",
dims.size(), shifts.size()));
} else {
PADDLE_ENFORCE_EQ(shifts.size(), 1,
platform::errors::InvalidArgument(
"When dims.size() == 0, shifts.size() "
"should be equal to 1, But received "
"shifts.size() = %d",
shifts.size()));
if (!ctx->HasInput("ShiftsTensor")) {
if (dims.size() != 0) {
PADDLE_ENFORCE_EQ(dims.size(), shifts.size(),
platform::errors::InvalidArgument(
"When dims.size() != 0, dims.size() "
"should be equal to "
"shifts.size(). But received "
"dims.size() = %d, shifts.size() = %d",
dims.size(), shifts.size()));
} else {
PADDLE_ENFORCE_EQ(shifts.size(), 1,
platform::errors::InvalidArgument(
"When dims.size() == 0, shifts.size() "
"should be equal to 1, But received "
"shifts.size() = %d",
shifts.size()));
}
}

ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
@@ -105,6 +107,10 @@ class RollOpMaker : public framework::OpProtoAndCheckerMaker {
"The number of places by which the elements "
"of the tensor are shifted.")
.SetDefault({});
AddInput("ShiftsTensor",
"The number of places by which the elements of the tensor "
"are shifted.")
.AsDispensable();
AddAttr<std::vector<int64_t>>(
"axis",
"Axis along which to roll. It must have the same size "
@@ -129,6 +135,9 @@ class RollGradMaker : public framework::SingleGradOpMaker<T> {
void Apply(GradOpPtr<T> op) const override {
op->SetType("roll_grad");
op->SetInput("X", this->Input("X"));
if (this->HasInput("ShiftsTensor")) {
op->SetInput("ShiftsTensor", this->Input("ShiftsTensor"));
}
op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
op->SetAttrMap(this->Attrs());
@@ -174,7 +183,12 @@ REGISTER_OP_VERSION(roll)
"(std::vector<int64_t>) Axis along which to roll. "
"It must have the same size with shifts, or size = 0.",
std::vector<int64_t>())
.DeleteAttr(
"dims",
"(std::vector<int64_t>) Dims along which to roll. "
"It must have the same size with shifts, or size = 0."));
.DeleteAttr("dims",
"(std::vector<int64_t>) Dims along which to roll. "
"It must have the same size with shifts, or size = 0."))
.AddCheckpoint(
R"ROC(Upgrade roll add a dispensable input "ShiftsTensor".)ROC",
paddle::framework::compatible::OpVersionDesc().NewInput(
"ShiftsTensor",
"The number of places by which the elements of"
"the tensor are shifted."));
20 changes: 20 additions & 0 deletions paddle/fluid/operators/roll_op.cu
@@ -59,6 +59,16 @@ class RollKernel<platform::CUDADeviceContext, T>
auto* in = context.Input<LoDTensor>("X");
auto* out = context.Output<LoDTensor>("Out");
std::vector<int64_t> shifts = context.Attr<std::vector<int64_t>>("shifts");
if (context.HasInput("ShiftsTensor")) {
const auto* shifts_tensor =
context.Input<framework::Tensor>("ShiftsTensor");
PADDLE_ENFORCE_EQ(
shifts_tensor->dims().size(), 1,
platform::errors::InvalidArgument(
"The rank of ShiftsTensor is expected to be 1, got %s",
shifts_tensor->dims().size()));
shifts = GetDataFromTensor<int64_t>(shifts_tensor);
}
std::vector<int64_t> dims = context.Attr<std::vector<int64_t>>("axis");

auto* in_data = in->data<T>();
@@ -134,6 +144,16 @@ class RollGradKernel<platform::CUDADeviceContext, T>
auto* in = context.Input<LoDTensor>(framework::GradVarName("Out"));
auto* out = context.Output<LoDTensor>(framework::GradVarName("X"));
std::vector<int64_t> shifts = context.Attr<std::vector<int64_t>>("shifts");
if (context.HasInput("ShiftsTensor")) {
const auto* shifts_tensor =
context.Input<framework::Tensor>("ShiftsTensor");
PADDLE_ENFORCE_EQ(
shifts_tensor->dims().size(), 1,
platform::errors::InvalidArgument(
"The rank of ShiftsTensor is expected to be 1, got %s",
shifts_tensor->dims().size()));
shifts = GetDataFromTensor<int64_t>(shifts_tensor);
}
std::vector<int64_t> dims = context.Attr<std::vector<int64_t>>("axis");

auto* in_data = in->data<T>();
17 changes: 17 additions & 0 deletions paddle/fluid/operators/roll_op.h
@@ -16,6 +16,8 @@
#include <memory>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace operators {
@@ -85,6 +87,16 @@ class RollKernel : public framework::OpKernel<T> {
auto& input = input_var->Get<LoDTensor>();
auto* output = output_var->GetMutable<LoDTensor>();
std::vector<int64_t> shifts = context.Attr<std::vector<int64_t>>("shifts");
if (context.HasInput("ShiftsTensor")) {
const auto* shifts_tensor =
context.Input<framework::Tensor>("ShiftsTensor");
PADDLE_ENFORCE_EQ(
shifts_tensor->dims().size(), 1,
platform::errors::InvalidArgument(
"The rank of ShiftsTensor is expected to be 1, got %s",
shifts_tensor->dims().size()));
shifts = GetDataFromTensor<int64_t>(shifts_tensor);
}
std::vector<int64_t> dims = context.Attr<std::vector<int64_t>>("axis");

std::vector<T> out_vec;
@@ -123,6 +135,11 @@ class RollGradKernel : public framework::OpKernel<T> {
auto& input = input_var->Get<LoDTensor>();
auto* output = output_var->GetMutable<LoDTensor>();
std::vector<int64_t> shifts = context.Attr<std::vector<int64_t>>("shifts");
if (context.HasInput("ShiftsTensor")) {
const auto* shifts_tensor =
context.Input<framework::Tensor>("ShiftsTensor");
shifts = GetDataFromTensor<int64_t>(shifts_tensor);
}
std::vector<int64_t> dims = context.Attr<std::vector<int64_t>>("axis");

std::vector<T> out_vec;
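For reference, the roll semantics these kernels implement match `numpy.roll`; a one-axis sketch in plain Python:

```python
def roll_1d(values, shift):
    # Element i of the result comes from position (i - shift) % n of the
    # input, i.e. the sequence is rotated right by `shift`.
    n = len(values)
    if n == 0:
        return list(values)
    shift %= n
    return list(values[n - shift:]) + list(values[:n - shift])

assert roll_1d([1, 2, 3, 4], 1) == [4, 1, 2, 3]
```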
