【complex】 No.32 support complex for softsign (PaddlePaddle#58545)

xiaoyewww committed Nov 3, 2023
1 parent a13ba32 commit 9dd94a5
Showing 7 changed files with 76 additions and 5 deletions.
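
For context: softsign is defined as softsign(x) = x / (1 + |x|); this commit extends the CPU and GPU kernels, the Python docstring, and the unit tests so that |x| is taken as the complex modulus when x is complex64 or complex128. A minimal NumPy reference of the forward pass (a sketch, not code from this commit; the test helper ref_softsign is assumed to behave like this):

import numpy as np

def softsign_ref(x):
    # Works for real and complex inputs alike: np.abs is the absolute
    # value for real x and the modulus for complex x.
    return x / (1.0 + np.abs(x))

x = np.array([1 + 2j, -3 + 4j], dtype=np.complex64)
print(softsign_ref(x))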
3 changes: 2 additions & 1 deletion paddle/phi/kernels/cpu/activation_grad_kernel.cc
@@ -423,7 +423,8 @@ PD_REGISTER_KERNEL(cos_triple_grad,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}

PD_REGISTER_ACTIVATION_GRAD_KERNEL(softsign_grad, SoftsignGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL_WITH_COMPLEX(softsign_grad,
SoftsignGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL_WITH_COMPLEX(sigmoid_grad, SigmoidGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL_WITH_COMPLEX(sigmoid_double_grad,
SigmoidDoubleGradKernel)
2 changes: 1 addition & 1 deletion paddle/phi/kernels/cpu/activation_kernel.cc
@@ -230,7 +230,7 @@ PD_REGISTER_KERNEL(expm1,
PD_REGISTER_KERNEL(logit, CPU, ALL_LAYOUT, phi::LogitKernel, float, double) {}
PD_REGISTER_KERNEL(
square, CPU, ALL_LAYOUT, phi::SquareKernel, float, double, int, int64_t) {}
PD_REGISTER_ACTIVATION_KERNEL(softsign, SoftsignKernel)
PD_REGISTER_ACTIVATION_KERNEL_WITH_COMPLEX(softsign, SoftsignKernel)
PD_REGISTER_ACTIVATION_KERNEL_WITH_COMPLEX(sigmoid, SigmoidKernel)
PD_REGISTER_ACTIVATION_KERNEL_WITH_COMPLEX(logsigmoid, LogSigmoidKernel)
PD_REGISTER_ACTIVATION_KERNEL(hardsigmoid, HardSigmoidKernel)
54 changes: 54 additions & 0 deletions paddle/phi/kernels/funcs/activation_functor.h
@@ -107,6 +107,14 @@ struct Conj {
}
};

// T is float or double; Real<T> extracts the real part of a
// phi::dtype::complex<T> value.
template <typename T>
struct Real {
HOSTDEVICE ComplexType<T> operator()(const ComplexType<T>& val) const {
return ComplexType<T>(val.real);
}
};

// sine'(x) = cos(x)
template <typename T>
struct SinGradFunctor : public BaseActivationFunctor<T> {
@@ -2129,6 +2137,24 @@ struct SoftsignGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct SoftsignGradFunctor<ComplexType<T>>
: public BaseActivationFunctor<ComplexType<T>> {
template <typename Device,
typename X,
typename Out,
typename dOut,
typename dX>
void operator()(Device d, X x, Out out UNUSED, dOut dout, dX dx) const {
ComplexType<T> one = static_cast<ComplexType<T>>(1.0f);
auto temp = (-x / (one + x.abs()).square()).unaryExpr(Real<T>());

dx.device(d) = dout * (one / (one + x.abs()) + temp * x / x.abs());
}

static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
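
Written out, the Eigen expression above computes, elementwise,

dx = dout * ( \frac{1}{1 + |x|} + \mathrm{Re}\left(\frac{-x}{(1 + |x|)^2}\right) * \frac{x}{|x|} )

where |x| is the complex modulus and only the real part of -x / (1 + |x|)^2 is kept, via the Real<T> unary functor introduced above. This is a transcription of the code for readability, not an independent derivation of the complex softsign gradient.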

// sigmoid(x) = 1 / (1 + exp(-x))
template <typename T>
struct SigmoidFunctor : public BaseActivationFunctor<T> {
@@ -4339,6 +4365,17 @@ struct CudaSoftsignFunctor : public BaseActivationFunctor<T> {
}
};

template <typename T>
struct CudaSoftsignFunctor<ComplexType<T>>
: public BaseActivationFunctor<ComplexType<T>> {
using Complex = ComplexType<T>;
Complex one = static_cast<Complex>(1.0f);

__device__ __forceinline__ Complex operator()(const Complex x) const {
return x / (one + static_cast<Complex>(abs(x)));
}
};

template <typename T>
struct CudaSoftsignGradFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
@@ -4353,6 +4390,23 @@ struct CudaSoftsignGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct CudaSoftsignGradFunctor<ComplexType<T>>
: public BaseActivationFunctor<ComplexType<T>> {
using Complex = ComplexType<T>;
Complex one = static_cast<Complex>(1.0f);

__device__ __forceinline__ Complex operator()(const Complex dout,
const Complex x) const {
Complex abs_x = static_cast<Complex>(abs(x));
Complex abs_x_plus = one + abs_x;
Complex temp = static_cast<Complex>((-x / (abs_x_plus * abs_x_plus)).real);
return dout * (one / abs_x_plus + temp * x / abs_x);
}

static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
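
The CUDA functor evaluates, per element, the same expression as the Eigen version in the CPU path. A minimal NumPy transcription for sanity checking (a sketch only; softsign_grad_ref is not part of this commit):

import numpy as np

def softsign_grad_ref(dout, x):
    # Elementwise transcription of the complex softsign gradient above.
    abs_x = np.abs(x)
    temp = np.real(-x / (1.0 + abs_x) ** 2)
    return dout * (1.0 / (1.0 + abs_x) + temp * x / abs_x)

As in the kernels, x == 0 is not special-cased, so the x / |x| term is undefined there.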

template <typename T>
struct CudaSigmoidFunctor : public BaseActivationFunctor<T> {
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
3 changes: 2 additions & 1 deletion paddle/phi/kernels/gpu/activation_grad_kernel.cu
@@ -495,7 +495,8 @@ PD_REGISTER_KERNEL(cos_triple_grad,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}

PD_REGISTER_ACTIVATION_GRAD_KERNEL(softsign_grad, SoftsignGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL_WITH_COMPLEX(softsign_grad,
SoftsignGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL_WITH_COMPLEX(sigmoid_grad, SigmoidGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL_WITH_COMPLEX(sigmoid_double_grad,
SigmoidDoubleGradKernel)
2 changes: 1 addition & 1 deletion paddle/phi/kernels/gpu/activation_kernel.cu
@@ -292,7 +292,7 @@ PD_REGISTER_ACTIVATION_KERNEL(softshrink, SoftShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel)
PD_REGISTER_ACTIVATION_KERNEL_WITH_COMPLEX(silu, SiluKernel)
PD_REGISTER_ACTIVATION_KERNEL(softsign, SoftsignKernel)
PD_REGISTER_ACTIVATION_KERNEL_WITH_COMPLEX(softsign, SoftsignKernel)
PD_REGISTER_ACTIVATION_KERNEL_WITH_COMPLEX(sigmoid, SigmoidKernel)
PD_REGISTER_ACTIVATION_KERNEL_WITH_COMPLEX(logsigmoid, LogSigmoidKernel)
PD_REGISTER_ACTIVATION_KERNEL(hardsigmoid, HardSigmoidKernel)
2 changes: 1 addition & 1 deletion python/paddle/nn/functional/activation.py
@@ -1389,7 +1389,7 @@ def softsign(x, name=None):
softsign(x) = \frac{x}{1 + |x|}
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
x (Tensor): The input Tensor with data type float32, float64, complex64 or complex128.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
Returns:
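With this change, the Python API accepts complex inputs end to end. A hedged usage sketch, assuming a Paddle build that includes this commit:

import paddle
import paddle.nn.functional as F

x = paddle.to_tensor([1 + 2j, -3 + 4j], dtype='complex64')
y = F.softsign(x)  # elementwise x / (1 + |x|), with |x| the complex modulus
print(y)
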
15 changes: 15 additions & 0 deletions test/legacy_test/test_activation_op.py
@@ -4147,6 +4147,11 @@ def setUp(self):

np.random.seed(1024)
x = np.random.uniform(-1, 1, self.shape).astype(self.dtype)
if self.dtype == np.complex64 or self.dtype == np.complex128:
x = (
np.random.uniform(-1, 1, self.shape)
+ 1j * np.random.uniform(-1, 1, self.shape)
).astype(self.dtype)
out = ref_softsign(x)

self.inputs = {'X': OpTest.np_dtype_to_base_dtype(x)}
@@ -4162,6 +4167,16 @@ def test_check_grad(self):
self.check_grad(['X'], 'Out')


class TestSoftsign_Complex64(TestSoftsign):
def init_dtype(self):
self.dtype = np.complex64


class TestSoftsign_Complex128(TestSoftsign):
def init_dtype(self):
self.dtype = np.complex128


class TestSoftsign_ZeroDim(TestSoftsign):
def init_shape(self):
self.shape = []
