Commit b500d06
[AutoParallel] Eager method support autoparallel2 (PaddlePaddle#58469)
* PHI copy support auto parallel
wanghuancoder committed Oct 30, 2023
1 parent 00dadd4 commit b500d06
Showing 4 changed files with 147 additions and 14 deletions.
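
At the Python level, the practical effect of these changes is that several Tensor eager methods which previously required a plain DenseTensor now also accept a DistTensor and operate on its local DenseTensor value. The sketch below is distilled from the single-card tests added at the end of this commit; the mesh and shapes are illustrative, and a GPU build of Paddle from around this commit is assumed.

import paddle
import paddle.distributed as dist

mesh = dist.ProcessMesh([0, 1], dim_names=["x"])
dense_tensor = paddle.randn([10, 20])
dist_tensor = dist.shard_tensor(
    dense_tensor,
    dist_attr=dist.DistAttr(mesh=mesh, sharding_specs=[None, None]),
)

# Eager methods that used to raise for DistTensor now unwrap it and act on
# the local DenseTensor value.
dist_tensor._use_gpudnn(False)
ptr = dist_tensor.data_ptr()
offset = dist_tensor._offset()

Inside eager_method.cc the same unwrap pattern recurs in each method: when the underlying impl is a DistTensor, the code first fetches the local DenseTensor through unsafe_mutable_value() and then proceeds exactly as it did for a plain DenseTensor.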
67 changes: 53 additions & 14 deletions paddle/fluid/pybind/eager_method.cc
@@ -1979,15 +1979,24 @@ static PyObject* tensor__use_gpudnn(TensorObject* self,
                                     PyObject* args,
                                     PyObject* kwargs) {
   EAGER_TRY
-  PADDLE_ENFORCE(self->tensor.defined() && self->tensor.is_dense_tensor(),
-                 paddle::platform::errors::Fatal(
-                     "function _use_gpudnn is only effective for DenseTensor"));
+  PADDLE_ENFORCE(
+      self->tensor.defined() &&
+          (self->tensor.is_dense_tensor() || self->tensor.is_dist_tensor()),
+      paddle::platform::errors::Fatal("Function _use_gpudnn is only effective "
+                                      "for DenseTensor and DistTensor."));
+
   bool use_gpudnn = pybind::CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 0), 0);

   // Set the same use_gpudnn attribute, return directly
-  phi::DenseTensor* dense_tensor =
-      static_cast<phi::DenseTensor*>(self->tensor.impl().get());
+  phi::DenseTensor* dense_tensor = nullptr;
+  if (self->tensor.is_dist_tensor()) {
+    dense_tensor =
+        static_cast<phi::distributed::DistTensor*>(self->tensor.impl().get())
+            ->unsafe_mutable_value();
+  } else {
+    dense_tensor = static_cast<phi::DenseTensor*>(self->tensor.impl().get());
+  }

   phi::DenseTensorMeta* dense_tensor_meta =
       phi::DenseTensorUtils::GetMutableMeta(dense_tensor);
   if (use_gpudnn == dense_tensor_meta->use_gpudnn) {
@@ -2001,10 +2010,20 @@ static PyObject* tensor__use_gpudnn(TensorObject* self,
   target_dense_tensor.ShareDataWith(*dense_tensor);
   target_dense_tensor.set_meta(target_dense_meta);
   // Construct returned tensor
-  paddle::Tensor target_tensor(
-      std::make_shared<phi::DenseTensor>(target_dense_tensor),
-      self->tensor.name());
+  paddle::Tensor target_tensor(self->tensor.name());
+  target_tensor.set_autograd_meta(self->tensor.mutable_autograd_meta());
+  if (self->tensor.is_dist_tensor()) {
+    auto dist_tensor =
+        static_cast<phi::distributed::DistTensor*>(self->tensor.impl().get());
+    auto target_dist_tensor = std::make_shared<phi::distributed::DistTensor>(
+        dist_tensor->dims(), dist_tensor->dist_attr());
+    *(target_dist_tensor->unsafe_mutable_value()) = target_dense_tensor;
+    target_tensor.set_impl(target_dist_tensor);
+  } else {
+    target_tensor.set_impl(
+        std::make_shared<phi::DenseTensor>(target_dense_tensor));
+  }

   VLOG(4) << "Tensor: " << target_tensor.name()
           << " set use_gpudnn = " << use_gpudnn;

@@ -2657,8 +2676,8 @@ static PyObject* tensor__reset_grad_inplace_version(TensorObject* self,
   }

   paddle::Tensor* grad = egr::EagerUtils::mutable_grad(self->tensor);
-  if (grad && grad->defined() && grad->is_dense_tensor() &&
-      grad->initialized()) {
+  if (grad && grad->defined() && grad->initialized() &&
+      (grad->is_dense_tensor() || grad->is_dist_tensor())) {
     grad->reset_inplace_version(set_to_zero);
   }
   RETURN_PY_NONE
@@ -2709,14 +2728,21 @@ static PyObject* tensor__offset(TensorObject* self,
                                 PyObject* args,
                                 PyObject* kwargs) {
   EAGER_TRY
-  auto t = std::dynamic_pointer_cast<phi::DenseTensor>(self->tensor.impl());
+  phi::DenseTensor* dense_tensor = nullptr;
+  if (self->tensor.is_dist_tensor()) {
+    dense_tensor =
+        static_cast<phi::distributed::DistTensor*>(self->tensor.impl().get())
+            ->unsafe_mutable_value();
+  } else {
+    dense_tensor = static_cast<phi::DenseTensor*>(self->tensor.impl().get());
+  }
   PADDLE_ENFORCE_EQ(
-      t->IsInitialized(),
+      dense_tensor->IsInitialized(),
       true,
       platform::errors::InvalidArgument("Tensor %s has not been initialized!",
                                         self->tensor.name()));

-  return ToPyObject(t->offset());
+  return ToPyObject(dense_tensor->offset());
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }

@@ -2753,9 +2779,14 @@ static PyObject* tensor__grad_value(TensorObject* self,
   if (grad->is_dense_tensor()) {
     auto* grad_tensor = static_cast<phi::DenseTensor*>(grad->impl().get());
     return ToPyObject(grad_tensor);
+  } else if (grad->is_dist_tensor()) {
+    auto* grad_tensor =
+        static_cast<phi::distributed::DistTensor*>(self->tensor.impl().get())
+            ->unsafe_mutable_value();
+    return ToPyObject(grad_tensor);
   } else {
     PADDLE_THROW(paddle::platform::errors::Fatal(
-        "this method is only supported for DenseTensor"));
+        "This method is only supported for DenseTensor and DistTensor."));
     RETURN_PY_NONE
   }
   EAGER_CATCH_AND_THROW_RETURN_NULL
@@ -2838,7 +2869,15 @@ static PyObject* tensor_data_ptr(TensorObject* self,
         (int64_t)std::dynamic_pointer_cast<phi::DenseTensor>(  // NOLINT
             self->tensor.impl())
             ->data());
+  } else if (self->tensor.initialized() && self->tensor.is_dist_tensor()) {
+    return ToPyObject(
+        (int64_t)
+            std::dynamic_pointer_cast<phi::distributed::DistTensor>(  // NOLINT
+                self->tensor.impl())
+                ->unsafe_mutable_value()
+                ->data());
   }
+
   RETURN_PY_NONE
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
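The gradient-side methods follow the same pattern: _reset_grad_inplace_version and _grad_value now also accept a gradient held as a DistTensor. Below is a condensed sketch of the call sequence that the new semi_auto_parallel_for_grad_api.py test (next file) drives through the distributed test harness; it assumes a two-card launch and reuses MPDemoNet from the existing semi_auto_parallel_simple_net test helper, with batch and layer sizes taken from that test.

import numpy as np
from semi_auto_parallel_simple_net import MPDemoNet

import paddle
import paddle.distributed as dist
from paddle import nn

mesh = dist.ProcessMesh([0, 1], dim_names=["x"])
layer = MPDemoNet(
    np.random.random([784, 784]).astype('float32'),
    np.random.random([784, 10]).astype('float32'),
    mesh,
)

image = paddle.to_tensor(np.random.random([16, 784]).astype('float32'))
image.stop_gradient = False
label = paddle.to_tensor(np.random.random([16, 10]).astype('float32'))

loss = nn.MSELoss()(layer(image), label)
loss.backward()

# Both parameter gradients are DistTensors; these calls now reach the local
# DenseTensor value of each gradient instead of raising.
layer.w0._reset_grad_inplace_version()
grad_value = layer.w1._grad_value()

In-tree, this sequence is not run directly; test_semi_auto_parallel_basic.py registers it through test_base.run_test_case so it executes under the usual environment matrix.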
57 changes: 57 additions & 0 deletions test/auto_parallel/semi_auto_parallel_for_grad_api.py
@@ -0,0 +1,57 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import numpy as np
+from semi_auto_parallel_simple_net import MPDemoNet
+
+import paddle
+import paddle.distributed as dist
+from paddle import nn
+
+BATCH_SIZE = 16
+BATCH_NUM = 4
+IMAGE_SIZE = 784
+CLASS_NUM = 10
+
+
+def run_dynamic(layer, image, label):
+    # create loss
+    loss_fn = nn.MSELoss()
+    # run forward and backward
+    image = paddle.to_tensor(image)
+    image.stop_gradient = False
+    out = layer(image)
+
+    label = paddle.to_tensor(label)
+    loss = loss_fn(out, label)
+
+    loss.backward()
+
+    layer.w0._reset_grad_inplace_version()
+    tmp = layer.w1._grad_value()
+
+
+class TestSemiAutoParallelGradAPI:
+    def test_grad_api():
+        mesh = dist.ProcessMesh([0, 1], dim_names=["x"])
+        image = np.random.random([BATCH_SIZE, IMAGE_SIZE]).astype('float32')
+        label = np.random.random([BATCH_SIZE, CLASS_NUM]).astype('float32')
+        w0 = np.random.random([IMAGE_SIZE, IMAGE_SIZE]).astype('float32')
+        w1 = np.random.random([IMAGE_SIZE, CLASS_NUM]).astype('float32')
+        run_dynamic(layer=MPDemoNet(w0, w1, mesh), image=image, label=label)
+
+
+if __name__ == "__main__":
+    TestSemiAutoParallelGradAPI.test_grad_api()
10 changes: 10 additions & 0 deletions test/auto_parallel/test_semi_auto_parallel_basic.py
@@ -56,6 +56,16 @@ def test_reduction_api(self):
                 user_defined_envs=envs,
             )

+    def test_several_grad_api(self):
+        envs_list = test_base.gen_product_envs_list(
+            self._default_envs, self._changeable_envs
+        )
+        for envs in envs_list:
+            self.run_test_case(
+                "semi_auto_parallel_for_grad_api.py",
+                user_defined_envs=envs,
+            )
+
     def test_several_replicated_spmd_api(self):
         envs_list = test_base.gen_product_envs_list(
             self._default_envs, self._changeable_envs
27 changes: 27 additions & 0 deletions
@@ -19,6 +19,33 @@


 class TestSemiAutoParallelFunctionalInSingleCard(unittest.TestCase):
+    def test_tensor_use_gpudnn(self):
+        mesh = dist.ProcessMesh([0, 1], dim_names=["x"])
+        dense_tensor = paddle.randn([10, 20])
+        dist_tensor = dist.shard_tensor(
+            dense_tensor,
+            dist_attr=dist.DistAttr(mesh=mesh, sharding_specs=[None, None]),
+        )
+        dist_tensor._use_gpudnn(False)
+
+    def test_tensor_data_ptr(self):
+        mesh = dist.ProcessMesh([0, 1], dim_names=["x"])
+        dense_tensor = paddle.randn([10, 20])
+        dist_tensor = dist.shard_tensor(
+            dense_tensor,
+            dist_attr=dist.DistAttr(mesh=mesh, sharding_specs=[None, None]),
+        )
+        prt = dist_tensor.data_ptr()
+
+    def test_tensor_offset(self):
+        mesh = dist.ProcessMesh([0, 1], dim_names=["x"])
+        dense_tensor = paddle.randn([10, 20])
+        dist_tensor = dist.shard_tensor(
+            dense_tensor,
+            dist_attr=dist.DistAttr(mesh=mesh, sharding_specs=[None, None]),
+        )
+        offset = dist_tensor._offset()
+
     def test_tensor_copy_to(self):
         mesh = dist.ProcessMesh([0, 1], dim_names=["x"])
         dense_tensor = paddle.randn([10, 20])
