ginkgo-project · tcojean · Dec 11, 2020 · Sep 11, 2020 · Nov 25, 2020 · Dec 2, 2020
diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp
@@ -64,6 +64,14 @@ void OmpExecutor::raw_copy_to(const DpcppExecutor *, size_type num_bytes,
     GKO_NOT_COMPILED(dpcpp);
 
 
+bool OmpExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const
+{
+    // Dummy check: accept only the "cpu" and "host" DPC++ device types
+    const auto target_type = dest_exec->get_device_type();
+    return target_type == "cpu" || target_type == "host";
+}
+
+
 void DpcppExecutor::raw_free(void *ptr) const noexcept
 {
     // Free must never fail, as it can be called in destructors.
@@ -112,6 +120,20 @@ int DpcppExecutor::get_num_devices(std::string) { return 0; }
 void DpcppExecutor::set_device_property() {}
 
 
+bool DpcppExecutor::verify_memory_to(const OmpExecutor *) const
+{
+    // Dummy check; the destination is unused, so it is left unnamed
+    return device_type_ == "cpu" || device_type_ == "host";
+}
+
+bool DpcppExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const
+{
+    // Dummy check: both executors must agree on device type and device id
+    return device_type_ == dest_exec->get_device_type() &&
+           device_id_ == dest_exec->get_device_id();
+}
+
+
 }  // namespace gko
 
 

diff --git a/core/devices/cuda/executor.cpp b/core/devices/cuda/executor.cpp
@@ -48,6 +48,22 @@ std::shared_ptr<const Executor> CudaExecutor::get_master() const noexcept
 }
 
 
+bool CudaExecutor::verify_memory_to(const CudaExecutor *dest_exec) const
+{
+    return dest_exec->get_device_id() == device_id_;  // same device id
+}
+
+
+bool CudaExecutor::verify_memory_to(const HipExecutor *dest_exec) const
+{
+#if GINKGO_HIP_PLATFORM_NVCC
+    return dest_exec->get_device_id() == device_id_;  // same device id
+#else
+    return false;
+#endif  // GINKGO_HIP_PLATFORM_NVCC
+}
+
+
 unsigned CudaExecutor::num_execs[max_devices];
 
 

diff --git a/core/devices/hip/executor.cpp b/core/devices/hip/executor.cpp
@@ -45,6 +45,22 @@ std::shared_ptr<const Executor> HipExecutor::get_master() const noexcept
 }
 
 
+bool HipExecutor::verify_memory_to(const HipExecutor *dest_exec) const
+{
+    return dest_exec->get_device_id() == device_id_;  // same device id
+}
+
+
+bool HipExecutor::verify_memory_to(const CudaExecutor *dest_exec) const
+{
+#if GINKGO_HIP_PLATFORM_NVCC
+    return dest_exec->get_device_id() == device_id_;  // same device id
+#else
+    return false;
+#endif  // GINKGO_HIP_PLATFORM_NVCC
+}
+
+
 int HipExecutor::num_execs[max_devices];
 
 

diff --git a/core/test/base/array.cpp b/core/test/base/array.cpp
@@ -57,12 +57,13 @@ class Array : public ::testing::Test {
         x.get_data()[1] = 2;
     }
 
-    static void assert_equal_to_original_x(gko::Array<T> &a)
+    static void assert_equal_to_original_x(gko::Array<T> &a,
+                                           bool check_zero = true)
     {
         ASSERT_EQ(a.get_num_elems(), 2);
-        EXPECT_EQ(a.get_data()[0], T{5});
+        if (check_zero) { EXPECT_EQ(a.get_data()[0], T{5}); }
         EXPECT_EQ(a.get_data()[1], T{2});
-        EXPECT_EQ(a.get_const_data()[0], T{5});
+        if (check_zero) { EXPECT_EQ(a.get_const_data()[0], T{5}); }
         EXPECT_EQ(a.get_const_data()[1], T{2});
     }
 
@@ -271,29 +272,30 @@ TYPED_TEST(Array, CanCreateTemporaryCloneOnSameExecutor)
 }
 
 
-TYPED_TEST(Array, CanCreateTemporaryCloneOnDifferentExecutor)
+// For tests between different memory spaces, see cuda/test/base/array.cu
+TYPED_TEST(Array, DoesNotCreateATemporaryCloneBetweenSameMemory)
 {
-    auto omp = gko::OmpExecutor::create();
+    auto other = gko::ReferenceExecutor::create();
 
-    auto tmp_clone = make_temporary_clone(omp, &this->x);
+    auto tmp_clone = make_temporary_clone(other, &this->x);
 
     this->assert_equal_to_original_x(*tmp_clone.get());
-    ASSERT_NE(tmp_clone.get(), &this->x);
+    ASSERT_EQ(tmp_clone.get(), &this->x);  // same object: nothing was cloned
 }
 
 
-TYPED_TEST(Array, CanCopyBackTemporaryCloneOnDifferentExecutor)
+TYPED_TEST(Array, DoesNotCopyBackTemporaryCloneBetweenSameMemory)
 {
-    auto omp = gko::OmpExecutor::create();
+    auto other = gko::ReferenceExecutor::create();
 
     {
-        auto tmp_clone = make_temporary_clone(omp, &this->x);
-        // change x, so it no longer matches the original x
-        // the copy-back will overwrite it again with the correct value
+        auto tmp_clone = make_temporary_clone(other, &this->x);
+        // change x, then verify below that no copy-back overwrites it again
         this->x.get_data()[0] = 0;
     }
 
-    this->assert_equal_to_original_x(this->x);
+    this->assert_equal_to_original_x(this->x, false);  // skips element 0
+    EXPECT_EQ(this->x.get_data()[0], TypeParam{0});
 }
 
 

diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp
@@ -474,6 +474,79 @@ TEST(DpcppExecutor, KnowsItsDeviceId)
 }
 
 
+TEST(Executor, CanVerifyMemory)
+{
+    auto ref = gko::ReferenceExecutor::create();
+    auto omp = gko::OmpExecutor::create();
+    auto hip = gko::HipExecutor::create(0, omp);
+    auto cuda = gko::CudaExecutor::create(0, omp);
+    auto omp2 = gko::OmpExecutor::create();
+    auto hip2 = gko::HipExecutor::create(0, omp);
+    auto cuda2 = gko::CudaExecutor::create(0, omp);
+    auto hip_1 = gko::HipExecutor::create(1, omp);
+    auto cuda_1 = gko::CudaExecutor::create(1, omp);
+    std::shared_ptr<gko::DpcppExecutor> host_dpcpp;
+    std::shared_ptr<gko::DpcppExecutor> cpu_dpcpp;
+    std::shared_ptr<gko::DpcppExecutor> gpu_dpcpp;
+    if (gko::DpcppExecutor::get_num_devices("host")) {
+        host_dpcpp = gko::DpcppExecutor::create(0, omp, "host");
+    }
+    if (gko::DpcppExecutor::get_num_devices("cpu")) {
+        cpu_dpcpp = gko::DpcppExecutor::create(0, omp, "cpu");
+    }
+    if (gko::DpcppExecutor::get_num_devices("gpu")) {
+        gpu_dpcpp = gko::DpcppExecutor::create(0, omp, "gpu");
+    }
+
+    ASSERT_FALSE(ref->memory_accessible(omp));
+    ASSERT_FALSE(omp->memory_accessible(ref));
+    ASSERT_FALSE(ref->memory_accessible(hip));
+    ASSERT_FALSE(hip->memory_accessible(ref));
+    ASSERT_FALSE(omp->memory_accessible(hip));
+    ASSERT_FALSE(hip->memory_accessible(omp));
+    ASSERT_FALSE(ref->memory_accessible(cuda));
+    ASSERT_FALSE(cuda->memory_accessible(ref));
+    ASSERT_FALSE(omp->memory_accessible(cuda));
+    ASSERT_FALSE(cuda->memory_accessible(omp));
+    if (host_dpcpp) {  // null unless a "host" DPC++ device was found
+        ASSERT_FALSE(host_dpcpp->memory_accessible(ref));
+        ASSERT_FALSE(ref->memory_accessible(host_dpcpp));
+        ASSERT_TRUE(host_dpcpp->memory_accessible(omp));
+        ASSERT_TRUE(omp->memory_accessible(host_dpcpp));
+    }
+    if (cpu_dpcpp) {  // null unless a "cpu" DPC++ device was found
+        ASSERT_FALSE(ref->memory_accessible(cpu_dpcpp));
+        ASSERT_FALSE(cpu_dpcpp->memory_accessible(ref));
+        ASSERT_TRUE(cpu_dpcpp->memory_accessible(omp));
+        ASSERT_TRUE(omp->memory_accessible(cpu_dpcpp));
+    }
+    if (gpu_dpcpp) {  // null unless a "gpu" DPC++ device was found
+        ASSERT_FALSE(gpu_dpcpp->memory_accessible(ref));
+        ASSERT_FALSE(ref->memory_accessible(gpu_dpcpp));
+        ASSERT_FALSE(gpu_dpcpp->memory_accessible(omp));
+        ASSERT_FALSE(omp->memory_accessible(gpu_dpcpp));
+    }
+#if GINKGO_HIP_PLATFORM_NVCC
+    ASSERT_TRUE(hip->memory_accessible(cuda));
+    ASSERT_TRUE(cuda->memory_accessible(hip));
+    ASSERT_TRUE(hip_1->memory_accessible(cuda_1));
+    ASSERT_TRUE(cuda_1->memory_accessible(hip_1));
+#else
+    ASSERT_FALSE(hip->memory_accessible(cuda));
+    ASSERT_FALSE(cuda->memory_accessible(hip));
+    ASSERT_FALSE(hip_1->memory_accessible(cuda_1));
+    ASSERT_FALSE(cuda_1->memory_accessible(hip_1));
+#endif  // GINKGO_HIP_PLATFORM_NVCC
+    ASSERT_TRUE(omp->memory_accessible(omp2));
+    ASSERT_TRUE(hip->memory_accessible(hip2));
+    ASSERT_TRUE(cuda->memory_accessible(cuda2));
+    ASSERT_FALSE(hip->memory_accessible(hip_1));
+    ASSERT_FALSE(cuda->memory_accessible(hip_1));
+    ASSERT_FALSE(cuda->memory_accessible(cuda_1));
+    ASSERT_FALSE(hip->memory_accessible(cuda_1));
+}
+
+
 template <typename T>
 struct mock_free : T {
     /**

diff --git a/core/test/base/lin_op.cpp b/core/test/base/lin_op.cpp
@@ -93,16 +93,16 @@ class EnableLinOp : public ::testing::Test {
 protected:
     EnableLinOp()
         : ref{gko::ReferenceExecutor::create()},
-          omp{gko::OmpExecutor::create()},
-          op{DummyLinOp::create(omp, gko::dim<2>{3, 5})},
+          ref2{gko::ReferenceExecutor::create()},
+          op{DummyLinOp::create(ref2, gko::dim<2>{3, 5})},
           alpha{DummyLinOp::create(ref, gko::dim<2>{1})},
           beta{DummyLinOp::create(ref, gko::dim<2>{1})},
           b{DummyLinOp::create(ref, gko::dim<2>{5, 4})},
           x{DummyLinOp::create(ref, gko::dim<2>{3, 4})}
     {}
 
     std::shared_ptr<const gko::ReferenceExecutor> ref;
-    std::shared_ptr<const gko::OmpExecutor> omp;
+    std::shared_ptr<const gko::ReferenceExecutor> ref2;
     std::unique_ptr<DummyLinOp> op;
     std::unique_ptr<DummyLinOp> alpha;
     std::unique_ptr<DummyLinOp> beta;
@@ -115,15 +115,15 @@ TEST_F(EnableLinOp, CallsApplyImpl)
 {
     op->apply(gko::lend(b), gko::lend(x));
 
-    ASSERT_EQ(op->last_access, omp);
+    ASSERT_EQ(op->last_access, ref2);
 }
 
 
 TEST_F(EnableLinOp, CallsExtendedApplyImpl)
 {
     op->apply(gko::lend(alpha), gko::lend(b), gko::lend(beta), gko::lend(x));
 
-    ASSERT_EQ(op->last_access, omp);
+    ASSERT_EQ(op->last_access, ref2);  // op was created on ref2
 }
 
 
@@ -204,43 +204,44 @@ TEST_F(EnableLinOp, ExtendedApplyFailsOnWrongBetaDimension)
 }
 
 
-TEST_F(EnableLinOp, ApplyCopiesDataToCorrectExecutor)
+// For tests between different memory spaces, see cuda/test/base/lin_op.cu
+TEST_F(EnableLinOp, ApplyDoesNotCopyBetweenSameMemory)
 {
     op->apply(gko::lend(b), gko::lend(x));
 
-    ASSERT_EQ(op->last_b_access, omp);
-    ASSERT_EQ(op->last_x_access, omp);
+    ASSERT_EQ(op->last_b_access, ref);  // b was created on ref
+    ASSERT_EQ(op->last_x_access, ref);  // x was created on ref
 }
 
 
-TEST_F(EnableLinOp, ApplyCopiesBackOnlyX)
+TEST_F(EnableLinOp, ApplyNoCopyBackBetweenSameMemory)
 {
     op->apply(gko::lend(b), gko::lend(x));
 
-    ASSERT_EQ(b->last_access, nullptr);
-    ASSERT_EQ(x->last_access, omp);
+    ASSERT_EQ(b->last_access, ref);  // b and x were both created on ref
+    ASSERT_EQ(x->last_access, ref);
 }
 
 
-TEST_F(EnableLinOp, ExtendedApplyCopiesDataToCorrectExecutor)
+TEST_F(EnableLinOp, ExtendedApplyDoesNotCopyBetweenSameMemory)
 {
     op->apply(gko::lend(alpha), gko::lend(b), gko::lend(beta), gko::lend(x));
 
-    ASSERT_EQ(op->last_alpha_access, omp);
-    ASSERT_EQ(op->last_b_access, omp);
-    ASSERT_EQ(op->last_beta_access, omp);
-    ASSERT_EQ(op->last_x_access, omp);
+    ASSERT_EQ(op->last_alpha_access, ref);  // operands were created on ref
+    ASSERT_EQ(op->last_b_access, ref);
+    ASSERT_EQ(op->last_beta_access, ref);
+    ASSERT_EQ(op->last_x_access, ref);
 }
 
 
-TEST_F(EnableLinOp, ExtendedApplyCopiesBackOnlyX)
+TEST_F(EnableLinOp, ExtendedApplyNoCopyBackBetweenSameMemory)
 {
-    op->apply(gko::lend(b), gko::lend(x));
+    op->apply(gko::lend(alpha), gko::lend(b), gko::lend(beta), gko::lend(x));
 
-    ASSERT_EQ(alpha->last_access, nullptr);
-    ASSERT_EQ(b->last_access, nullptr);
-    ASSERT_EQ(beta->last_access, nullptr);
-    ASSERT_EQ(x->last_access, omp);
+    ASSERT_EQ(alpha->last_access, ref);  // all operands were created on ref
+    ASSERT_EQ(b->last_access, ref);
+    ASSERT_EQ(beta->last_access, ref);
+    ASSERT_EQ(x->last_access, ref);
 }
 
 

diff --git a/core/test/base/utils.cpp b/core/test/base/utils.cpp
@@ -400,11 +400,12 @@ class TemporaryClone : public ::testing::Test {
 };
 
 
-TEST_F(TemporaryClone, CopiesToAnotherExecutor)
+TEST_F(TemporaryClone, DoesNotCopyToSameMemory)
 {
-    auto clone = make_temporary_clone(omp, gko::lend(obj));
+    auto other = gko::ReferenceExecutor::create();
+    auto clone = make_temporary_clone(other, gko::lend(obj));
 
-    ASSERT_EQ(clone.get()->get_executor(), omp);
+    ASSERT_NE(clone.get()->get_executor(), other);  // not moved onto other
     ASSERT_EQ(obj->get_executor(), ref);
 }
 

diff --git a/core/test/utils.hpp b/core/test/utils.hpp
@@ -69,6 +69,8 @@ using ValueAndIndexTypes =
     ::testing::Types<float, double, std::complex<float>, std::complex<double>,
                      gko::int32, gko::int64, gko::size_type>;
 
+using RealValueAndIndexTypes =
+    ::testing::Types<float, double, gko::int32, gko::int64, gko::size_type>;
 
 using ValueIndexTypes = ::testing::Types<
     std::tuple<float, gko::int32>, std::tuple<double, gko::int32>,

diff --git a/cuda/test/base/CMakeLists.txt b/cuda/test/base/CMakeLists.txt
@@ -1,3 +1,4 @@
+ginkgo_create_cuda_test(array)
 ginkgo_create_cuda_test(cuda_executor)
 ginkgo_create_cuda_test(exception_helpers)
 ginkgo_create_cuda_test(lin_op)