Review updates.
+ Do not use `operator==`; provide a `memory_accessible` function instead.
+ Make the DPC++ host and CPU executors memory compatible.
+ Use pointers for the interface instead of references.
+ Ensure DPC++ tests always work.
+ Fix some typos.

Co-authored-by: Yuhsiang M. Tsai <yhmtsai@gmail.com>
Co-authored-by: Tobias Ribizel <ribizel@kit.edu>
Co-authored-by: Pratik Nayak <pratikvn@protonmail.com>
4 people committed Dec 1, 2020
1 parent 94faee0 commit 78f8a62
Showing 4 changed files with 76 additions and 66 deletions.
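
For orientation, a minimal sketch of what the interface change looks like at a call site (illustrative only: a CUDA-enabled build and a CUDA device 0 are assumed):

#include <iostream>
#include <ginkgo/ginkgo.hpp>

int main()
{
    auto omp = gko::OmpExecutor::create();
    auto cuda = gko::CudaExecutor::create(0, omp);  // assumes a CUDA device 0

    // Before this commit: bool shared = (*omp == *cuda);
    // After it, the other executor is passed as a std::shared_ptr:
    std::cout << omp->memory_accessible(cuda) << '\n';  // prints 0: OpenMP cannot reach CUDA memory
    std::cout << omp->memory_accessible(gko::ReferenceExecutor::create())
              << '\n';  // prints 1: OpenMP and reference share host memory
}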
95 changes: 51 additions & 44 deletions core/test/base/executor.cpp
@@ -480,56 +480,63 @@ TEST(Executor, CanVerifyMemory)
     auto omp = gko::OmpExecutor::create();
     auto hip = gko::HipExecutor::create(0, omp);
     auto cuda = gko::CudaExecutor::create(0, omp);
-    auto cpu_dpcpp = gko::DpcppExecutor::create(0, omp, "cpu");
-    auto host_dpcpp = gko::DpcppExecutor::create(0, omp, "host");
-    auto gpu_dpcpp = gko::DpcppExecutor::create(0, omp, "gpu");
     auto omp2 = gko::OmpExecutor::create();
     auto hip2 = gko::HipExecutor::create(0, omp);
     auto cuda2 = gko::CudaExecutor::create(0, omp);
-    std::shared_ptr<gko::HipExecutor> hip_1 = gko::HipExecutor::create(1, omp);
-    std::shared_ptr<gko::CudaExecutor> cuda_1 =
-        gko::CudaExecutor::create(1, omp);
-
-    ASSERT_EQ(true, *ref == *omp);
-    ASSERT_EQ(true, *omp == *ref);
-    ASSERT_EQ(false, *ref == *hip);
-    ASSERT_EQ(false, *hip == *ref);
-    ASSERT_EQ(false, *omp == *hip);
-    ASSERT_EQ(false, *hip == *omp);
-    ASSERT_EQ(false, *ref == *cuda);
-    ASSERT_EQ(false, *cuda == *ref);
-    ASSERT_EQ(false, *omp == *cuda);
-    ASSERT_EQ(false, *cuda == *omp);
-    ASSERT_EQ(true, *cpu_dpcpp == *ref);
-    ASSERT_EQ(true, *host_dpcpp == *ref);
-    ASSERT_EQ(false, *gpu_dpcpp == *ref);
-    ASSERT_EQ(true, *ref == *cpu_dpcpp);
-    ASSERT_EQ(true, *ref == *host_dpcpp);
-    ASSERT_EQ(false, *ref == *gpu_dpcpp);
-    ASSERT_EQ(true, *cpu_dpcpp == *omp);
-    ASSERT_EQ(true, *host_dpcpp == *omp);
-    ASSERT_EQ(false, *gpu_dpcpp == *omp);
-    ASSERT_EQ(true, *omp == *cpu_dpcpp);
-    ASSERT_EQ(true, *omp == *host_dpcpp);
-    ASSERT_EQ(false, *omp == *gpu_dpcpp);
+    auto hip_1 = gko::HipExecutor::create(1, omp);
+    auto cuda_1 = gko::CudaExecutor::create(1, omp);
+    auto host_dpcpp = gko::DpcppExecutor::create(0, omp, "host");
+    std::shared_ptr<gko::DpcppExecutor> cpu_dpcpp;
+    std::shared_ptr<gko::DpcppExecutor> gpu_dpcpp;
+    if (gko::DpcppExecutor::get_num_devices("cpu"))
+        cpu_dpcpp = gko::DpcppExecutor::create(0, omp, "cpu");
+    if (gko::DpcppExecutor::get_num_devices("gpu"))
+        gpu_dpcpp = gko::DpcppExecutor::create(0, omp, "gpu");
+
+    ASSERT_EQ(true, ref->memory_accessible(omp));
+    ASSERT_EQ(true, omp->memory_accessible(ref));
+    ASSERT_EQ(false, ref->memory_accessible(hip));
+    ASSERT_EQ(false, hip->memory_accessible(ref));
+    ASSERT_EQ(false, omp->memory_accessible(hip));
+    ASSERT_EQ(false, hip->memory_accessible(omp));
+    ASSERT_EQ(false, ref->memory_accessible(cuda));
+    ASSERT_EQ(false, cuda->memory_accessible(ref));
+    ASSERT_EQ(false, omp->memory_accessible(cuda));
+    ASSERT_EQ(false, cuda->memory_accessible(omp));
+    ASSERT_EQ(true, host_dpcpp->memory_accessible(ref));
+    ASSERT_EQ(true, ref->memory_accessible(host_dpcpp));
+    ASSERT_EQ(true, host_dpcpp->memory_accessible(omp));
+    ASSERT_EQ(true, omp->memory_accessible(host_dpcpp));
+    if (gko::DpcppExecutor::get_num_devices("cpu")) {
+        ASSERT_EQ(true, ref->memory_accessible(cpu_dpcpp));
+        ASSERT_EQ(true, cpu_dpcpp->memory_accessible(ref));
+        ASSERT_EQ(true, cpu_dpcpp->memory_accessible(omp));
+        ASSERT_EQ(true, omp->memory_accessible(cpu_dpcpp));
+    }
+    if (gko::DpcppExecutor::get_num_devices("gpu")) {
+        ASSERT_EQ(false, gpu_dpcpp->memory_accessible(ref));
+        ASSERT_EQ(false, ref->memory_accessible(gpu_dpcpp));
+        ASSERT_EQ(false, gpu_dpcpp->memory_accessible(omp));
+        ASSERT_EQ(false, omp->memory_accessible(gpu_dpcpp));
+    }
 #if GINKGO_HIP_PLATFORM_NVCC
-    ASSERT_EQ(true, *hip == *cuda);
-    ASSERT_EQ(true, *cuda == *hip);
-    ASSERT_EQ(true, *hip_1 == *cuda_1);
-    ASSERT_EQ(true, *cuda_1 == *hip_1);
+    ASSERT_EQ(true, hip->memory_accessible(cuda));
+    ASSERT_EQ(true, cuda->memory_accessible(hip));
+    ASSERT_EQ(true, hip_1->memory_accessible(cuda_1));
+    ASSERT_EQ(true, cuda_1->memory_accessible(hip_1));
 #else
-    ASSERT_EQ(false, *hip == *cuda);
-    ASSERT_EQ(false, *cuda == *hip);
-    ASSERT_EQ(false, *hip_1 == *cuda_1);
-    ASSERT_EQ(false, *cuda_1 == *hip_1);
+    ASSERT_EQ(false, hip->memory_accessible(cuda));
+    ASSERT_EQ(false, cuda->memory_accessible(hip));
+    ASSERT_EQ(false, hip_1->memory_accessible(cuda_1));
+    ASSERT_EQ(false, cuda_1->memory_accessible(hip_1));
 #endif
-    ASSERT_EQ(true, *omp == *omp2);
-    ASSERT_EQ(true, *hip == *hip2);
-    ASSERT_EQ(true, *cuda == *cuda2);
-    ASSERT_EQ(false, *hip == *hip_1);
-    ASSERT_EQ(false, *cuda == *hip_1);
-    ASSERT_EQ(false, *cuda == *cuda_1);
-    ASSERT_EQ(false, *hip == *cuda_1);
+    ASSERT_EQ(true, omp->memory_accessible(omp2));
+    ASSERT_EQ(true, hip->memory_accessible(hip2));
+    ASSERT_EQ(true, cuda->memory_accessible(cuda2));
+    ASSERT_EQ(false, hip->memory_accessible(hip_1));
+    ASSERT_EQ(false, cuda->memory_accessible(hip_1));
+    ASSERT_EQ(false, cuda->memory_accessible(cuda_1));
+    ASSERT_EQ(false, hip->memory_accessible(cuda_1));
 }


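The guarded construction above is what the "Ensure DPC++ tests always work" item refers to: a DpcppExecutor is only created for a device type that is actually present. A hypothetical helper (not part of this commit) following the same pattern might look like:

// Hypothetical helper, not part of the commit: pick a DPC++ device type that
// is present on this machine and fall back to the host executor otherwise.
std::shared_ptr<gko::Executor> make_dpcpp_or_fallback(
    std::shared_ptr<gko::OmpExecutor> host)
{
    if (gko::DpcppExecutor::get_num_devices("gpu") > 0) {
        return gko::DpcppExecutor::create(0, host, "gpu");
    }
    if (gko::DpcppExecutor::get_num_devices("cpu") > 0) {
        return gko::DpcppExecutor::create(0, host, "cpu");
    }
    return host;  // no SYCL device available, stay on OpenMP
}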
28 changes: 15 additions & 13 deletions dpcpp/base/executor.dp.cpp
@@ -80,7 +80,7 @@ void OmpExecutor::raw_copy_to(const DpcppExecutor *dest, size_type num_bytes,
 bool OmpExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const
 {
     auto device = detail::get_devices(
-        dest_exec->getdevice_type())[dest_exec->get_device_id()];
+        dest_exec->get_device_type())[dest_exec->get_device_id()];
     return device.is_host() || device.is_cpu();
 }

@@ -177,9 +177,11 @@ bool DpcppExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const
     auto device = detail::get_devices(device_type_)[device_id_];
     auto other_device = detail::get_devices(
         dest_exec->get_device_type())[dest_exec->get_device_id()];
-    return device.get_info<cl::sycl::info::device::device_type>() ==
-               other_device.get_info<cl::sycl::info::device::device_type>() &&
-           device.get() == other_device.get();
+    return ((device.is_host() || device.is_cpu()) &&
+            (other_device.is_host() || other_device.is_cpu())) ||
+           (device.get_info<cl::sycl::info::device::device_type>() ==
+                other_device.get_info<cl::sycl::info::device::device_type>() &&
+            device.get() == other_device.get());
 }


@@ -200,11 +202,13 @@ void DpcppExecutor::set_device_property()
 {
     assert(device_id_ < DpcppExecutor::get_num_devices(device_type_));
     auto device = detail::get_devices(device_type_)[device_id_];
-    try {
-        subgroup_sizes_ =
-            device.get_info<cl::sycl::info::device::sub_group_sizes>();
-    } catch (cl::sycl::runtime_error &err) {
-        GKO_NOT_SUPPORTED(device);
+    if (!device.is_host()) {
+        try {
+            subgroup_sizes_ =
+                device.get_info<cl::sycl::info::device::sub_group_sizes>();
+        } catch (cl::sycl::runtime_error &err) {
+            GKO_NOT_SUPPORTED(device);
+        }
     }
     num_computing_units_ =
         device.get_info<sycl::info::device::max_compute_units>();
@@ -214,10 +218,8 @@
     for (std::size_t i = 0; i < 3; i++) {
         max_workitem_sizes_.push_back(max_workitem_sizes[i]);
     }
-    if (!device.is_host()) {
-        max_workgroup_size_ =
-            device.get_info<sycl::info::device::max_work_group_size>();
-    }
+    max_workgroup_size_ =
+        device.get_info<sycl::info::device::max_work_group_size>();
     // Here we declare the queue with the property `in_order` which ensures the
     // kernels are executed in the submission order. Otherwise, calls to
     // `wait()` would be needed after every call to a DPC++ function or kernel.
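The change to `DpcppExecutor::verify_memory_to` above makes host and CPU SYCL devices mutually memory compatible while keeping the same-device requirement for everything else. A self-contained sketch of that rule; the function name `devices_share_memory` is illustrative, not part of the commit:

#include <CL/sycl.hpp>

// Illustrative predicate mirroring the rule above: host and CPU devices share
// the host address space; any other pair must have the same device type and
// refer to the same physical device.
bool devices_share_memory(const cl::sycl::device &a, const cl::sycl::device &b)
{
    auto is_host_like = [](const cl::sycl::device &d) {
        return d.is_host() || d.is_cpu();
    };
    if (is_host_like(a) && is_host_like(b)) {
        return true;
    }
    return a.get_info<cl::sycl::info::device::device_type>() ==
               b.get_info<cl::sycl::info::device::device_type>() &&
           a.get() == b.get();
}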
17 changes: 9 additions & 8 deletions include/ginkgo/core/base/executor.hpp
@@ -634,14 +634,15 @@ class Executor : public log::EnableLogging<Executor> {
     virtual void synchronize() const = 0;

     /**
-     * Overload the equal-to operator which verifies whether the executors share
-     * the same memory.
+     * Verifies whether the executors share the same memory.
      *
+     * @param other the other Executor to compare against
+     *
      * @return whether the executors this and other share the same memory.
      */
-    bool operator==(const Executor &other) const
+    bool memory_accessible(const std::shared_ptr<const Executor> &other) const
     {
-        return this->verify_memory_from(other);
+        return this->verify_memory_from(other.get());
     }

 protected:
@@ -702,7 +703,7 @@
      *
      * @return whether this executor and src_exec share the same memory.
      */
-    virtual bool verify_memory_from(const Executor &src_exec) const = 0;
+    virtual bool verify_memory_from(const Executor *src_exec) const = 0;

     /**
      * @internal
@@ -718,7 +719,7 @@

     GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_ENABLE_VERIFY_MEMORY_TO);

-    GKO_ENABLE_VERIFY_MEMORY_TO(ReferenceExecutor);
+    GKO_ENABLE_VERIFY_MEMORY_TO(ReferenceExecutor, ref);

 #undef GKO_ENABLE_VERIFY_MEMORY_TO

@@ -870,9 +871,9 @@ class ExecutorBase : public Executor {
         src_exec->raw_copy_to(self(), n_bytes, src_ptr, dest_ptr);
     }

-    bool verify_memory_from(const Executor &src_exec) const override
+    bool verify_memory_from(const Executor *src_exec) const override
     {
-        return src_exec.verify_memory_to(self());
+        return src_exec->verify_memory_to(self());
     }

 private:
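With `memory_accessible` taking a `std::shared_ptr<const Executor>`, a typical downstream use is to skip copies when two executors already see the same memory. A hypothetical helper, not part of this commit; `gko::clone` and `get_executor` are assumed from the existing Ginkgo API:

// Hypothetical helper: return the object itself if its memory is already
// accessible from `exec`, otherwise a deep copy that lives on `exec`.
// T is any gko::PolymorphicObject (it provides get_executor() and clone()).
template <typename T>
std::shared_ptr<const T> on_executor(std::shared_ptr<const gko::Executor> exec,
                                     std::shared_ptr<const T> obj)
{
    if (obj->get_executor()->memory_accessible(exec)) {
        return obj;  // memory is reachable from `exec`, no copy needed
    }
    return gko::clone(exec, obj);  // copy the data over to `exec`
}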
2 changes: 1 addition & 1 deletion include/ginkgo/core/base/temporary_clone.hpp
@@ -140,7 +140,7 @@
      */
     explicit temporary_clone(std::shared_ptr<const Executor> exec, pointer ptr)
     {
-        if (*ptr->get_executor() == *exec) {
+        if (ptr->get_executor()->memory_accessible(exec)) {
             // just use the object we already have
             handle_ = handle_type(ptr, null_deleter<T>());
         } else {
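The `temporary_clone` change means a copy is only made when the object's memory is not reachable from the target executor. A short usage sketch; `make_temporary_clone`, `lend`, and the OpenMP/Reference pairing are taken from the existing API and the test above:

auto omp = gko::OmpExecutor::create();
auto ref = gko::ReferenceExecutor::create();
auto vec = gko::matrix::Dense<double>::create(omp, gko::dim<2>{2, 2});

// OmpExecutor and ReferenceExecutor report mutually accessible memory, so no
// allocation or copy happens here; `tmp` wraps the original object.
auto tmp = gko::make_temporary_clone(ref, gko::lend(vec));
// For a CUDA- or HIP-owned object the same call would create a real clone and
// copy the data back, if needed, when `tmp` goes out of scope.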
