From 33c576aae4c1278580c1eede69bbae9081cb3f9a Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Fri, 11 Sep 2020 17:28:35 +0200 Subject: [PATCH 1/5] Add executor operator== for memory compatibility Co-authored-with: Terry Cojean --- core/device_hooks/cuda_hooks.cpp | 3 + core/device_hooks/dpcpp_hooks.cpp | 28 + core/device_hooks/hip_hooks.cpp | 3 + core/test/base/array.cpp | 20 +- core/test/base/executor.cpp | 59 ++ core/test/base/lin_op.cpp | 35 +- core/test/base/utils.cpp | 4 +- core/test/utils.hpp | 2 + cuda/base/executor.cpp | 10 + cuda/test/base/CMakeLists.txt | 1 + cuda/test/base/array.cu | 92 +++ cuda/test/base/lin_op.cu | 104 ++++ dpcpp/base/executor.dp.cpp | 39 +- hip/base/executor.hip.cpp | 10 + include/ginkgo/core/base/dim.hpp | 1 - .../ginkgo/core/base/exception_helpers.hpp | 1 + include/ginkgo/core/base/executor.hpp | 119 ++++ include/ginkgo/core/base/temporary_clone.hpp | 202 +++++++ include/ginkgo/core/base/utils.hpp | 553 +----------------- include/ginkgo/core/base/utils_helper.hpp | 438 ++++++++++++++ include/ginkgo/core/log/logger.hpp | 2 +- 21 files changed, 1143 insertions(+), 583 deletions(-) create mode 100644 cuda/test/base/array.cu create mode 100644 include/ginkgo/core/base/temporary_clone.hpp create mode 100644 include/ginkgo/core/base/utils_helper.hpp diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp index 46e342d9c0a..a5c33c6541d 100644 --- a/core/device_hooks/cuda_hooks.cpp +++ b/core/device_hooks/cuda_hooks.cpp @@ -98,6 +98,9 @@ void CudaExecutor::raw_copy_to(const DpcppExecutor *, size_type num_bytes, GKO_NOT_COMPILED(cuda); +bool CudaExecutor::verify_memory_to(const HipExecutor *) const { return false; } + + void CudaExecutor::synchronize() const GKO_NOT_COMPILED(cuda); diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp index 9d6c57f11a4..472451c6513 100644 --- a/core/device_hooks/dpcpp_hooks.cpp +++ b/core/device_hooks/dpcpp_hooks.cpp @@ -64,6 +64,14 @@ void OmpExecutor::raw_copy_to(const DpcppExecutor *, size_type num_bytes, GKO_NOT_COMPILED(dpcpp); +bool OmpExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const +{ + // Dummy check + auto dev_type = dest_exec->get_device_type(); + return dev_type == "cpu" || dev_type == "host"; +} + + void DpcppExecutor::raw_free(void *ptr) const noexcept { // Free must never fail, as it can be called in destructors. @@ -112,6 +120,26 @@ int DpcppExecutor::get_num_devices(std::string) { return 0; } void DpcppExecutor::set_device_property() {} +bool DpcppExecutor::verify_memory_to(const OmpExecutor *dest_exec) const +{ + // Dummy check + return device_type_ == "cpu" || device_type_ == "host"; +} + +bool DpcppExecutor::verify_memory_to(const ReferenceExecutor *dest_exec) const +{ + // Dummy check + return device_type_ == "cpu" || device_type_ == "host"; +} + +bool DpcppExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const +{ + // Dummy check + return dest_exec->get_device_type() == device_type_ && + dest_exec->get_device_id() == device_id_; +} + + } // namespace gko diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp index b98b2fcb8a4..45180980159 100644 --- a/core/device_hooks/hip_hooks.cpp +++ b/core/device_hooks/hip_hooks.cpp @@ -95,6 +95,9 @@ void HipExecutor::raw_copy_to(const DpcppExecutor *, size_type num_bytes, GKO_NOT_COMPILED(hip); +bool HipExecutor::verify_memory_to(const CudaExecutor *) const { return false; } + + void HipExecutor::synchronize() const GKO_NOT_COMPILED(hip); diff --git a/core/test/base/array.cpp b/core/test/base/array.cpp index 959392e5c71..c20782651f7 100644 --- a/core/test/base/array.cpp +++ b/core/test/base/array.cpp @@ -57,12 +57,13 @@ class Array : public ::testing::Test { x.get_data()[1] = 2; } - static void assert_equal_to_original_x(gko::Array &a) + static void assert_equal_to_original_x(gko::Array &a, + bool check_zero = true) { ASSERT_EQ(a.get_num_elems(), 2); - EXPECT_EQ(a.get_data()[0], T{5}); + if (check_zero) EXPECT_EQ(a.get_data()[0], T{5}); EXPECT_EQ(a.get_data()[1], T{2}); - EXPECT_EQ(a.get_const_data()[0], T{5}); + if (check_zero) EXPECT_EQ(a.get_const_data()[0], T{5}); EXPECT_EQ(a.get_const_data()[1], T{2}); } @@ -271,29 +272,30 @@ TYPED_TEST(Array, CanCreateTemporaryCloneOnSameExecutor) } -TYPED_TEST(Array, CanCreateTemporaryCloneOnDifferentExecutor) +// For tests between different memory, check cuda/test/base/array.cu +TYPED_TEST(Array, DoesNotCreateATemporaryCloneBetweenSameMemory) { auto omp = gko::OmpExecutor::create(); auto tmp_clone = make_temporary_clone(omp, &this->x); this->assert_equal_to_original_x(*tmp_clone.get()); - ASSERT_NE(tmp_clone.get(), &this->x); + ASSERT_EQ(tmp_clone.get(), &this->x); } -TYPED_TEST(Array, CanCopyBackTemporaryCloneOnDifferentExecutor) +TYPED_TEST(Array, CanDoesNotCopyBackTemporaryCloneBetweenSameMemory) { auto omp = gko::OmpExecutor::create(); { auto tmp_clone = make_temporary_clone(omp, &this->x); - // change x, so it no longer matches the original x - // the copy-back will overwrite it again with the correct value + // change x, and check that there is no copy-back to overwrite it again this->x.get_data()[0] = 0; } - this->assert_equal_to_original_x(this->x); + this->assert_equal_to_original_x(this->x, false); + EXPECT_EQ(this->x.get_data()[0], TypeParam{0}); } diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp index b968bbe210a..73c82a06fa8 100644 --- a/core/test/base/executor.cpp +++ b/core/test/base/executor.cpp @@ -474,6 +474,65 @@ TEST(DpcppExecutor, KnowsItsDeviceId) } +TEST(Executor, CanVerifyMemory) +{ + auto ref = gko::ReferenceExecutor::create(); + auto omp = gko::OmpExecutor::create(); + auto hip = gko::HipExecutor::create(0, omp); + auto cuda = gko::CudaExecutor::create(0, omp); + auto cpu_dpcpp = gko::DpcppExecutor::create(0, omp, "cpu"); + auto host_dpcpp = gko::DpcppExecutor::create(0, omp, "host"); + auto gpu_dpcpp = gko::DpcppExecutor::create(0, omp, "gpu"); + auto omp2 = gko::OmpExecutor::create(); + auto hip2 = gko::HipExecutor::create(0, omp); + auto cuda2 = gko::CudaExecutor::create(0, omp); + std::shared_ptr hip_1 = gko::HipExecutor::create(1, omp); + std::shared_ptr cuda_1 = + gko::CudaExecutor::create(1, omp); + + ASSERT_EQ(true, *ref == *omp); + ASSERT_EQ(true, *omp == *ref); + ASSERT_EQ(false, *ref == *hip); + ASSERT_EQ(false, *hip == *ref); + ASSERT_EQ(false, *omp == *hip); + ASSERT_EQ(false, *hip == *omp); + ASSERT_EQ(false, *ref == *cuda); + ASSERT_EQ(false, *cuda == *ref); + ASSERT_EQ(false, *omp == *cuda); + ASSERT_EQ(false, *cuda == *omp); + ASSERT_EQ(true, *cpu_dpcpp == *ref); + ASSERT_EQ(true, *host_dpcpp == *ref); + ASSERT_EQ(false, *gpu_dpcpp == *ref); + ASSERT_EQ(true, *ref == *cpu_dpcpp); + ASSERT_EQ(true, *ref == *host_dpcpp); + ASSERT_EQ(false, *ref == *gpu_dpcpp); + ASSERT_EQ(true, *cpu_dpcpp == *omp); + ASSERT_EQ(true, *host_dpcpp == *omp); + ASSERT_EQ(false, *gpu_dpcpp == *omp); + ASSERT_EQ(true, *omp == *cpu_dpcpp); + ASSERT_EQ(true, *omp == *host_dpcpp); + ASSERT_EQ(false, *omp == *gpu_dpcpp); +#if GINKGO_HIP_PLATFORM_NVCC + ASSERT_EQ(true, *hip == *cuda); + ASSERT_EQ(true, *cuda == *hip); + ASSERT_EQ(true, *hip_1 == *cuda_1); + ASSERT_EQ(true, *cuda_1 == *hip_1); +#else + ASSERT_EQ(false, *hip == *cuda); + ASSERT_EQ(false, *cuda == *hip); + ASSERT_EQ(false, *hip_1 == *cuda_1); + ASSERT_EQ(false, *cuda_1 == *hip_1); +#endif + ASSERT_EQ(true, *omp == *omp2); + ASSERT_EQ(true, *hip == *hip2); + ASSERT_EQ(true, *cuda == *cuda2); + ASSERT_EQ(false, *hip == *hip_1); + ASSERT_EQ(false, *cuda == *hip_1); + ASSERT_EQ(false, *cuda == *cuda_1); + ASSERT_EQ(false, *hip == *cuda_1); +} + + template struct mock_free : T { /** diff --git a/core/test/base/lin_op.cpp b/core/test/base/lin_op.cpp index 303e8bd6e89..3da8eda55f0 100644 --- a/core/test/base/lin_op.cpp +++ b/core/test/base/lin_op.cpp @@ -204,43 +204,44 @@ TEST_F(EnableLinOp, ExtendedApplyFailsOnWrongBetaDimension) } -TEST_F(EnableLinOp, ApplyCopiesDataToCorrectExecutor) +// For tests between different memory, check cuda/test/base/lin_op.cu +TEST_F(EnableLinOp, ApplyDoesNotCopyBetweenSameMemory) { op->apply(gko::lend(b), gko::lend(x)); - ASSERT_EQ(op->last_b_access, omp); - ASSERT_EQ(op->last_x_access, omp); + ASSERT_EQ(op->last_b_access, ref); + ASSERT_EQ(op->last_x_access, ref); } -TEST_F(EnableLinOp, ApplyCopiesBackOnlyX) +TEST_F(EnableLinOp, ApplyNoCopyBackBetweenSameMemory) { op->apply(gko::lend(b), gko::lend(x)); - ASSERT_EQ(b->last_access, nullptr); - ASSERT_EQ(x->last_access, omp); + ASSERT_EQ(b->last_access, ref); + ASSERT_EQ(x->last_access, ref); } -TEST_F(EnableLinOp, ExtendedApplyCopiesDataToCorrectExecutor) +TEST_F(EnableLinOp, ExtendedApplyDoesNotCopyBetweenSameMemory) { op->apply(gko::lend(alpha), gko::lend(b), gko::lend(beta), gko::lend(x)); - ASSERT_EQ(op->last_alpha_access, omp); - ASSERT_EQ(op->last_b_access, omp); - ASSERT_EQ(op->last_beta_access, omp); - ASSERT_EQ(op->last_x_access, omp); + ASSERT_EQ(op->last_alpha_access, ref); + ASSERT_EQ(op->last_b_access, ref); + ASSERT_EQ(op->last_beta_access, ref); + ASSERT_EQ(op->last_x_access, ref); } -TEST_F(EnableLinOp, ExtendedApplyCopiesBackOnlyX) +TEST_F(EnableLinOp, ExtendedApplyNoCopyBackBetweenSameMemory) { - op->apply(gko::lend(b), gko::lend(x)); + op->apply(gko::lend(alpha), gko::lend(b), gko::lend(beta), gko::lend(x)); - ASSERT_EQ(alpha->last_access, nullptr); - ASSERT_EQ(b->last_access, nullptr); - ASSERT_EQ(beta->last_access, nullptr); - ASSERT_EQ(x->last_access, omp); + ASSERT_EQ(alpha->last_access, ref); + ASSERT_EQ(b->last_access, ref); + ASSERT_EQ(beta->last_access, ref); + ASSERT_EQ(x->last_access, ref); } diff --git a/core/test/base/utils.cpp b/core/test/base/utils.cpp index df187cfb81f..822449e8f24 100644 --- a/core/test/base/utils.cpp +++ b/core/test/base/utils.cpp @@ -400,11 +400,11 @@ class TemporaryClone : public ::testing::Test { }; -TEST_F(TemporaryClone, CopiesToAnotherExecutor) +TEST_F(TemporaryClone, DoesNotCopyToSameMemory) { auto clone = make_temporary_clone(omp, gko::lend(obj)); - ASSERT_EQ(clone.get()->get_executor(), omp); + ASSERT_NE(clone.get()->get_executor(), omp); ASSERT_EQ(obj->get_executor(), ref); } diff --git a/core/test/utils.hpp b/core/test/utils.hpp index d6ae92bb951..5838379c5f0 100644 --- a/core/test/utils.hpp +++ b/core/test/utils.hpp @@ -69,6 +69,8 @@ using ValueAndIndexTypes = ::testing::Types, std::complex, gko::int32, gko::int64, gko::size_type>; +using RealValueAndIndexTypes = + ::testing::Types; using ValueIndexTypes = ::testing::Types< std::tuple, std::tuple, diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index 7926ae98a21..5284daa2dd1 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -162,6 +162,16 @@ void CudaExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes, } +bool CudaExecutor::verify_memory_to(const HipExecutor *dest_exec) const +{ +#if GINKGO_HIP_PLATFORM_NVCC + return device_id_ == dest_exec->get_device_id(); +#else + return false; +#endif +} + + void CudaExecutor::synchronize() const { cuda::device_guard g(this->get_device_id()); diff --git a/cuda/test/base/CMakeLists.txt b/cuda/test/base/CMakeLists.txt index d9ba808fa21..acbe3244fd4 100644 --- a/cuda/test/base/CMakeLists.txt +++ b/cuda/test/base/CMakeLists.txt @@ -1,3 +1,4 @@ +ginkgo_create_cuda_test(array) ginkgo_create_cuda_test(cuda_executor) ginkgo_create_cuda_test(exception_helpers) ginkgo_create_cuda_test(lin_op) diff --git a/cuda/test/base/array.cu b/cuda/test/base/array.cu new file mode 100644 index 00000000000..c4757c42b44 --- /dev/null +++ b/cuda/test/base/array.cu @@ -0,0 +1,92 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include + + +#include "core/test/utils.hpp" + + +template +class Array : public ::testing::Test { +protected: + Array() : exec(gko::ReferenceExecutor::create()), x(exec, 2) + { + x.get_data()[0] = 5; + x.get_data()[1] = 2; + } + + static void assert_equal_to_original_x(gko::Array &a) + { + ASSERT_EQ(a.get_num_elems(), 2); + EXPECT_EQ(a.get_data()[0], T{5}); + EXPECT_EQ(a.get_data()[1], T{2}); + EXPECT_EQ(a.get_const_data()[0], T{5}); + EXPECT_EQ(a.get_const_data()[1], T{2}); + } + + std::shared_ptr exec; + gko::Array x; +}; + +TYPED_TEST_SUITE(Array, gko::test::ValueAndIndexTypes); + + +TYPED_TEST(Array, CanCreateTemporaryCloneOnDifferentExecutor) +{ + auto cuda = gko::CudaExecutor::create(0, this->exec); + + auto tmp_clone = make_temporary_clone(cuda, &this->x); + + ASSERT_NE(tmp_clone.get(), &this->x); + tmp_clone->set_executor(this->exec); + this->assert_equal_to_original_x(*tmp_clone.get()); +} + + +TYPED_TEST(Array, CanCopyBackTemporaryCloneOnDifferentExecutor) +{ + auto cuda = gko::CudaExecutor::create(0, this->exec); + + { + auto tmp_clone = make_temporary_clone(cuda, &this->x); + // change x, so it no longer matches the original x + // the copy-back will overwrite it again with the correct value + this->x.get_data()[0] = 0; + } + + this->assert_equal_to_original_x(this->x); +} diff --git a/cuda/test/base/lin_op.cu b/cuda/test/base/lin_op.cu index 82016f87816..668c2df339c 100644 --- a/cuda/test/base/lin_op.cu +++ b/cuda/test/base/lin_op.cu @@ -39,6 +39,110 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace { +class DummyLinOp : public gko::EnableLinOp, + public gko::EnableCreateMethod { +public: + DummyLinOp(std::shared_ptr exec, + gko::dim<2> size = gko::dim<2>{}) + : EnableLinOp(exec, size) + {} + + void access() const { last_access = this->get_executor(); } + + mutable std::shared_ptr last_access; + mutable std::shared_ptr last_b_access; + mutable std::shared_ptr last_x_access; + mutable std::shared_ptr last_alpha_access; + mutable std::shared_ptr last_beta_access; + +protected: + void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override + { + this->access(); + static_cast(b)->access(); + static_cast(x)->access(); + last_b_access = b->get_executor(); + last_x_access = x->get_executor(); + } + + void apply_impl(const gko::LinOp *alpha, const gko::LinOp *b, + const gko::LinOp *beta, gko::LinOp *x) const override + { + this->access(); + static_cast(alpha)->access(); + static_cast(b)->access(); + static_cast(beta)->access(); + static_cast(x)->access(); + last_alpha_access = alpha->get_executor(); + last_b_access = b->get_executor(); + last_beta_access = beta->get_executor(); + last_x_access = x->get_executor(); + } +}; + + +class EnableLinOp : public ::testing::Test { +protected: + EnableLinOp() + : ref{gko::ReferenceExecutor::create()}, + cuda{gko::CudaExecutor::create(0, ref)}, + op{DummyLinOp::create(cuda, gko::dim<2>{3, 5})}, + alpha{DummyLinOp::create(ref, gko::dim<2>{1})}, + beta{DummyLinOp::create(ref, gko::dim<2>{1})}, + b{DummyLinOp::create(ref, gko::dim<2>{5, 4})}, + x{DummyLinOp::create(ref, gko::dim<2>{3, 4})} + {} + + std::shared_ptr ref; + std::shared_ptr cuda; + std::unique_ptr op; + std::unique_ptr alpha; + std::unique_ptr beta; + std::unique_ptr b; + std::unique_ptr x; +}; + + +TEST_F(EnableLinOp, ApplyCopiesDataToCorrectExecutor) +{ + op->apply(gko::lend(b), gko::lend(x)); + + ASSERT_EQ(op->last_b_access, cuda); + ASSERT_EQ(op->last_x_access, cuda); +} + + +TEST_F(EnableLinOp, ApplyCopiesBackOnlyX) +{ + op->apply(gko::lend(b), gko::lend(x)); + + ASSERT_EQ(b->last_access, nullptr); + ASSERT_EQ(x->last_access, cuda); +} + + +TEST_F(EnableLinOp, ExtendedApplyCopiesDataToCorrectExecutor) +{ + op->apply(gko::lend(alpha), gko::lend(b), gko::lend(beta), gko::lend(x)); + + ASSERT_EQ(op->last_alpha_access, cuda); + ASSERT_EQ(op->last_b_access, cuda); + ASSERT_EQ(op->last_beta_access, cuda); + ASSERT_EQ(op->last_x_access, cuda); +} + + +TEST_F(EnableLinOp, ExtendedApplyCopiesBackOnlyX) +{ + op->apply(gko::lend(alpha), gko::lend(b), gko::lend(beta), gko::lend(x)); + + ASSERT_EQ(alpha->last_access, nullptr); + ASSERT_EQ(b->last_access, nullptr); + ASSERT_EQ(beta->last_access, nullptr); + ASSERT_EQ(x->last_access, cuda); +} + + class FactoryParameter : public ::testing::Test { protected: FactoryParameter() {} diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp index 7e311d489c7..1381f079945 100644 --- a/dpcpp/base/executor.dp.cpp +++ b/dpcpp/base/executor.dp.cpp @@ -57,6 +57,7 @@ const std::vector get_devices(std::string device_type) {"accelerator", sycl::info::device_type::accelerator}, {"all", sycl::info::device_type::all}, {"cpu", sycl::info::device_type::cpu}, + {"host", sycl::info::device_type::host}, {"gpu", sycl::info::device_type::gpu}}; std::for_each(device_type.begin(), device_type.end(), [](char &c) { c = std::tolower(c); }); @@ -76,6 +77,14 @@ void OmpExecutor::raw_copy_to(const DpcppExecutor *dest, size_type num_bytes, } +bool OmpExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const +{ + auto device = detail::get_devices( + dest_exec->getdevice_type())[dest_exec->get_device_id()]; + return device.is_host() || device.is_cpu(); +} + + std::shared_ptr DpcppExecutor::create( int device_id, std::shared_ptr master, std::string device_type) { @@ -150,6 +159,30 @@ int DpcppExecutor::get_num_devices(std::string device_type) } +bool DpcppExecutor::verify_memory_to(const OmpExecutor *dest_exec) const +{ + auto device = detail::get_devices(device_type_)[device_id_]; + + return device.is_host() || device.is_cpu(); +} + +bool DpcppExecutor::verify_memory_to(const ReferenceExecutor *dest_exec) const +{ + auto device = detail::get_devices(device_type_)[device_id_]; + return device.is_host() || device.is_cpu(); +} + +bool DpcppExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const +{ + auto device = detail::get_devices(device_type_)[device_id_]; + auto other_device = detail::get_devices( + dest_exec->get_device_type())[dest_exec->get_device_id()]; + return device.get_info() == + other_device.get_info() && + device.get() == other_device.get(); +} + + namespace detail { @@ -181,8 +214,10 @@ void DpcppExecutor::set_device_property() for (std::size_t i = 0; i < 3; i++) { max_workitem_sizes_.push_back(max_workitem_sizes[i]); } - max_workgroup_size_ = - device.get_info(); + if (!device.is_host()) { + max_workgroup_size_ = + device.get_info(); + } // Here we declare the queue with the property `in_order` which ensures the // kernels are executed in the submission order. Otherwise, calls to // `wait()` would be needed after every call to a DPC++ function or kernel. diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index 3e400986e2b..8f8bbbf7d9d 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -162,6 +162,16 @@ void HipExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes, } +bool HipExecutor::verify_memory_to(const CudaExecutor *dest_exec) const +{ +#if GINKGO_HIP_PLATFORM_NVCC + return device_id_ == dest_exec->get_device_id(); +#else + return false; +#endif +} + + void HipExecutor::synchronize() const { hip::device_guard g(this->get_device_id()); diff --git a/include/ginkgo/core/base/dim.hpp b/include/ginkgo/core/base/dim.hpp index 3030f0d1442..052c50e13b8 100644 --- a/include/ginkgo/core/base/dim.hpp +++ b/include/ginkgo/core/base/dim.hpp @@ -35,7 +35,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include namespace gko { diff --git a/include/ginkgo/core/base/exception_helpers.hpp b/include/ginkgo/core/base/exception_helpers.hpp index e14bb1c0c61..af3e1efb598 100644 --- a/include/ginkgo/core/base/exception_helpers.hpp +++ b/include/ginkgo/core/base/exception_helpers.hpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include namespace gko { diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 2cc18bf3502..dd0293d30ee 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -633,6 +633,17 @@ class Executor : public log::EnableLogging { */ virtual void synchronize() const = 0; + /** + * Overload the equal-to operator which verifies whether the executors share + * the same memory. + * + * @param other the other Executor to compare against + */ + bool operator==(const Executor &other) const + { + return this->verify_memory_from(other); + } + protected: /** * Allocates raw memory in this Executor. @@ -684,6 +695,33 @@ class Executor : public log::EnableLogging { #undef GKO_ENABLE_RAW_COPY_TO + /** + * Verify the memory from another Executor. + * + * @param src_exec Executor from which to verify the memory. + * + * @return whether this executor and src_exec share the same memory. + */ + virtual bool verify_memory_from(const Executor &src_exec) const = 0; + +/** + * @internal + * Declares a verify_memory_to() overload for a specified Executor subclass. + * + * This is the second stage of the double dispatch emulation required to + * implement verify_memory_from(). + * + * @param _exec_type the Executor subclass + */ +#define GKO_ENABLE_VERIFY_MEMORY_TO(_exec_type, ...) \ + virtual bool verify_memory_to(const _exec_type *dest_exec) const = 0 + + GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_ENABLE_VERIFY_MEMORY_TO); + + GKO_ENABLE_VERIFY_MEMORY_TO(ReferenceExecutor); + +#undef GKO_ENABLE_VERIFY_MEMORY_TO + private: /** * The LambdaOperation class wraps three functor objects into an @@ -832,6 +870,11 @@ class ExecutorBase : public Executor { src_exec->raw_copy_to(self(), n_bytes, src_ptr, dest_ptr); } + bool verify_memory_from(const Executor &src_exec) const override + { + return src_exec.verify_memory_to(self()); + } + private: ConcreteExecutor *self() noexcept { @@ -891,6 +934,13 @@ class EnableDeviceReset { const void *src_ptr, void *dest_ptr) const override +#define GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(dest_, bool_) \ + bool verify_memory_to(const dest_ *other) const override { return bool_; } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + + /** * This is the Executor subclass which represents the OpenMP device * (typically CPU). @@ -925,6 +975,16 @@ class OmpExecutor : public detail::ExecutorBase, void raw_free(void *ptr) const noexcept override; GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(OmpExecutor, true); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(ReferenceExecutor, true); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(HipExecutor, false); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(CudaExecutor, false); + + bool verify_memory_to(const DpcppExecutor *dest_exec) const override; }; @@ -1090,6 +1150,19 @@ class CudaExecutor : public detail::ExecutorBase, GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO); + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(OmpExecutor, false); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(ReferenceExecutor, false); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(DpcppExecutor, false); + + bool verify_memory_to(const HipExecutor *dest_exec) const override; + + bool verify_memory_to(const CudaExecutor *dest_exec) const override + { + return device_id_ == dest_exec->get_device_id(); + } + static void increase_num_execs(unsigned device_id) { std::lock_guard guard(mutex[device_id]); @@ -1256,6 +1329,19 @@ class HipExecutor : public detail::ExecutorBase, GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO); + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(OmpExecutor, false); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(ReferenceExecutor, false); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(DpcppExecutor, false); + + bool verify_memory_to(const CudaExecutor *dest_exec) const override; + + bool verify_memory_to(const HipExecutor *dest_exec) const override + { + return device_id_ == dest_exec->get_device_id(); + } + static void increase_num_execs(int device_id) { std::lock_guard guard(mutex[device_id]); @@ -1335,6 +1421,8 @@ class DpcppExecutor : public detail::ExecutorBase, /** * Get the DPCPP device id of the device associated to this executor. + * + * @return the DPCPP device id of the device associated to this executor */ int get_device_id() const noexcept { return device_id_; } @@ -1342,11 +1430,17 @@ class DpcppExecutor : public detail::ExecutorBase, /** * Get the number of devices present on the system. + * + * @param device_type a string representing the device type + * + * @return the number of devices present on the system */ static int get_num_devices(std::string device_type); /** * Get the available subgroup sizes for this device. + * + * @return the available subgroup sizes for this device */ const std::vector &get_subgroup_sizes() const noexcept { @@ -1355,6 +1449,8 @@ class DpcppExecutor : public detail::ExecutorBase, /** * Get the number of Computing Units of this executor. + * + * @return the number of Computing Units of this executor */ size_type get_num_computing_units() const noexcept { @@ -1363,6 +1459,8 @@ class DpcppExecutor : public detail::ExecutorBase, /** * Get the maximum work item sizes. + * + * @return the maximum work item sizes */ const std::vector &get_max_workitem_sizes() const noexcept { @@ -1371,12 +1469,21 @@ class DpcppExecutor : public detail::ExecutorBase, /** * Get the maximum workgroup size. + * + * @return the maximum workgroup size */ size_type get_max_workgroup_size() const noexcept { return max_workgroup_size_; } + /** + * Get a string representing the device type. + * + * @return a string representing the device type + */ + std::string get_device_type() const noexcept { return device_type_; } + protected: void set_device_property(); @@ -1384,6 +1491,8 @@ class DpcppExecutor : public detail::ExecutorBase, std::string device_type = "all") : device_id_(device_id), master_(master), device_type_(device_type) { + std::for_each(device_type_.begin(), device_type_.end(), + [](char &c) { c = std::tolower(c); }); this->set_device_property(); } @@ -1393,6 +1502,16 @@ class DpcppExecutor : public detail::ExecutorBase, GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO); + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(CudaExecutor, false); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(HipExecutor, false); + + bool verify_memory_to(const OmpExecutor *dest_exec) const override; + + bool verify_memory_to(const ReferenceExecutor *dest_exec) const override; + + bool verify_memory_to(const DpcppExecutor *dest_exec) const override; + private: int device_id_; std::shared_ptr master_; diff --git a/include/ginkgo/core/base/temporary_clone.hpp b/include/ginkgo/core/base/temporary_clone.hpp new file mode 100644 index 00000000000..a0f59f73919 --- /dev/null +++ b/include/ginkgo/core/base/temporary_clone.hpp @@ -0,0 +1,202 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_BASE_TEMPORARY_CLONE_HPP_ +#define GKO_CORE_BASE_TEMPORARY_CLONE_HPP_ + + +#include +#include +#include + + +#include +#include +#include + + +namespace gko { +namespace detail { + + +/** + * A copy_back_deleter is a type of deleter that copies the data to an + * internally referenced object before performing the deletion. + * + * The deleter will use the `copy_from` method to perform the copy, and then + * delete the passed object using the `delete` keyword. This kind of deleter is + * useful when temporarily copying an object with the intent of copying it back + * once it goes out of scope. + * + * There is also a specialization for constant objects that does not perform the + * copy, since a constant object couldn't have been changed. + * + * @tparam T the type of object being deleted + */ +template +class copy_back_deleter { +public: + using pointer = T *; + + /** + * Creates a new deleter object. + * + * @param original the origin object where the data will be copied before + * deletion + */ + copy_back_deleter(pointer original) : original_{original} {} + + /** + * Deletes the object. + * + * @param ptr pointer to the object being deleted + */ + void operator()(pointer ptr) const + { + original_->copy_from(ptr); + delete ptr; + } + +private: + pointer original_; +}; + +// specialization for constant objects, no need to copy back something that +// cannot change +template +class copy_back_deleter { +public: + using pointer = const T *; + copy_back_deleter(pointer original) : original_{original} {} + + void operator()(pointer ptr) const { delete ptr; } + +private: + pointer original_; +}; + + +template +struct temporary_clone_helper { + static std::unique_ptr create(std::shared_ptr exec, + T *ptr) + { + return gko::clone(std::move(exec), ptr); + } +}; + + +/** + * A temporary_clone is a special smart pointer-like object that is designed to + * hold an object temporarily copied to another executor. + * + * After the temporary_clone goes out of scope, the stored object will be copied + * back to its original location. This class is optimized to avoid copies if the + * object is already on the correct executor, in which case it will just hold a + * reference to that object, without performing the copy. + * + * @tparam T the type of object held in the temporary_clone + */ +template +class temporary_clone { +public: + using value_type = T; + using pointer = T *; + + /** + * Creates a temporary_clone. + * + * @param exec the executor where the clone will be created + * @param ptr a pointer to the object of which the clone will be created + */ + explicit temporary_clone(std::shared_ptr exec, pointer ptr) + { + if (*ptr->get_executor() == *exec) { + // just use the object we already have + handle_ = handle_type(ptr, null_deleter()); + } else { + // clone the object to the new executor and make sure it's copied + // back before we delete it + handle_ = handle_type( + temporary_clone_helper::create(std::move(exec), ptr) + .release(), + copy_back_deleter(ptr)); + } + } + + /** + * Returns the object held by temporary_clone. + * + * @return the object held by temporary_clone + */ + T *get() const { return handle_.get(); } + + /** + * Calls a method on the underlying object. + * + * @return the underlying object + */ + T *operator->() const { return handle_.get(); } + +private: + // std::function deleter allows to decide the (type of) deleter at runtime + using handle_type = std::unique_ptr>; + + handle_type handle_; +}; + + +} // namespace detail + + +/** + * Creates a temporary_clone. + * + * This is a helper function which avoids the need to explicitly specify the + * type of the object, as would be the case if using the constructor of + * temporary_clone. + * + * @param exec the executor where the clone will be created + * @param ptr a pointer to the object of which the clone will be created + */ +template +detail::temporary_clone make_temporary_clone( + std::shared_ptr exec, T *ptr) +{ + return detail::temporary_clone(std::move(exec), ptr); +} + + +} // namespace gko + + +#endif // GKO_CORE_BASE_TEMPORARY_CLONE_HPP_ diff --git a/include/ginkgo/core/base/utils.hpp b/include/ginkgo/core/base/utils.hpp index 5270d619445..509213e8476 100644 --- a/include/ginkgo/core/base/utils.hpp +++ b/include/ginkgo/core/base/utils.hpp @@ -34,557 +34,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_PUBLIC_CORE_BASE_UTILS_HPP_ -#include -#include -#include - - -#include -#include -#include -#include - - -#ifndef NDEBUG -#include -#endif // NDEBUG - - -namespace gko { - - -class Executor; - - -namespace detail { - - -template -struct pointee_impl {}; - -template -struct pointee_impl { - using type = T; -}; - -template -struct pointee_impl> { - using type = T; -}; - -template -struct pointee_impl> { - using type = T; -}; - -template -using pointee = typename pointee_impl::type>::type; - - -template -struct is_clonable_impl : std::false_type {}; - -template -struct is_clonable_impl().clone())>> - : std::true_type {}; - -template -constexpr bool is_clonable() -{ - return is_clonable_impl::type>::value; -} - - -template -struct is_clonable_to_impl : std::false_type {}; - -template -struct is_clonable_to_impl< - T, xstd::void_t().clone( - std::declval>()))>> - : std::true_type {}; - -template -constexpr bool is_clonable_to() -{ - return is_clonable_to_impl::type>::value; -} - - -template -struct have_ownership_impl : std::false_type {}; - -template -struct have_ownership_impl> : std::true_type {}; - -template -struct have_ownership_impl> : std::true_type {}; - -template -using have_ownership_s = have_ownership_impl::type>; - -template -constexpr bool have_ownership() -{ - return have_ownership_s::value; -} - - -template -using cloned_type = - std::unique_ptr>::type>; - - -template -using shared_type = std::shared_ptr>; - - -} // namespace detail - - -/** - * Creates a unique clone of the object pointed to by `p`. - * - * The pointee (i.e. `*p`) needs to have a clone method that returns a - * std::unique_ptr in order for this method to work. - * - * @tparam Pointer type of pointer to the object (plain or smart pointer) - * - * @param p a pointer to the object - * - * @note The difference between this function and directly calling - * LinOp::clone() is that this one preserves the static type of the - * object. - */ -template -inline detail::cloned_type clone(const Pointer &p) -{ - static_assert(detail::is_clonable>(), - "Object is not clonable"); - return detail::cloned_type( - static_cast>::type *>( - p->clone().release())); -} - - -/** - * Creates a unique clone of the object pointed to by `p` on Executor `exec`. - * - * The pointee (i.e. `*p`) needs to have a clone method that takes an - * executor and returns a std::unique_ptr in order for this method to work. - * - * @tparam Pointer type of pointer to the object (plain or smart pointer) - * - * @param exec the executor where the cloned object should be stored - * @param p a pointer to the object - * - * @note The difference between this function and directly calling - * LinOp::clone() is that this one preserves the static type of the - * object. - */ -template -inline detail::cloned_type clone(std::shared_ptr exec, - const Pointer &p) -{ - static_assert(detail::is_clonable_to>(), - "Object is not clonable"); - return detail::cloned_type( - static_cast>::type *>( - p->clone(std::move(exec)).release())); -} - - -/** - * Marks the object pointed to by `p` as shared. - * - * Effectively converts a pointer with ownership to std::shared_ptr. - * - * @tparam OwningPointer type of pointer with ownership to the object - * (has to be a smart pointer) - * - * @param p a pointer to the object - * - * @note The original pointer `p` becomes invalid after this call. - */ -template -inline detail::shared_type share(OwningPointer &&p) -{ - static_assert(detail::have_ownership(), - "OwningPointer does not have ownership of the object"); - return detail::shared_type(std::move(p)); -} - - -/** - * Marks that the object pointed to by `p` can be given to the callee. - * - * Effectively calls `std::move(p)`. - * - * @tparam OwningPointer type of pointer with ownership to the object - * (has to be a smart pointer) - * - * @param p a pointer to the object - * - * @note The original pointer `p` becomes invalid after this call. - */ -template -inline typename std::remove_reference::type &&give( - OwningPointer &&p) -{ - static_assert(detail::have_ownership(), - "OwningPointer does not have ownership of the object"); - return std::move(p); -} - - -/** - * Returns a non-owning (plain) pointer to the object pointed to by `p`. - * - * @tparam Pointer type of pointer to the object (plain or smart pointer) - * - * @param p a pointer to the object - * - * @note This is the overload for owning (smart) pointers, that behaves the - * same as calling .get() on the smart pointer. - */ -template -inline typename std::enable_if::value, - detail::pointee *>::type -lend(const Pointer &p) -{ - return p.get(); -} - -/** - * Returns a non-owning (plain) pointer to the object pointed to by `p`. - * - * @tparam Pointer type of pointer to the object (plain or smart pointer) - * - * @param p a pointer to the object - * - * @note This is the overload for non-owning (plain) pointers, that just - * returns `p`. - */ -template -inline typename std::enable_if::value, - detail::pointee *>::type -lend(const Pointer &p) -{ - return p; -} - - -/** - * Performs polymorphic type conversion. - * - * @tparam T requested result type - * @tparam U static type of the passed object - * - * @param obj the object which should be converted - * - * @return If successful, returns a pointer to the subtype, otherwise throws - * NotSupported. - */ -template -inline typename std::decay::type *as(U *obj) -{ - if (auto p = dynamic_cast::type *>(obj)) { - return p; - } else { - throw NotSupported(__FILE__, __LINE__, - std::string{"gko::as<"} + - name_demangling::get_type_name(typeid(T)) + ">", - name_demangling::get_type_name(typeid(*obj))); - } -} - -/** - * Performs polymorphic type conversion. - * - * This is the constant version of the function. - * - * @tparam T requested result type - * @tparam U static type of the passed object - * - * @param obj the object which should be converted - * - * @return If successful, returns a pointer to the subtype, otherwise throws - * NotSupported. - */ -template -inline const typename std::decay::type *as(const U *obj) -{ - if (auto p = dynamic_cast::type *>(obj)) { - return p; - } else { - throw NotSupported(__FILE__, __LINE__, - std::string{"gko::as<"} + - name_demangling::get_type_name(typeid(T)) + ">", - name_demangling::get_type_name(typeid(*obj))); - } -} - - -/** - * Performs polymorphic type conversion of a unique_ptr. - * - * @tparam T requested result type - * @tparam U static type of the passed object - * - * @param obj the unique_ptr to the object which should be converted. - * If successful, it will be reset to a nullptr. - * - * @return If successful, returns a unique_ptr to the subtype, otherwise throws - * NotSupported. - */ -template -inline std::unique_ptr::type> as( - std::unique_ptr &&obj) -{ - if (auto p = dynamic_cast::type *>(obj.get())) { - obj.release(); - return std::unique_ptr::type>{p}; - } else { - throw NotSupported(__FILE__, __LINE__, __func__, - name_demangling::get_type_name(typeid(*obj))); - } -} - - -/** - * Performs polymorphic type conversion of a shared_ptr. - * - * @tparam T requested result type - * @tparam U static type of the passed object - * - * @param obj the shared_ptr to the object which should be converted. - * - * @return If successful, returns a shared_ptr to the subtype, otherwise throws - * NotSupported. This pointer shares ownership with the input pointer. - */ -template -inline std::shared_ptr::type> as(std::shared_ptr obj) -{ - auto ptr = std::dynamic_pointer_cast::type>(obj); - if (ptr) { - return ptr; - } else { - throw NotSupported(__FILE__, __LINE__, __func__, - name_demangling::get_type_name(typeid(*obj))); - } -} - - -/** - * Performs polymorphic type conversion of a shared_ptr. - * - * This is the constant version of the function. - * - * @tparam T requested result type - * @tparam U static type of the passed object - * - * @param obj the shared_ptr to the object which should be converted. - * - * @return If successful, returns a shared_ptr to the subtype, otherwise throws - * NotSupported. This pointer shares ownership with the input pointer. - */ -template -inline std::shared_ptr::type> as( - std::shared_ptr obj) -{ - auto ptr = - std::dynamic_pointer_cast::type>(obj); - if (ptr) { - return ptr; - } else { - throw NotSupported(__FILE__, __LINE__, __func__, - name_demangling::get_type_name(typeid(*obj))); - } -} - - -/** - * This is a deleter that does not delete the object. - * - * It is useful where the object has been allocated elsewhere and will be - * deleted manually. - */ -template -class null_deleter { -public: - using pointer = T *; - - /** - * Deletes the object. - * - * @param ptr pointer to the object being deleted - */ - void operator()(pointer) const noexcept {} -}; - -// a specialization for arrays -template -class null_deleter { -public: - using pointer = T[]; - - void operator()(pointer) const noexcept {} -}; - - -namespace detail { - - -/** - * A copy_back_deleter is a type of deleter that copies the data to an - * internally referenced object before performing the deletion. - * - * The deleter will use the `copy_from` method to perform the copy, and then - * delete the passed object using the `delete` keyword. This kind of deleter is - * useful when temporarily copying an object with the intent of copying it back - * once it goes out of scope. - * - * There is also a specialization for constant objects that does not perform the - * copy, since a constant object couldn't have been changed. - * - * @tparam T the type of object being deleted - */ -template -class copy_back_deleter { -public: - using pointer = T *; - - /** - * Creates a new deleter object. - * - * @param original the origin object where the data will be copied before - * deletion - */ - copy_back_deleter(pointer original) : original_{original} {} - - /** - * Deletes the object. - * - * @param ptr pointer to the object being deleted - */ - void operator()(pointer ptr) const - { - original_->copy_from(ptr); - delete ptr; - } - -private: - pointer original_; -}; - - -// specialization for constant objects, no need to copy back something that -// cannot change -template -class copy_back_deleter { -public: - using pointer = const T *; - copy_back_deleter(pointer original) : original_{original} {} - - void operator()(pointer ptr) const { delete ptr; } - -private: - pointer original_; -}; - - -template -struct temporary_clone_helper { - static std::unique_ptr create(std::shared_ptr exec, - T *ptr) - { - return gko::clone(std::move(exec), ptr); - } -}; - - -/** - * A temporary_clone is a special smart pointer-like object that is designed to - * hold an object temporarily copied to another executor. - * - * After the temporary_clone goes out of scope, the stored object will be copied - * back to its original location. This class is optimized to avoid copies if the - * object is already on the correct executor, in which case it will just hold a - * reference to that object, without performing the copy. - * - * @tparam T the type of object held in the temporary_clone - */ -template -class temporary_clone { -public: - using value_type = T; - using pointer = T *; - - /** - * Creates a temporary_clone. - * - * @param exec the executor where the clone will be created - * @param ptr a pointer to the object of which the clone will be created - */ - explicit temporary_clone(std::shared_ptr exec, pointer ptr) - { - if (ptr->get_executor() == exec) { - // just use the object we already have - handle_ = handle_type(ptr, null_deleter()); - } else { - // clone the object to the new executor and make sure it's copied - // back before we delete it - handle_ = handle_type( - temporary_clone_helper::create(std::move(exec), ptr) - .release(), - copy_back_deleter(ptr)); - } - } - - /** - * Returns the object held by temporary_clone. - * - * @return the object held by temporary_clone - */ - T *get() const { return handle_.get(); } - - /** - * Calls a method on the underlying object. - * - * @return the underlying object - */ - T *operator->() const { return handle_.get(); } - -private: - // std::function deleter allows to decide the (type of) deleter at runtime - using handle_type = std::unique_ptr>; - - handle_type handle_; -}; - - -} // namespace detail - - -/** - * Creates a temporary_clone. - * - * This is a helper function which avoids the need to explicitly specify the - * type of the object, as would be the case if using the constructor of - * temporary_clone. - * - * @param exec the executor where the clone will be created - * @param ptr a pointer to the object of which the clone will be created - */ -template -detail::temporary_clone make_temporary_clone( - std::shared_ptr exec, T *ptr) -{ - return detail::temporary_clone(std::move(exec), ptr); -} - - -} // namespace gko +#include +#include #endif // GKO_PUBLIC_CORE_BASE_UTILS_HPP_ diff --git a/include/ginkgo/core/base/utils_helper.hpp b/include/ginkgo/core/base/utils_helper.hpp new file mode 100644 index 00000000000..01c6b5cf5f5 --- /dev/null +++ b/include/ginkgo/core/base/utils_helper.hpp @@ -0,0 +1,438 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_BASE_UTILS_HELPER_HPP_ +#define GKO_CORE_BASE_UTILS_HELPER_HPP_ + + +#include +#include +#include + + +#include +#include +#include +#include + + +#ifndef NDEBUG +#include +#endif // NDEBUG + + +namespace gko { + + +class Executor; + + +namespace detail { + + +template +struct pointee_impl {}; + +template +struct pointee_impl { + using type = T; +}; + +template +struct pointee_impl> { + using type = T; +}; + +template +struct pointee_impl> { + using type = T; +}; + +template +using pointee = typename pointee_impl::type>::type; + + +template +struct is_clonable_impl : std::false_type {}; + +template +struct is_clonable_impl().clone())>> + : std::true_type {}; + +template +constexpr bool is_clonable() +{ + return is_clonable_impl::type>::value; +} + + +template +struct is_clonable_to_impl : std::false_type {}; + +template +struct is_clonable_to_impl< + T, xstd::void_t().clone( + std::declval>()))>> + : std::true_type {}; + +template +constexpr bool is_clonable_to() +{ + return is_clonable_to_impl::type>::value; +} + + +template +struct have_ownership_impl : std::false_type {}; + +template +struct have_ownership_impl> : std::true_type {}; + +template +struct have_ownership_impl> : std::true_type {}; + +template +using have_ownership_s = have_ownership_impl::type>; + +template +constexpr bool have_ownership() +{ + return have_ownership_s::value; +} + + +template +using cloned_type = + std::unique_ptr>::type>; + + +template +using shared_type = std::shared_ptr>; + + +} // namespace detail + + +/** + * Creates a unique clone of the object pointed to by `p`. + * + * The pointee (i.e. `*p`) needs to have a clone method that returns a + * std::unique_ptr in order for this method to work. + * + * @tparam Pointer type of pointer to the object (plain or smart pointer) + * + * @param p a pointer to the object + * + * @note The difference between this function and directly calling + * LinOp::clone() is that this one preserves the static type of the + * object. + */ +template +inline detail::cloned_type clone(const Pointer &p) +{ + static_assert(detail::is_clonable>(), + "Object is not clonable"); + return detail::cloned_type( + static_cast>::type *>( + p->clone().release())); +} + + +/** + * Creates a unique clone of the object pointed to by `p` on Executor `exec`. + * + * The pointee (i.e. `*p`) needs to have a clone method that takes an + * executor and returns a std::unique_ptr in order for this method to work. + * + * @tparam Pointer type of pointer to the object (plain or smart pointer) + * + * @param exec the executor where the cloned object should be stored + * @param p a pointer to the object + * + * @note The difference between this function and directly calling + * LinOp::clone() is that this one preserves the static type of the + * object. + */ +template +inline detail::cloned_type clone(std::shared_ptr exec, + const Pointer &p) +{ + static_assert(detail::is_clonable_to>(), + "Object is not clonable"); + return detail::cloned_type( + static_cast>::type *>( + p->clone(std::move(exec)).release())); +} + + +/** + * Marks the object pointed to by `p` as shared. + * + * Effectively converts a pointer with ownership to std::shared_ptr. + * + * @tparam OwningPointer type of pointer with ownership to the object + * (has to be a smart pointer) + * + * @param p a pointer to the object + * + * @note The original pointer `p` becomes invalid after this call. + */ +template +inline detail::shared_type share(OwningPointer &&p) +{ + static_assert(detail::have_ownership(), + "OwningPointer does not have ownership of the object"); + return detail::shared_type(std::move(p)); +} + + +/** + * Marks that the object pointed to by `p` can be given to the callee. + * + * Effectively calls `std::move(p)`. + * + * @tparam OwningPointer type of pointer with ownership to the object + * (has to be a smart pointer) + * + * @param p a pointer to the object + * + * @note The original pointer `p` becomes invalid after this call. + */ +template +inline typename std::remove_reference::type &&give( + OwningPointer &&p) +{ + static_assert(detail::have_ownership(), + "OwningPointer does not have ownership of the object"); + return std::move(p); +} + + +/** + * Returns a non-owning (plain) pointer to the object pointed to by `p`. + * + * @tparam Pointer type of pointer to the object (plain or smart pointer) + * + * @param p a pointer to the object + * + * @note This is the overload for owning (smart) pointers, that behaves the + * same as calling .get() on the smart pointer. + */ +template +inline typename std::enable_if::value, + detail::pointee *>::type +lend(const Pointer &p) +{ + return p.get(); +} + +/** + * Returns a non-owning (plain) pointer to the object pointed to by `p`. + * + * @tparam Pointer type of pointer to the object (plain or smart pointer) + * + * @param p a pointer to the object + * + * @note This is the overload for non-owning (plain) pointers, that just + * returns `p`. + */ +template +inline typename std::enable_if::value, + detail::pointee *>::type +lend(const Pointer &p) +{ + return p; +} + + +/** + * Performs polymorphic type conversion. + * + * @tparam T requested result type + * @tparam U static type of the passed object + * + * @param obj the object which should be converted + * + * @return If successful, returns a pointer to the subtype, otherwise throws + * NotSupported. + */ +template +inline typename std::decay::type *as(U *obj) +{ + if (auto p = dynamic_cast::type *>(obj)) { + return p; + } else { + throw NotSupported(__FILE__, __LINE__, + std::string{"gko::as<"} + + name_demangling::get_type_name(typeid(T)) + ">", + name_demangling::get_type_name(typeid(*obj))); + } +} + +/** + * Performs polymorphic type conversion. + * + * This is the constant version of the function. + * + * @tparam T requested result type + * @tparam U static type of the passed object + * + * @param obj the object which should be converted + * + * @return If successful, returns a pointer to the subtype, otherwise throws + * NotSupported. + */ +template +inline const typename std::decay::type *as(const U *obj) +{ + if (auto p = dynamic_cast::type *>(obj)) { + return p; + } else { + throw NotSupported(__FILE__, __LINE__, + std::string{"gko::as<"} + + name_demangling::get_type_name(typeid(T)) + ">", + name_demangling::get_type_name(typeid(*obj))); + } +} + + +/** + * Performs polymorphic type conversion of a unique_ptr. + * + * @tparam T requested result type + * @tparam U static type of the passed object + * + * @param obj the unique_ptr to the object which should be converted. + * If successful, it will be reset to a nullptr. + * + * @return If successful, returns a unique_ptr to the subtype, otherwise throws + * NotSupported. + */ +template +inline std::unique_ptr::type> as( + std::unique_ptr &&obj) +{ + if (auto p = dynamic_cast::type *>(obj.get())) { + obj.release(); + return std::unique_ptr::type>{p}; + } else { + throw NotSupported(__FILE__, __LINE__, __func__, + name_demangling::get_type_name(typeid(*obj))); + } +} + + +/** + * Performs polymorphic type conversion of a shared_ptr. + * + * @tparam T requested result type + * @tparam U static type of the passed object + * + * @param obj the shared_ptr to the object which should be converted. + * + * @return If successful, returns a shared_ptr to the subtype, otherwise throws + * NotSupported. This pointer shares ownership with the input pointer. + */ +template +inline std::shared_ptr::type> as(std::shared_ptr obj) +{ + auto ptr = std::dynamic_pointer_cast::type>(obj); + if (ptr) { + return ptr; + } else { + throw NotSupported(__FILE__, __LINE__, __func__, + name_demangling::get_type_name(typeid(*obj))); + } +} + + +/** + * Performs polymorphic type conversion of a shared_ptr. + * + * This is the constant version of the function. + * + * @tparam T requested result type + * @tparam U static type of the passed object + * + * @param obj the shared_ptr to the object which should be converted. + * + * @return If successful, returns a shared_ptr to the subtype, otherwise throws + * NotSupported. This pointer shares ownership with the input pointer. + */ +template +inline std::shared_ptr::type> as( + std::shared_ptr obj) +{ + auto ptr = + std::dynamic_pointer_cast::type>(obj); + if (ptr) { + return ptr; + } else { + throw NotSupported(__FILE__, __LINE__, __func__, + name_demangling::get_type_name(typeid(*obj))); + } +} + + +/** + * This is a deleter that does not delete the object. + * + * It is useful where the object has been allocated elsewhere and will be + * deleted manually. + */ +template +class null_deleter { +public: + using pointer = T *; + + /** + * Deletes the object. + * + * @param ptr pointer to the object being deleted + */ + void operator()(pointer) const noexcept {} +}; + +// a specialization for arrays +template +class null_deleter { +public: + using pointer = T[]; + + void operator()(pointer) const noexcept {} +}; + + +} // namespace gko + + +#endif // GKO_CORE_BASE_UTILS_HELPER_HPP_ diff --git a/include/ginkgo/core/log/logger.hpp b/include/ginkgo/core/log/logger.hpp index d84e51c8ed6..f9df5660fd2 100644 --- a/include/ginkgo/core/log/logger.hpp +++ b/include/ginkgo/core/log/logger.hpp @@ -42,7 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include +#include namespace gko { From 7d6b209ce813a470d4f982481402fd81db928128 Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Wed, 25 Nov 2020 18:39:00 +0100 Subject: [PATCH 2/5] Review updates. + Do not use `operator==`, but a funciton `memory_accessible` instead. + Make DPC++ host and CPU be memory compatible. + Use pointers for the interface instead of references. + Ensure DPC++ tests always work. + Fix some typos. Co-authored-by: Yuhsiang M. Tsai Co-authored-by: Tobias Ribizel Co-authored-by: Pratik Nayak Co-authored-by: Aditya Kashi --- core/test/base/array.cpp | 2 +- core/test/base/executor.cpp | 99 +++++++++++--------- dpcpp/base/executor.dp.cpp | 28 +++--- include/ginkgo/core/base/executor.hpp | 17 ++-- include/ginkgo/core/base/temporary_clone.hpp | 2 +- 5 files changed, 81 insertions(+), 67 deletions(-) diff --git a/core/test/base/array.cpp b/core/test/base/array.cpp index c20782651f7..54f7cfea053 100644 --- a/core/test/base/array.cpp +++ b/core/test/base/array.cpp @@ -284,7 +284,7 @@ TYPED_TEST(Array, DoesNotCreateATemporaryCloneBetweenSameMemory) } -TYPED_TEST(Array, CanDoesNotCopyBackTemporaryCloneBetweenSameMemory) +TYPED_TEST(Array, DoesNotCopyBackTemporaryCloneBetweenSameMemory) { auto omp = gko::OmpExecutor::create(); diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp index 73c82a06fa8..783e20f1008 100644 --- a/core/test/base/executor.cpp +++ b/core/test/base/executor.cpp @@ -480,56 +480,67 @@ TEST(Executor, CanVerifyMemory) auto omp = gko::OmpExecutor::create(); auto hip = gko::HipExecutor::create(0, omp); auto cuda = gko::CudaExecutor::create(0, omp); - auto cpu_dpcpp = gko::DpcppExecutor::create(0, omp, "cpu"); - auto host_dpcpp = gko::DpcppExecutor::create(0, omp, "host"); - auto gpu_dpcpp = gko::DpcppExecutor::create(0, omp, "gpu"); auto omp2 = gko::OmpExecutor::create(); auto hip2 = gko::HipExecutor::create(0, omp); auto cuda2 = gko::CudaExecutor::create(0, omp); - std::shared_ptr hip_1 = gko::HipExecutor::create(1, omp); - std::shared_ptr cuda_1 = - gko::CudaExecutor::create(1, omp); - - ASSERT_EQ(true, *ref == *omp); - ASSERT_EQ(true, *omp == *ref); - ASSERT_EQ(false, *ref == *hip); - ASSERT_EQ(false, *hip == *ref); - ASSERT_EQ(false, *omp == *hip); - ASSERT_EQ(false, *hip == *omp); - ASSERT_EQ(false, *ref == *cuda); - ASSERT_EQ(false, *cuda == *ref); - ASSERT_EQ(false, *omp == *cuda); - ASSERT_EQ(false, *cuda == *omp); - ASSERT_EQ(true, *cpu_dpcpp == *ref); - ASSERT_EQ(true, *host_dpcpp == *ref); - ASSERT_EQ(false, *gpu_dpcpp == *ref); - ASSERT_EQ(true, *ref == *cpu_dpcpp); - ASSERT_EQ(true, *ref == *host_dpcpp); - ASSERT_EQ(false, *ref == *gpu_dpcpp); - ASSERT_EQ(true, *cpu_dpcpp == *omp); - ASSERT_EQ(true, *host_dpcpp == *omp); - ASSERT_EQ(false, *gpu_dpcpp == *omp); - ASSERT_EQ(true, *omp == *cpu_dpcpp); - ASSERT_EQ(true, *omp == *host_dpcpp); - ASSERT_EQ(false, *omp == *gpu_dpcpp); + auto hip_1 = gko::HipExecutor::create(1, omp); + auto cuda_1 = gko::CudaExecutor::create(1, omp); + std::shared_ptr host_dpcpp; + std::shared_ptr cpu_dpcpp; + std::shared_ptr gpu_dpcpp; + if (gko::DpcppExecutor::get_num_devices("host")) + host_dpcpp = gko::DpcppExecutor::create(0, omp, "host"); + if (gko::DpcppExecutor::get_num_devices("cpu")) + cpu_dpcpp = gko::DpcppExecutor::create(0, omp, "cpu"); + if (gko::DpcppExecutor::get_num_devices("gpu")) + gpu_dpcpp = gko::DpcppExecutor::create(0, omp, "gpu"); + + ASSERT_EQ(true, ref->memory_accessible(omp)); + ASSERT_EQ(true, omp->memory_accessible(ref)); + ASSERT_EQ(false, ref->memory_accessible(hip)); + ASSERT_EQ(false, hip->memory_accessible(ref)); + ASSERT_EQ(false, omp->memory_accessible(hip)); + ASSERT_EQ(false, hip->memory_accessible(omp)); + ASSERT_EQ(false, ref->memory_accessible(cuda)); + ASSERT_EQ(false, cuda->memory_accessible(ref)); + ASSERT_EQ(false, omp->memory_accessible(cuda)); + ASSERT_EQ(false, cuda->memory_accessible(omp)); + if (gko::DpcppExecutor::get_num_devices("host")) { + ASSERT_EQ(true, host_dpcpp->memory_accessible(ref)); + ASSERT_EQ(true, ref->memory_accessible(host_dpcpp)); + ASSERT_EQ(true, host_dpcpp->memory_accessible(omp)); + ASSERT_EQ(true, omp->memory_accessible(host_dpcpp)); + } + if (gko::DpcppExecutor::get_num_devices("cpu")) { + ASSERT_EQ(true, ref->memory_accessible(cpu_dpcpp)); + ASSERT_EQ(true, cpu_dpcpp->memory_accessible(ref)); + ASSERT_EQ(true, cpu_dpcpp->memory_accessible(omp)); + ASSERT_EQ(true, omp->memory_accessible(cpu_dpcpp)); + } + if (gko::DpcppExecutor::get_num_devices("gpu")) { + ASSERT_EQ(false, gpu_dpcpp->memory_accessible(ref)); + ASSERT_EQ(false, ref->memory_accessible(gpu_dpcpp)); + ASSERT_EQ(false, gpu_dpcpp->memory_accessible(omp)); + ASSERT_EQ(false, omp->memory_accessible(gpu_dpcpp)); + } #if GINKGO_HIP_PLATFORM_NVCC - ASSERT_EQ(true, *hip == *cuda); - ASSERT_EQ(true, *cuda == *hip); - ASSERT_EQ(true, *hip_1 == *cuda_1); - ASSERT_EQ(true, *cuda_1 == *hip_1); + ASSERT_EQ(true, hip->memory_accessible(cuda)); + ASSERT_EQ(true, cuda->memory_accessible(hip)); + ASSERT_EQ(true, hip_1->memory_accessible(cuda_1)); + ASSERT_EQ(true, cuda_1->memory_accessible(hip_1)); #else - ASSERT_EQ(false, *hip == *cuda); - ASSERT_EQ(false, *cuda == *hip); - ASSERT_EQ(false, *hip_1 == *cuda_1); - ASSERT_EQ(false, *cuda_1 == *hip_1); + ASSERT_EQ(false, hip->memory_accessible(cuda)); + ASSERT_EQ(false, cuda->memory_accessible(hip)); + ASSERT_EQ(false, hip_1->memory_accessible(cuda_1)); + ASSERT_EQ(false, cuda_1->memory_accessible(hip_1)); #endif - ASSERT_EQ(true, *omp == *omp2); - ASSERT_EQ(true, *hip == *hip2); - ASSERT_EQ(true, *cuda == *cuda2); - ASSERT_EQ(false, *hip == *hip_1); - ASSERT_EQ(false, *cuda == *hip_1); - ASSERT_EQ(false, *cuda == *cuda_1); - ASSERT_EQ(false, *hip == *cuda_1); + ASSERT_EQ(true, omp->memory_accessible(omp2)); + ASSERT_EQ(true, hip->memory_accessible(hip2)); + ASSERT_EQ(true, cuda->memory_accessible(cuda2)); + ASSERT_EQ(false, hip->memory_accessible(hip_1)); + ASSERT_EQ(false, cuda->memory_accessible(hip_1)); + ASSERT_EQ(false, cuda->memory_accessible(cuda_1)); + ASSERT_EQ(false, hip->memory_accessible(cuda_1)); } diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp index 1381f079945..25e106a2546 100644 --- a/dpcpp/base/executor.dp.cpp +++ b/dpcpp/base/executor.dp.cpp @@ -80,7 +80,7 @@ void OmpExecutor::raw_copy_to(const DpcppExecutor *dest, size_type num_bytes, bool OmpExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const { auto device = detail::get_devices( - dest_exec->getdevice_type())[dest_exec->get_device_id()]; + dest_exec->get_device_type())[dest_exec->get_device_id()]; return device.is_host() || device.is_cpu(); } @@ -177,9 +177,11 @@ bool DpcppExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const auto device = detail::get_devices(device_type_)[device_id_]; auto other_device = detail::get_devices( dest_exec->get_device_type())[dest_exec->get_device_id()]; - return device.get_info() == - other_device.get_info() && - device.get() == other_device.get(); + return ((device.is_host() || device.is_cpu()) && + (other_device.is_host() || other_device.is_cpu())) || + (device.get_info() == + other_device.get_info() && + device.get() == other_device.get()); } @@ -200,11 +202,13 @@ void DpcppExecutor::set_device_property() { assert(device_id_ < DpcppExecutor::get_num_devices(device_type_)); auto device = detail::get_devices(device_type_)[device_id_]; - try { - subgroup_sizes_ = - device.get_info(); - } catch (cl::sycl::runtime_error &err) { - GKO_NOT_SUPPORTED(device); + if (!device.is_host()) { + try { + subgroup_sizes_ = + device.get_info(); + } catch (cl::sycl::runtime_error &err) { + GKO_NOT_SUPPORTED(device); + } } num_computing_units_ = device.get_info(); @@ -214,10 +218,8 @@ void DpcppExecutor::set_device_property() for (std::size_t i = 0; i < 3; i++) { max_workitem_sizes_.push_back(max_workitem_sizes[i]); } - if (!device.is_host()) { - max_workgroup_size_ = - device.get_info(); - } + max_workgroup_size_ = + device.get_info(); // Here we declare the queue with the property `in_order` which ensures the // kernels are executed in the submission order. Otherwise, calls to // `wait()` would be needed after every call to a DPC++ function or kernel. diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index dd0293d30ee..2ff58f6fe82 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -634,14 +634,15 @@ class Executor : public log::EnableLogging { virtual void synchronize() const = 0; /** - * Overload the equal-to operator which verifies whether the executors share - * the same memory. + * Verifies whether the executors share the same memory. * * @param other the other Executor to compare against + * + * @return whether the executors this and other share the same memory. */ - bool operator==(const Executor &other) const + bool memory_accessible(const std::shared_ptr &other) const { - return this->verify_memory_from(other); + return this->verify_memory_from(other.get()); } protected: @@ -702,7 +703,7 @@ class Executor : public log::EnableLogging { * * @return whether this executor and src_exec share the same memory. */ - virtual bool verify_memory_from(const Executor &src_exec) const = 0; + virtual bool verify_memory_from(const Executor *src_exec) const = 0; /** * @internal @@ -718,7 +719,7 @@ class Executor : public log::EnableLogging { GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_ENABLE_VERIFY_MEMORY_TO); - GKO_ENABLE_VERIFY_MEMORY_TO(ReferenceExecutor); + GKO_ENABLE_VERIFY_MEMORY_TO(ReferenceExecutor, ref); #undef GKO_ENABLE_VERIFY_MEMORY_TO @@ -870,9 +871,9 @@ class ExecutorBase : public Executor { src_exec->raw_copy_to(self(), n_bytes, src_ptr, dest_ptr); } - bool verify_memory_from(const Executor &src_exec) const override + bool verify_memory_from(const Executor *src_exec) const override { - return src_exec.verify_memory_to(self()); + return src_exec->verify_memory_to(self()); } private: diff --git a/include/ginkgo/core/base/temporary_clone.hpp b/include/ginkgo/core/base/temporary_clone.hpp index a0f59f73919..33f80848336 100644 --- a/include/ginkgo/core/base/temporary_clone.hpp +++ b/include/ginkgo/core/base/temporary_clone.hpp @@ -140,7 +140,7 @@ class temporary_clone { */ explicit temporary_clone(std::shared_ptr exec, pointer ptr) { - if (*ptr->get_executor() == *exec) { + if (ptr->get_executor()->memory_accessible(exec)) { // just use the object we already have handle_ = handle_type(ptr, null_deleter()); } else { From bd4936fe5354fd20282367889b704bfbbfc95974 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Wed, 2 Dec 2020 11:48:39 +0000 Subject: [PATCH 3/5] Format files Co-authored-by: tcojean --- cuda/test/base/array.cu | 2 ++ include/ginkgo/core/base/temporary_clone.hpp | 6 +++--- include/ginkgo/core/base/utils_helper.hpp | 6 +++--- include/ginkgo/ginkgo.hpp | 2 ++ 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/cuda/test/base/array.cu b/cuda/test/base/array.cu index c4757c42b44..73770b36bb5 100644 --- a/cuda/test/base/array.cu +++ b/cuda/test/base/array.cu @@ -34,6 +34,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include + + #include diff --git a/include/ginkgo/core/base/temporary_clone.hpp b/include/ginkgo/core/base/temporary_clone.hpp index 33f80848336..fa0c2e32b13 100644 --- a/include/ginkgo/core/base/temporary_clone.hpp +++ b/include/ginkgo/core/base/temporary_clone.hpp @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_CORE_BASE_TEMPORARY_CLONE_HPP_ -#define GKO_CORE_BASE_TEMPORARY_CLONE_HPP_ +#ifndef GKO_PUBLIC_CORE_BASE_TEMPORARY_CLONE_HPP_ +#define GKO_PUBLIC_CORE_BASE_TEMPORARY_CLONE_HPP_ #include @@ -199,4 +199,4 @@ detail::temporary_clone make_temporary_clone( } // namespace gko -#endif // GKO_CORE_BASE_TEMPORARY_CLONE_HPP_ +#endif // GKO_PUBLIC_CORE_BASE_TEMPORARY_CLONE_HPP_ diff --git a/include/ginkgo/core/base/utils_helper.hpp b/include/ginkgo/core/base/utils_helper.hpp index 01c6b5cf5f5..d277d20cb4c 100644 --- a/include/ginkgo/core/base/utils_helper.hpp +++ b/include/ginkgo/core/base/utils_helper.hpp @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_CORE_BASE_UTILS_HELPER_HPP_ -#define GKO_CORE_BASE_UTILS_HELPER_HPP_ +#ifndef GKO_PUBLIC_CORE_BASE_UTILS_HELPER_HPP_ +#define GKO_PUBLIC_CORE_BASE_UTILS_HELPER_HPP_ #include @@ -435,4 +435,4 @@ class null_deleter { } // namespace gko -#endif // GKO_CORE_BASE_UTILS_HELPER_HPP_ +#endif // GKO_PUBLIC_CORE_BASE_UTILS_HELPER_HPP_ diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 40f415b2156..0602875c4dd 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -56,8 +56,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include +#include #include #include From 8608400e1a2eb4214a4941465b1b1eef3812c48a Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Tue, 8 Dec 2020 09:15:34 +0100 Subject: [PATCH 4/5] Review updates. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some code style issues. Co-authored-by: Thomas Grützmacher --- core/test/base/executor.cpp | 9 ++++++--- dpcpp/base/executor.dp.cpp | 1 - 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp index 783e20f1008..7e2f4ac23b4 100644 --- a/core/test/base/executor.cpp +++ b/core/test/base/executor.cpp @@ -488,12 +488,15 @@ TEST(Executor, CanVerifyMemory) std::shared_ptr host_dpcpp; std::shared_ptr cpu_dpcpp; std::shared_ptr gpu_dpcpp; - if (gko::DpcppExecutor::get_num_devices("host")) + if (gko::DpcppExecutor::get_num_devices("host")) { host_dpcpp = gko::DpcppExecutor::create(0, omp, "host"); - if (gko::DpcppExecutor::get_num_devices("cpu")) + } + if (gko::DpcppExecutor::get_num_devices("cpu")) { cpu_dpcpp = gko::DpcppExecutor::create(0, omp, "cpu"); - if (gko::DpcppExecutor::get_num_devices("gpu")) + } + if (gko::DpcppExecutor::get_num_devices("gpu")) { gpu_dpcpp = gko::DpcppExecutor::create(0, omp, "gpu"); + } ASSERT_EQ(true, ref->memory_accessible(omp)); ASSERT_EQ(true, omp->memory_accessible(ref)); diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp index 25e106a2546..39f9120701a 100644 --- a/dpcpp/base/executor.dp.cpp +++ b/dpcpp/base/executor.dp.cpp @@ -162,7 +162,6 @@ int DpcppExecutor::get_num_devices(std::string device_type) bool DpcppExecutor::verify_memory_to(const OmpExecutor *dest_exec) const { auto device = detail::get_devices(device_type_)[device_id_]; - return device.is_host() || device.is_cpu(); } From 018244f0191765749588edf2c6d34d9330781e69 Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Tue, 8 Dec 2020 19:11:41 +0100 Subject: [PATCH 5/5] Isolate Reference from other executors in memory. --- core/device_hooks/cuda_hooks.cpp | 3 -- core/device_hooks/dpcpp_hooks.cpp | 6 ---- core/device_hooks/hip_hooks.cpp | 3 -- core/devices/cuda/executor.cpp | 16 ++++++++++ core/devices/hip/executor.cpp | 16 ++++++++++ core/test/base/array.cpp | 8 ++--- core/test/base/executor.cpp | 12 +++---- core/test/base/lin_op.cpp | 10 +++--- core/test/base/utils.cpp | 5 +-- cuda/base/executor.cpp | 10 ------ dpcpp/base/executor.dp.cpp | 6 ---- hip/base/executor.hip.cpp | 10 ------ include/ginkgo/core/base/executor.hpp | 46 +++++++++++++++++---------- 13 files changed, 80 insertions(+), 71 deletions(-) diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp index a5c33c6541d..46e342d9c0a 100644 --- a/core/device_hooks/cuda_hooks.cpp +++ b/core/device_hooks/cuda_hooks.cpp @@ -98,9 +98,6 @@ void CudaExecutor::raw_copy_to(const DpcppExecutor *, size_type num_bytes, GKO_NOT_COMPILED(cuda); -bool CudaExecutor::verify_memory_to(const HipExecutor *) const { return false; } - - void CudaExecutor::synchronize() const GKO_NOT_COMPILED(cuda); diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp index 472451c6513..88caac1ccd3 100644 --- a/core/device_hooks/dpcpp_hooks.cpp +++ b/core/device_hooks/dpcpp_hooks.cpp @@ -126,12 +126,6 @@ bool DpcppExecutor::verify_memory_to(const OmpExecutor *dest_exec) const return device_type_ == "cpu" || device_type_ == "host"; } -bool DpcppExecutor::verify_memory_to(const ReferenceExecutor *dest_exec) const -{ - // Dummy check - return device_type_ == "cpu" || device_type_ == "host"; -} - bool DpcppExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const { // Dummy check diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp index 45180980159..b98b2fcb8a4 100644 --- a/core/device_hooks/hip_hooks.cpp +++ b/core/device_hooks/hip_hooks.cpp @@ -95,9 +95,6 @@ void HipExecutor::raw_copy_to(const DpcppExecutor *, size_type num_bytes, GKO_NOT_COMPILED(hip); -bool HipExecutor::verify_memory_to(const CudaExecutor *) const { return false; } - - void HipExecutor::synchronize() const GKO_NOT_COMPILED(hip); diff --git a/core/devices/cuda/executor.cpp b/core/devices/cuda/executor.cpp index 3566578a681..2d6af2b91ef 100644 --- a/core/devices/cuda/executor.cpp +++ b/core/devices/cuda/executor.cpp @@ -48,6 +48,22 @@ std::shared_ptr CudaExecutor::get_master() const noexcept } +bool CudaExecutor::verify_memory_to(const CudaExecutor *dest_exec) const +{ + return device_id_ == dest_exec->get_device_id(); +} + + +bool CudaExecutor::verify_memory_to(const HipExecutor *dest_exec) const +{ +#if GINKGO_HIP_PLATFORM_NVCC + return device_id_ == dest_exec->get_device_id(); +#else + return false; +#endif +} + + unsigned CudaExecutor::num_execs[max_devices]; diff --git a/core/devices/hip/executor.cpp b/core/devices/hip/executor.cpp index f4787523290..b9a643eb7e2 100644 --- a/core/devices/hip/executor.cpp +++ b/core/devices/hip/executor.cpp @@ -45,6 +45,22 @@ std::shared_ptr HipExecutor::get_master() const noexcept } +bool HipExecutor::verify_memory_to(const HipExecutor *dest_exec) const +{ + return device_id_ == dest_exec->get_device_id(); +} + + +bool HipExecutor::verify_memory_to(const CudaExecutor *dest_exec) const +{ +#if GINKGO_HIP_PLATFORM_NVCC + return device_id_ == dest_exec->get_device_id(); +#else + return false; +#endif +} + + int HipExecutor::num_execs[max_devices]; diff --git a/core/test/base/array.cpp b/core/test/base/array.cpp index 54f7cfea053..c20f5ca72a9 100644 --- a/core/test/base/array.cpp +++ b/core/test/base/array.cpp @@ -275,9 +275,9 @@ TYPED_TEST(Array, CanCreateTemporaryCloneOnSameExecutor) // For tests between different memory, check cuda/test/base/array.cu TYPED_TEST(Array, DoesNotCreateATemporaryCloneBetweenSameMemory) { - auto omp = gko::OmpExecutor::create(); + auto other = gko::ReferenceExecutor::create(); - auto tmp_clone = make_temporary_clone(omp, &this->x); + auto tmp_clone = make_temporary_clone(other, &this->x); this->assert_equal_to_original_x(*tmp_clone.get()); ASSERT_EQ(tmp_clone.get(), &this->x); @@ -286,10 +286,10 @@ TYPED_TEST(Array, DoesNotCreateATemporaryCloneBetweenSameMemory) TYPED_TEST(Array, DoesNotCopyBackTemporaryCloneBetweenSameMemory) { - auto omp = gko::OmpExecutor::create(); + auto other = gko::ReferenceExecutor::create(); { - auto tmp_clone = make_temporary_clone(omp, &this->x); + auto tmp_clone = make_temporary_clone(other, &this->x); // change x, and check that there is no copy-back to overwrite it again this->x.get_data()[0] = 0; } diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp index 7e2f4ac23b4..488334c9ab9 100644 --- a/core/test/base/executor.cpp +++ b/core/test/base/executor.cpp @@ -498,8 +498,8 @@ TEST(Executor, CanVerifyMemory) gpu_dpcpp = gko::DpcppExecutor::create(0, omp, "gpu"); } - ASSERT_EQ(true, ref->memory_accessible(omp)); - ASSERT_EQ(true, omp->memory_accessible(ref)); + ASSERT_EQ(false, ref->memory_accessible(omp)); + ASSERT_EQ(false, omp->memory_accessible(ref)); ASSERT_EQ(false, ref->memory_accessible(hip)); ASSERT_EQ(false, hip->memory_accessible(ref)); ASSERT_EQ(false, omp->memory_accessible(hip)); @@ -509,14 +509,14 @@ TEST(Executor, CanVerifyMemory) ASSERT_EQ(false, omp->memory_accessible(cuda)); ASSERT_EQ(false, cuda->memory_accessible(omp)); if (gko::DpcppExecutor::get_num_devices("host")) { - ASSERT_EQ(true, host_dpcpp->memory_accessible(ref)); - ASSERT_EQ(true, ref->memory_accessible(host_dpcpp)); + ASSERT_EQ(false, host_dpcpp->memory_accessible(ref)); + ASSERT_EQ(false, ref->memory_accessible(host_dpcpp)); ASSERT_EQ(true, host_dpcpp->memory_accessible(omp)); ASSERT_EQ(true, omp->memory_accessible(host_dpcpp)); } if (gko::DpcppExecutor::get_num_devices("cpu")) { - ASSERT_EQ(true, ref->memory_accessible(cpu_dpcpp)); - ASSERT_EQ(true, cpu_dpcpp->memory_accessible(ref)); + ASSERT_EQ(false, ref->memory_accessible(cpu_dpcpp)); + ASSERT_EQ(false, cpu_dpcpp->memory_accessible(ref)); ASSERT_EQ(true, cpu_dpcpp->memory_accessible(omp)); ASSERT_EQ(true, omp->memory_accessible(cpu_dpcpp)); } diff --git a/core/test/base/lin_op.cpp b/core/test/base/lin_op.cpp index 3da8eda55f0..e608654ca1f 100644 --- a/core/test/base/lin_op.cpp +++ b/core/test/base/lin_op.cpp @@ -93,8 +93,8 @@ class EnableLinOp : public ::testing::Test { protected: EnableLinOp() : ref{gko::ReferenceExecutor::create()}, - omp{gko::OmpExecutor::create()}, - op{DummyLinOp::create(omp, gko::dim<2>{3, 5})}, + ref2{gko::ReferenceExecutor::create()}, + op{DummyLinOp::create(ref2, gko::dim<2>{3, 5})}, alpha{DummyLinOp::create(ref, gko::dim<2>{1})}, beta{DummyLinOp::create(ref, gko::dim<2>{1})}, b{DummyLinOp::create(ref, gko::dim<2>{5, 4})}, @@ -102,7 +102,7 @@ class EnableLinOp : public ::testing::Test { {} std::shared_ptr ref; - std::shared_ptr omp; + std::shared_ptr ref2; std::unique_ptr op; std::unique_ptr alpha; std::unique_ptr beta; @@ -115,7 +115,7 @@ TEST_F(EnableLinOp, CallsApplyImpl) { op->apply(gko::lend(b), gko::lend(x)); - ASSERT_EQ(op->last_access, omp); + ASSERT_EQ(op->last_access, ref2); } @@ -123,7 +123,7 @@ TEST_F(EnableLinOp, CallsExtendedApplyImpl) { op->apply(gko::lend(alpha), gko::lend(b), gko::lend(beta), gko::lend(x)); - ASSERT_EQ(op->last_access, omp); + ASSERT_EQ(op->last_access, ref2); } diff --git a/core/test/base/utils.cpp b/core/test/base/utils.cpp index 822449e8f24..a3bafcb5b69 100644 --- a/core/test/base/utils.cpp +++ b/core/test/base/utils.cpp @@ -402,9 +402,10 @@ class TemporaryClone : public ::testing::Test { TEST_F(TemporaryClone, DoesNotCopyToSameMemory) { - auto clone = make_temporary_clone(omp, gko::lend(obj)); + auto other = gko::ReferenceExecutor::create(); + auto clone = make_temporary_clone(other, gko::lend(obj)); - ASSERT_NE(clone.get()->get_executor(), omp); + ASSERT_NE(clone.get()->get_executor(), other); ASSERT_EQ(obj->get_executor(), ref); } diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index 5284daa2dd1..7926ae98a21 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -162,16 +162,6 @@ void CudaExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes, } -bool CudaExecutor::verify_memory_to(const HipExecutor *dest_exec) const -{ -#if GINKGO_HIP_PLATFORM_NVCC - return device_id_ == dest_exec->get_device_id(); -#else - return false; -#endif -} - - void CudaExecutor::synchronize() const { cuda::device_guard g(this->get_device_id()); diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp index 39f9120701a..92089616c02 100644 --- a/dpcpp/base/executor.dp.cpp +++ b/dpcpp/base/executor.dp.cpp @@ -165,12 +165,6 @@ bool DpcppExecutor::verify_memory_to(const OmpExecutor *dest_exec) const return device.is_host() || device.is_cpu(); } -bool DpcppExecutor::verify_memory_to(const ReferenceExecutor *dest_exec) const -{ - auto device = detail::get_devices(device_type_)[device_id_]; - return device.is_host() || device.is_cpu(); -} - bool DpcppExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const { auto device = detail::get_devices(device_type_)[device_id_]; diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index 8f8bbbf7d9d..3e400986e2b 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -162,16 +162,6 @@ void HipExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes, } -bool HipExecutor::verify_memory_to(const CudaExecutor *dest_exec) const -{ -#if GINKGO_HIP_PLATFORM_NVCC - return device_id_ == dest_exec->get_device_id(); -#else - return false; -#endif -} - - void HipExecutor::synchronize() const { hip::device_guard g(this->get_device_id()); diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 2ff58f6fe82..f02c5834c73 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -468,6 +468,7 @@ class Executor : public log::EnableLogging { friend class detail::ExecutorBase; GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_DECLARE_EXECUTOR_FRIEND); + friend class ReferenceExecutor; public: virtual ~Executor() = default; @@ -855,6 +856,7 @@ namespace detail { template class ExecutorBase : public Executor { GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_DECLARE_EXECUTOR_FRIEND); + friend class ReferenceExecutor; public: void run(const Operation &op) const override @@ -871,7 +873,7 @@ class ExecutorBase : public Executor { src_exec->raw_copy_to(self(), n_bytes, src_ptr, dest_ptr); } - bool verify_memory_from(const Executor *src_exec) const override + virtual bool verify_memory_from(const Executor *src_exec) const override { return src_exec->verify_memory_to(self()); } @@ -935,10 +937,13 @@ class EnableDeviceReset { const void *src_ptr, void *dest_ptr) const override -#define GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(dest_, bool_) \ - bool verify_memory_to(const dest_ *other) const override { return bool_; } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ +#define GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(dest_, bool_) \ + virtual bool verify_memory_to(const dest_ *other) const override \ + { \ + return bool_; \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ "semi-colon warnings") @@ -979,7 +984,7 @@ class OmpExecutor : public detail::ExecutorBase, GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(OmpExecutor, true); - GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(ReferenceExecutor, true); + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(ReferenceExecutor, false); GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(HipExecutor, false); @@ -1020,6 +1025,21 @@ class ReferenceExecutor : public OmpExecutor { protected: ReferenceExecutor() = default; + + bool verify_memory_from(const Executor *src_exec) const override + { + return src_exec->verify_memory_to(this); + } + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(ReferenceExecutor, true); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(OmpExecutor, false); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(DpcppExecutor, false); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(CudaExecutor, false); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(HipExecutor, false); }; @@ -1159,10 +1179,7 @@ class CudaExecutor : public detail::ExecutorBase, bool verify_memory_to(const HipExecutor *dest_exec) const override; - bool verify_memory_to(const CudaExecutor *dest_exec) const override - { - return device_id_ == dest_exec->get_device_id(); - } + bool verify_memory_to(const CudaExecutor *dest_exec) const override; static void increase_num_execs(unsigned device_id) { @@ -1338,10 +1355,7 @@ class HipExecutor : public detail::ExecutorBase, bool verify_memory_to(const CudaExecutor *dest_exec) const override; - bool verify_memory_to(const HipExecutor *dest_exec) const override - { - return device_id_ == dest_exec->get_device_id(); - } + bool verify_memory_to(const HipExecutor *dest_exec) const override; static void increase_num_execs(int device_id) { @@ -1507,9 +1521,9 @@ class DpcppExecutor : public detail::ExecutorBase, GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(HipExecutor, false); - bool verify_memory_to(const OmpExecutor *dest_exec) const override; + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(ReferenceExecutor, false); - bool verify_memory_to(const ReferenceExecutor *dest_exec) const override; + bool verify_memory_to(const OmpExecutor *dest_exec) const override; bool verify_memory_to(const DpcppExecutor *dest_exec) const override;