Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check if executors share the same memory #670

Merged
merged 5 commits into from Dec 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 22 additions & 0 deletions core/device_hooks/dpcpp_hooks.cpp
Expand Up @@ -64,6 +64,14 @@ void OmpExecutor::raw_copy_to(const DpcppExecutor *, size_type num_bytes,
GKO_NOT_COMPILED(dpcpp);


// Placeholder ("dummy") memory check from an OpenMP executor towards a DPC++
// executor. The result depends only on the device type reported by dest_exec:
// it is true for the "cpu" and "host" device types and false otherwise.
bool OmpExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const
{
    const auto target_type = dest_exec->get_device_type();
    if (target_type == "cpu") {
        return true;
    }
    return target_type == "host";
}


void DpcppExecutor::raw_free(void *ptr) const noexcept
{
// Free must never fail, as it can be called in destructors.
Expand Down Expand Up @@ -112,6 +120,20 @@ int DpcppExecutor::get_num_devices(std::string) { return 0; }
// This definition has an empty body: calling it performs no work.
void DpcppExecutor::set_device_property() {}


// Placeholder ("dummy") memory check from a DPC++ executor towards an OpenMP
// executor. dest_exec is not inspected; the answer is derived solely from this
// executor's own device_type_, and is true for "cpu" and "host".
bool DpcppExecutor::verify_memory_to(const OmpExecutor *dest_exec) const
{
    // Equivalent to: device_type_ is "cpu" or device_type_ is "host".
    return !(device_type_ != "cpu" && device_type_ != "host");
}

// Placeholder ("dummy") memory check between two DPC++ executors: true only
// when dest_exec reports the same device type and the same device id as this
// executor.
bool DpcppExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const
{
    const bool same_type = dest_exec->get_device_type() == device_type_;
    // The device id is only queried when the device types already match.
    return same_type && dest_exec->get_device_id() == device_id_;
}


} // namespace gko


Expand Down
16 changes: 16 additions & 0 deletions core/devices/cuda/executor.cpp
Expand Up @@ -48,6 +48,22 @@ std::shared_ptr<const Executor> CudaExecutor::get_master() const noexcept
}


// Memory check between two CUDA executors: true exactly when dest_exec reports
// the same device id as this executor.
bool CudaExecutor::verify_memory_to(const CudaExecutor *dest_exec) const
{
    const auto dest_id = dest_exec->get_device_id();
    return device_id_ == dest_id;
}


// Memory check from a CUDA executor towards a HIP executor. When
// GINKGO_HIP_PLATFORM_NVCC evaluates to true in the preprocessor, the result
// is the comparison of both device ids; otherwise it is always false.
bool CudaExecutor::verify_memory_to(const HipExecutor *dest_exec) const
{
#if GINKGO_HIP_PLATFORM_NVCC
    const auto dest_id = dest_exec->get_device_id();
    return device_id_ == dest_id;
#else
    return false;
#endif
}


unsigned CudaExecutor::num_execs[max_devices];


Expand Down
16 changes: 16 additions & 0 deletions core/devices/hip/executor.cpp
Expand Up @@ -45,6 +45,22 @@ std::shared_ptr<const Executor> HipExecutor::get_master() const noexcept
}


// Memory check between two HIP executors: true exactly when dest_exec reports
// the same device id as this executor.
bool HipExecutor::verify_memory_to(const HipExecutor *dest_exec) const
{
    const auto dest_id = dest_exec->get_device_id();
    return device_id_ == dest_id;
}


// Memory check from a HIP executor towards a CUDA executor. When
// GINKGO_HIP_PLATFORM_NVCC evaluates to true in the preprocessor, the result
// is the comparison of both device ids; otherwise it is always false.
bool HipExecutor::verify_memory_to(const CudaExecutor *dest_exec) const
{
#if GINKGO_HIP_PLATFORM_NVCC
    const auto dest_id = dest_exec->get_device_id();
    return device_id_ == dest_id;
#else
    return false;
#endif
}


int HipExecutor::num_execs[max_devices];


Expand Down
28 changes: 15 additions & 13 deletions core/test/base/array.cpp
Expand Up @@ -57,12 +57,13 @@ class Array : public ::testing::Test {
x.get_data()[1] = 2;
}

static void assert_equal_to_original_x(gko::Array<T> &a)
static void assert_equal_to_original_x(gko::Array<T> &a,
bool check_zero = true)
{
ASSERT_EQ(a.get_num_elems(), 2);
EXPECT_EQ(a.get_data()[0], T{5});
if (check_zero) EXPECT_EQ(a.get_data()[0], T{5});
EXPECT_EQ(a.get_data()[1], T{2});
EXPECT_EQ(a.get_const_data()[0], T{5});
if (check_zero) EXPECT_EQ(a.get_const_data()[0], T{5});
EXPECT_EQ(a.get_const_data()[1], T{2});
}

Expand Down Expand Up @@ -271,29 +272,30 @@ TYPED_TEST(Array, CanCreateTemporaryCloneOnSameExecutor)
}


TYPED_TEST(Array, CanCreateTemporaryCloneOnDifferentExecutor)
// For tests between different memory, check cuda/test/base/array.cu
TYPED_TEST(Array, DoesNotCreateATemporaryCloneBetweenSameMemory)
{
auto omp = gko::OmpExecutor::create();
auto other = gko::ReferenceExecutor::create();

auto tmp_clone = make_temporary_clone(omp, &this->x);
auto tmp_clone = make_temporary_clone(other, &this->x);

this->assert_equal_to_original_x(*tmp_clone.get());
ASSERT_NE(tmp_clone.get(), &this->x);
ASSERT_EQ(tmp_clone.get(), &this->x);
}


TYPED_TEST(Array, CanCopyBackTemporaryCloneOnDifferentExecutor)
TYPED_TEST(Array, DoesNotCopyBackTemporaryCloneBetweenSameMemory)
{
auto omp = gko::OmpExecutor::create();
auto other = gko::ReferenceExecutor::create();

{
auto tmp_clone = make_temporary_clone(omp, &this->x);
// change x, so it no longer matches the original x
// the copy-back will overwrite it again with the correct value
auto tmp_clone = make_temporary_clone(other, &this->x);
// change x, and check that there is no copy-back to overwrite it again
this->x.get_data()[0] = 0;
}

this->assert_equal_to_original_x(this->x);
this->assert_equal_to_original_x(this->x, false);
EXPECT_EQ(this->x.get_data()[0], TypeParam{0});
}


Expand Down
73 changes: 73 additions & 0 deletions core/test/base/executor.cpp
Expand Up @@ -474,6 +474,79 @@ TEST(DpcppExecutor, KnowsItsDeviceId)
}


// Checks the result of memory_accessible() for pairs of executors: reference,
// OpenMP, HIP, CUDA and (when available) DPC++ executors of each device type.
TEST(Executor, CanVerifyMemory)
{
// One executor of each kind, plus a second instance on the same device
// (omp2, hip2, cuda2) and instances created with device id 1 (hip_1, cuda_1).
auto ref = gko::ReferenceExecutor::create();
auto omp = gko::OmpExecutor::create();
auto hip = gko::HipExecutor::create(0, omp);
auto cuda = gko::CudaExecutor::create(0, omp);
auto omp2 = gko::OmpExecutor::create();
auto hip2 = gko::HipExecutor::create(0, omp);
auto cuda2 = gko::CudaExecutor::create(0, omp);
auto hip_1 = gko::HipExecutor::create(1, omp);
auto cuda_1 = gko::CudaExecutor::create(1, omp);
// DPC++ executors are only created for device types for which
// get_num_devices() reports a non-zero count; otherwise the pointer stays
// empty and the matching assertions below are skipped by the same condition.
std::shared_ptr<gko::DpcppExecutor> host_dpcpp;
std::shared_ptr<gko::DpcppExecutor> cpu_dpcpp;
std::shared_ptr<gko::DpcppExecutor> gpu_dpcpp;
if (gko::DpcppExecutor::get_num_devices("host")) {
host_dpcpp = gko::DpcppExecutor::create(0, omp, "host");
}
if (gko::DpcppExecutor::get_num_devices("cpu")) {
cpu_dpcpp = gko::DpcppExecutor::create(0, omp, "cpu");
}
if (gko::DpcppExecutor::get_num_devices("gpu")) {
gpu_dpcpp = gko::DpcppExecutor::create(0, omp, "gpu");
}

// Reference, OpenMP, HIP and CUDA executors are expected to report each
// other's memory as not accessible, in both directions.
ASSERT_EQ(false, ref->memory_accessible(omp));
ASSERT_EQ(false, omp->memory_accessible(ref));
ASSERT_EQ(false, ref->memory_accessible(hip));
ASSERT_EQ(false, hip->memory_accessible(ref));
ASSERT_EQ(false, omp->memory_accessible(hip));
ASSERT_EQ(false, hip->memory_accessible(omp));
ASSERT_EQ(false, ref->memory_accessible(cuda));
ASSERT_EQ(false, cuda->memory_accessible(ref));
ASSERT_EQ(false, omp->memory_accessible(cuda));
ASSERT_EQ(false, cuda->memory_accessible(omp));
// DPC++ "host" device: accessible to/from OpenMP, not to/from reference.
if (gko::DpcppExecutor::get_num_devices("host")) {
ASSERT_EQ(false, host_dpcpp->memory_accessible(ref));
ASSERT_EQ(false, ref->memory_accessible(host_dpcpp));
ASSERT_EQ(true, host_dpcpp->memory_accessible(omp));
ASSERT_EQ(true, omp->memory_accessible(host_dpcpp));
}
// DPC++ "cpu" device: same expectations as the "host" device.
if (gko::DpcppExecutor::get_num_devices("cpu")) {
ASSERT_EQ(false, ref->memory_accessible(cpu_dpcpp));
ASSERT_EQ(false, cpu_dpcpp->memory_accessible(ref));
ASSERT_EQ(true, cpu_dpcpp->memory_accessible(omp));
ASSERT_EQ(true, omp->memory_accessible(cpu_dpcpp));
}
// DPC++ "gpu" device: not accessible to/from either reference or OpenMP.
if (gko::DpcppExecutor::get_num_devices("gpu")) {
ASSERT_EQ(false, gpu_dpcpp->memory_accessible(ref));
ASSERT_EQ(false, ref->memory_accessible(gpu_dpcpp));
ASSERT_EQ(false, gpu_dpcpp->memory_accessible(omp));
ASSERT_EQ(false, omp->memory_accessible(gpu_dpcpp));
}
// HIP <-> CUDA with matching device ids: the expected answer is selected at
// preprocessing time by GINKGO_HIP_PLATFORM_NVCC.
#if GINKGO_HIP_PLATFORM_NVCC
ASSERT_EQ(true, hip->memory_accessible(cuda));
ASSERT_EQ(true, cuda->memory_accessible(hip));
ASSERT_EQ(true, hip_1->memory_accessible(cuda_1));
ASSERT_EQ(true, cuda_1->memory_accessible(hip_1));
#else
ASSERT_EQ(false, hip->memory_accessible(cuda));
ASSERT_EQ(false, cuda->memory_accessible(hip));
ASSERT_EQ(false, hip_1->memory_accessible(cuda_1));
ASSERT_EQ(false, cuda_1->memory_accessible(hip_1));
#endif
// Two executors of the same kind on the same device id are accessible to
// each other.
ASSERT_EQ(true, omp->memory_accessible(omp2));
ASSERT_EQ(true, hip->memory_accessible(hip2));
ASSERT_EQ(true, cuda->memory_accessible(cuda2));
// Differing device ids (0 vs. 1) are never accessible, whichever HIP/CUDA
// combination is used.
ASSERT_EQ(false, hip->memory_accessible(hip_1));
ASSERT_EQ(false, cuda->memory_accessible(hip_1));
ASSERT_EQ(false, cuda->memory_accessible(cuda_1));
ASSERT_EQ(false, hip->memory_accessible(cuda_1));
}


template <typename T>
struct mock_free : T {
/**
Expand Down
45 changes: 23 additions & 22 deletions core/test/base/lin_op.cpp
Expand Up @@ -93,16 +93,16 @@ class EnableLinOp : public ::testing::Test {
protected:
EnableLinOp()
: ref{gko::ReferenceExecutor::create()},
omp{gko::OmpExecutor::create()},
op{DummyLinOp::create(omp, gko::dim<2>{3, 5})},
ref2{gko::ReferenceExecutor::create()},
op{DummyLinOp::create(ref2, gko::dim<2>{3, 5})},
alpha{DummyLinOp::create(ref, gko::dim<2>{1})},
beta{DummyLinOp::create(ref, gko::dim<2>{1})},
b{DummyLinOp::create(ref, gko::dim<2>{5, 4})},
x{DummyLinOp::create(ref, gko::dim<2>{3, 4})}
{}

std::shared_ptr<const gko::ReferenceExecutor> ref;
std::shared_ptr<const gko::OmpExecutor> omp;
std::shared_ptr<const gko::ReferenceExecutor> ref2;
std::unique_ptr<DummyLinOp> op;
std::unique_ptr<DummyLinOp> alpha;
std::unique_ptr<DummyLinOp> beta;
Expand All @@ -115,15 +115,15 @@ TEST_F(EnableLinOp, CallsApplyImpl)
{
op->apply(gko::lend(b), gko::lend(x));

ASSERT_EQ(op->last_access, omp);
ASSERT_EQ(op->last_access, ref2);
}


TEST_F(EnableLinOp, CallsExtendedApplyImpl)
{
op->apply(gko::lend(alpha), gko::lend(b), gko::lend(beta), gko::lend(x));

ASSERT_EQ(op->last_access, omp);
ASSERT_EQ(op->last_access, ref2);
}


Expand Down Expand Up @@ -204,43 +204,44 @@ TEST_F(EnableLinOp, ExtendedApplyFailsOnWrongBetaDimension)
}


TEST_F(EnableLinOp, ApplyCopiesDataToCorrectExecutor)
// For tests between different memory, check cuda/test/base/lin_op.cu
TEST_F(EnableLinOp, ApplyDoesNotCopyBetweenSameMemory)
{
op->apply(gko::lend(b), gko::lend(x));

ASSERT_EQ(op->last_b_access, omp);
ASSERT_EQ(op->last_x_access, omp);
ASSERT_EQ(op->last_b_access, ref);
ASSERT_EQ(op->last_x_access, ref);
}


TEST_F(EnableLinOp, ApplyCopiesBackOnlyX)
TEST_F(EnableLinOp, ApplyNoCopyBackBetweenSameMemory)
{
op->apply(gko::lend(b), gko::lend(x));

ASSERT_EQ(b->last_access, nullptr);
ASSERT_EQ(x->last_access, omp);
ASSERT_EQ(b->last_access, ref);
ASSERT_EQ(x->last_access, ref);
}


TEST_F(EnableLinOp, ExtendedApplyCopiesDataToCorrectExecutor)
TEST_F(EnableLinOp, ExtendedApplyDoesNotCopyBetweenSameMemory)
{
op->apply(gko::lend(alpha), gko::lend(b), gko::lend(beta), gko::lend(x));

ASSERT_EQ(op->last_alpha_access, omp);
ASSERT_EQ(op->last_b_access, omp);
ASSERT_EQ(op->last_beta_access, omp);
ASSERT_EQ(op->last_x_access, omp);
ASSERT_EQ(op->last_alpha_access, ref);
ASSERT_EQ(op->last_b_access, ref);
ASSERT_EQ(op->last_beta_access, ref);
ASSERT_EQ(op->last_x_access, ref);
}


TEST_F(EnableLinOp, ExtendedApplyCopiesBackOnlyX)
TEST_F(EnableLinOp, ExtendedApplyNoCopyBackBetweenSameMemory)
{
op->apply(gko::lend(b), gko::lend(x));
op->apply(gko::lend(alpha), gko::lend(b), gko::lend(beta), gko::lend(x));

ASSERT_EQ(alpha->last_access, nullptr);
ASSERT_EQ(b->last_access, nullptr);
ASSERT_EQ(beta->last_access, nullptr);
ASSERT_EQ(x->last_access, omp);
ASSERT_EQ(alpha->last_access, ref);
ASSERT_EQ(b->last_access, ref);
ASSERT_EQ(beta->last_access, ref);
ASSERT_EQ(x->last_access, ref);
}


Expand Down
7 changes: 4 additions & 3 deletions core/test/base/utils.cpp
Expand Up @@ -400,11 +400,12 @@ class TemporaryClone : public ::testing::Test {
};


TEST_F(TemporaryClone, CopiesToAnotherExecutor)
TEST_F(TemporaryClone, DoesNotCopyToSameMemory)
{
auto clone = make_temporary_clone(omp, gko::lend(obj));
auto other = gko::ReferenceExecutor::create();
auto clone = make_temporary_clone(other, gko::lend(obj));

ASSERT_EQ(clone.get()->get_executor(), omp);
ASSERT_NE(clone.get()->get_executor(), other);
ASSERT_EQ(obj->get_executor(), ref);
}

Expand Down
2 changes: 2 additions & 0 deletions core/test/utils.hpp
Expand Up @@ -69,6 +69,8 @@ using ValueAndIndexTypes =
::testing::Types<float, double, std::complex<float>, std::complex<double>,
gko::int32, gko::int64, gko::size_type>;

// Type list for typed tests covering the real floating-point types (float,
// double) together with gko::int32, gko::int64 and gko::size_type; it contains
// no std::complex types.
using RealValueAndIndexTypes =
::testing::Types<float, double, gko::int32, gko::int64, gko::size_type>;

using ValueIndexTypes = ::testing::Types<
std::tuple<float, gko::int32>, std::tuple<double, gko::int32>,
Expand Down
1 change: 1 addition & 0 deletions cuda/test/base/CMakeLists.txt
@@ -1,3 +1,4 @@
ginkgo_create_cuda_test(array)
ginkgo_create_cuda_test(cuda_executor)
ginkgo_create_cuda_test(exception_helpers)
ginkgo_create_cuda_test(lin_op)
Expand Down