Review updates.
+ Do not use `operator==`; provide a `memory_accessible` function instead.
+ Make the DPC++ host and CPU executors memory compatible.
+ Use pointers for the interface instead of references.
+ Ensure DPC++ tests always work.
+ Fix some typos.

Co-authored-by: Yuhsiang M. Tsai <yhmtsai@gmail.com>
Co-authored-by: Tobias Ribizel <ribizel@kit.edu>
Co-authored-by: Pratik Nayak <pratikvn@protonmail.com>
4 people committed Dec 1, 2020
1 parent 94faee0 commit 78f8a62
Showing 4 changed files with 76 additions and 66 deletions.
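
For orientation, a minimal sketch of what the interface change looks like at a call site (illustrative only: a CUDA-enabled build and a CUDA device 0 are assumed):

#include <iostream>
#include <ginkgo/ginkgo.hpp>

int main()
{
    auto omp = gko::OmpExecutor::create();
    auto cuda = gko::CudaExecutor::create(0, omp);  // assumes a CUDA device 0

    // Before this commit: bool shared = (*omp == *cuda);
    // After it, the other executor is passed as a std::shared_ptr:
    std::cout << omp->memory_accessible(cuda) << '\n';  // prints 0: OpenMP cannot reach CUDA memory
    std::cout << omp->memory_accessible(gko::ReferenceExecutor::create())
              << '\n';  // prints 1: OpenMP and reference share host memory
}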
95 changes: 51 additions & 44 deletions core/test/base/executor.cpp
@@ -480,56 +480,63 @@ TEST(Executor, CanVerifyMemory)
     auto omp = gko::OmpExecutor::create();
     auto hip = gko::HipExecutor::create(0, omp);
     auto cuda = gko::CudaExecutor::create(0, omp);
-    auto cpu_dpcpp = gko::DpcppExecutor::create(0, omp, "cpu");
-    auto host_dpcpp = gko::DpcppExecutor::create(0, omp, "host");
-    auto gpu_dpcpp = gko::DpcppExecutor::create(0, omp, "gpu");
     auto omp2 = gko::OmpExecutor::create();
     auto hip2 = gko::HipExecutor::create(0, omp);
     auto cuda2 = gko::CudaExecutor::create(0, omp);
-    std::shared_ptr<gko::HipExecutor> hip_1 = gko::HipExecutor::create(1, omp);
-    std::shared_ptr<gko::CudaExecutor> cuda_1 =
-        gko::CudaExecutor::create(1, omp);
-
-    ASSERT_EQ(true, *ref == *omp);
-    ASSERT_EQ(true, *omp == *ref);
-    ASSERT_EQ(false, *ref == *hip);
-    ASSERT_EQ(false, *hip == *ref);
-    ASSERT_EQ(false, *omp == *hip);
-    ASSERT_EQ(false, *hip == *omp);
-    ASSERT_EQ(false, *ref == *cuda);
-    ASSERT_EQ(false, *cuda == *ref);
-    ASSERT_EQ(false, *omp == *cuda);
-    ASSERT_EQ(false, *cuda == *omp);
-    ASSERT_EQ(true, *cpu_dpcpp == *ref);
-    ASSERT_EQ(true, *host_dpcpp == *ref);
-    ASSERT_EQ(false, *gpu_dpcpp == *ref);
-    ASSERT_EQ(true, *ref == *cpu_dpcpp);
-    ASSERT_EQ(true, *ref == *host_dpcpp);
-    ASSERT_EQ(false, *ref == *gpu_dpcpp);
-    ASSERT_EQ(true, *cpu_dpcpp == *omp);
-    ASSERT_EQ(true, *host_dpcpp == *omp);
-    ASSERT_EQ(false, *gpu_dpcpp == *omp);
-    ASSERT_EQ(true, *omp == *cpu_dpcpp);
-    ASSERT_EQ(true, *omp == *host_dpcpp);
-    ASSERT_EQ(false, *omp == *gpu_dpcpp);
+    auto hip_1 = gko::HipExecutor::create(1, omp);
+    auto cuda_1 = gko::CudaExecutor::create(1, omp);
+    auto host_dpcpp = gko::DpcppExecutor::create(0, omp, "host");
+    std::shared_ptr<gko::DpcppExecutor> cpu_dpcpp;
+    std::shared_ptr<gko::DpcppExecutor> gpu_dpcpp;
+    if (gko::DpcppExecutor::get_num_devices("cpu"))
+        cpu_dpcpp = gko::DpcppExecutor::create(0, omp, "cpu");
+    if (gko::DpcppExecutor::get_num_devices("gpu"))
+        gpu_dpcpp = gko::DpcppExecutor::create(0, omp, "gpu");
+
+    ASSERT_EQ(true, ref->memory_accessible(omp));
+    ASSERT_EQ(true, omp->memory_accessible(ref));
+    ASSERT_EQ(false, ref->memory_accessible(hip));
+    ASSERT_EQ(false, hip->memory_accessible(ref));
+    ASSERT_EQ(false, omp->memory_accessible(hip));
+    ASSERT_EQ(false, hip->memory_accessible(omp));
+    ASSERT_EQ(false, ref->memory_accessible(cuda));
+    ASSERT_EQ(false, cuda->memory_accessible(ref));
+    ASSERT_EQ(false, omp->memory_accessible(cuda));
+    ASSERT_EQ(false, cuda->memory_accessible(omp));
+    ASSERT_EQ(true, host_dpcpp->memory_accessible(ref));
+    ASSERT_EQ(true, ref->memory_accessible(host_dpcpp));
+    ASSERT_EQ(true, host_dpcpp->memory_accessible(omp));
+    ASSERT_EQ(true, omp->memory_accessible(host_dpcpp));
+    if (gko::DpcppExecutor::get_num_devices("cpu")) {
+        ASSERT_EQ(true, ref->memory_accessible(cpu_dpcpp));
+        ASSERT_EQ(true, cpu_dpcpp->memory_accessible(ref));
+        ASSERT_EQ(true, cpu_dpcpp->memory_accessible(omp));
+        ASSERT_EQ(true, omp->memory_accessible(cpu_dpcpp));
+    }
+    if (gko::DpcppExecutor::get_num_devices("gpu")) {
+        ASSERT_EQ(false, gpu_dpcpp->memory_accessible(ref));
+        ASSERT_EQ(false, ref->memory_accessible(gpu_dpcpp));
+        ASSERT_EQ(false, gpu_dpcpp->memory_accessible(omp));
+        ASSERT_EQ(false, omp->memory_accessible(gpu_dpcpp));
+    }
 #if GINKGO_HIP_PLATFORM_NVCC
-    ASSERT_EQ(true, *hip == *cuda);
-    ASSERT_EQ(true, *cuda == *hip);
-    ASSERT_EQ(true, *hip_1 == *cuda_1);
-    ASSERT_EQ(true, *cuda_1 == *hip_1);
+    ASSERT_EQ(true, hip->memory_accessible(cuda));
+    ASSERT_EQ(true, cuda->memory_accessible(hip));
+    ASSERT_EQ(true, hip_1->memory_accessible(cuda_1));
+    ASSERT_EQ(true, cuda_1->memory_accessible(hip_1));
 #else
-    ASSERT_EQ(false, *hip == *cuda);
-    ASSERT_EQ(false, *cuda == *hip);
-    ASSERT_EQ(false, *hip_1 == *cuda_1);
-    ASSERT_EQ(false, *cuda_1 == *hip_1);
+    ASSERT_EQ(false, hip->memory_accessible(cuda));
+    ASSERT_EQ(false, cuda->memory_accessible(hip));
+    ASSERT_EQ(false, hip_1->memory_accessible(cuda_1));
+    ASSERT_EQ(false, cuda_1->memory_accessible(hip_1));
 #endif
-    ASSERT_EQ(true, *omp == *omp2);
-    ASSERT_EQ(true, *hip == *hip2);
-    ASSERT_EQ(true, *cuda == *cuda2);
-    ASSERT_EQ(false, *hip == *hip_1);
-    ASSERT_EQ(false, *cuda == *hip_1);
-    ASSERT_EQ(false, *cuda == *cuda_1);
-    ASSERT_EQ(false, *hip == *cuda_1);
+    ASSERT_EQ(true, omp->memory_accessible(omp2));
+    ASSERT_EQ(true, hip->memory_accessible(hip2));
+    ASSERT_EQ(true, cuda->memory_accessible(cuda2));
+    ASSERT_EQ(false, hip->memory_accessible(hip_1));
+    ASSERT_EQ(false, cuda->memory_accessible(hip_1));
+    ASSERT_EQ(false, cuda->memory_accessible(cuda_1));
+    ASSERT_EQ(false, hip->memory_accessible(cuda_1));
 }


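The guarded construction above is what the "Ensure DPC++ tests always work" item refers to: a DpcppExecutor is only created for a device type that is actually present. A hypothetical helper (not part of this commit) following the same pattern might look like:

// Hypothetical helper, not part of the commit: pick a DPC++ device type that
// is present on this machine and fall back to the host executor otherwise.
std::shared_ptr<gko::Executor> make_dpcpp_or_fallback(
    std::shared_ptr<gko::OmpExecutor> host)
{
    if (gko::DpcppExecutor::get_num_devices("gpu") > 0) {
        return gko::DpcppExecutor::create(0, host, "gpu");
    }
    if (gko::DpcppExecutor::get_num_devices("cpu") > 0) {
        return gko::DpcppExecutor::create(0, host, "cpu");
    }
    return host;  // no SYCL device available, stay on OpenMP
}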
28 changes: 15 additions & 13 deletions dpcpp/base/executor.dp.cpp
@@ -80,7 +80,7 @@ void OmpExecutor::raw_copy_to(const DpcppExecutor *dest, size_type num_bytes,
 bool OmpExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const
 {
     auto device = detail::get_devices(
-        dest_exec->getdevice_type())[dest_exec->get_device_id()];
+        dest_exec->get_device_type())[dest_exec->get_device_id()];
     return device.is_host() || device.is_cpu();
 }

@@ -177,9 +177,11 @@ bool DpcppExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const
     auto device = detail::get_devices(device_type_)[device_id_];
     auto other_device = detail::get_devices(
         dest_exec->get_device_type())[dest_exec->get_device_id()];
-    return device.get_info<cl::sycl::info::device::device_type>() ==
-               other_device.get_info<cl::sycl::info::device::device_type>() &&
-           device.get() == other_device.get();
+    return ((device.is_host() || device.is_cpu()) &&
+            (other_device.is_host() || other_device.is_cpu())) ||
+           (device.get_info<cl::sycl::info::device::device_type>() ==
+                other_device.get_info<cl::sycl::info::device::device_type>() &&
+            device.get() == other_device.get());
 }


@@ -200,11 +202,13 @@ void DpcppExecutor::set_device_property()
 {
     assert(device_id_ < DpcppExecutor::get_num_devices(device_type_));
     auto device = detail::get_devices(device_type_)[device_id_];
-    try {
-        subgroup_sizes_ =
-            device.get_info<cl::sycl::info::device::sub_group_sizes>();
-    } catch (cl::sycl::runtime_error &err) {
-        GKO_NOT_SUPPORTED(device);
+    if (!device.is_host()) {
+        try {
+            subgroup_sizes_ =
+                device.get_info<cl::sycl::info::device::sub_group_sizes>();
+        } catch (cl::sycl::runtime_error &err) {
+            GKO_NOT_SUPPORTED(device);
+        }
     }
     num_computing_units_ =
         device.get_info<sycl::info::device::max_compute_units>();
@@ -214,10 +218,8 @@
     for (std::size_t i = 0; i < 3; i++) {
         max_workitem_sizes_.push_back(max_workitem_sizes[i]);
     }
-    if (!device.is_host()) {
-        max_workgroup_size_ =
-            device.get_info<sycl::info::device::max_work_group_size>();
-    }
+    max_workgroup_size_ =
+        device.get_info<sycl::info::device::max_work_group_size>();
     // Here we declare the queue with the property `in_order` which ensures the
     // kernels are executed in the submission order. Otherwise, calls to
     // `wait()` would be needed after every call to a DPC++ function or kernel.
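The change to `DpcppExecutor::verify_memory_to` above makes host and CPU SYCL devices mutually memory compatible while keeping the same-device requirement for everything else. A self-contained sketch of that rule; the function name `devices_share_memory` is illustrative, not part of the commit:

#include <CL/sycl.hpp>

// Illustrative predicate mirroring the rule above: host and CPU devices share
// the host address space; any other pair must have the same device type and
// refer to the same physical device.
bool devices_share_memory(const cl::sycl::device &a, const cl::sycl::device &b)
{
    auto is_host_like = [](const cl::sycl::device &d) {
        return d.is_host() || d.is_cpu();
    };
    if (is_host_like(a) && is_host_like(b)) {
        return true;
    }
    return a.get_info<cl::sycl::info::device::device_type>() ==
               b.get_info<cl::sycl::info::device::device_type>() &&
           a.get() == b.get();
}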
17 changes: 9 additions & 8 deletions include/ginkgo/core/base/executor.hpp
@@ -634,14 +634,15 @@ class Executor : public log::EnableLogging<Executor> {
     virtual void synchronize() const = 0;

     /**
-     * Overload the equal-to operator which verifies whether the executors share
-     * the same memory.
+     * Verifies whether the executors share the same memory.
      *
+     * @param other the other Executor to compare against
+     *
      * @return whether the executors this and other share the same memory.
      */
-    bool operator==(const Executor &other) const
+    bool memory_accessible(const std::shared_ptr<const Executor> &other) const
     {
-        return this->verify_memory_from(other);
+        return this->verify_memory_from(other.get());
     }

 protected:
@@ -702,7 +703,7 @@
      *
      * @return whether this executor and src_exec share the same memory.
      */
-    virtual bool verify_memory_from(const Executor &src_exec) const = 0;
+    virtual bool verify_memory_from(const Executor *src_exec) const = 0;

     /**
      * @internal
@@ -718,7 +719,7 @@

     GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_ENABLE_VERIFY_MEMORY_TO);

-    GKO_ENABLE_VERIFY_MEMORY_TO(ReferenceExecutor);
+    GKO_ENABLE_VERIFY_MEMORY_TO(ReferenceExecutor, ref);

 #undef GKO_ENABLE_VERIFY_MEMORY_TO

@@ -870,9 +871,9 @@ class ExecutorBase : public Executor {
         src_exec->raw_copy_to(self(), n_bytes, src_ptr, dest_ptr);
     }

-    bool verify_memory_from(const Executor &src_exec) const override
+    bool verify_memory_from(const Executor *src_exec) const override
     {
-        return src_exec.verify_memory_to(self());
+        return src_exec->verify_memory_to(self());
     }

 private:
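With `memory_accessible` taking a `std::shared_ptr<const Executor>`, a typical downstream use is to skip copies when two executors already see the same memory. A hypothetical helper, not part of this commit; `gko::clone` and `get_executor` are assumed from the existing Ginkgo API:

// Hypothetical helper: return the object itself if its memory is already
// accessible from `exec`, otherwise a deep copy that lives on `exec`.
// T is any gko::PolymorphicObject (it provides get_executor() and clone()).
template <typename T>
std::shared_ptr<const T> on_executor(std::shared_ptr<const gko::Executor> exec,
                                     std::shared_ptr<const T> obj)
{
    if (obj->get_executor()->memory_accessible(exec)) {
        return obj;  // memory is reachable from `exec`, no copy needed
    }
    return gko::clone(exec, obj);  // copy the data over to `exec`
}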
2 changes: 1 addition & 1 deletion include/ginkgo/core/base/temporary_clone.hpp
@@ -140,7 +140,7 @@
      */
     explicit temporary_clone(std::shared_ptr<const Executor> exec, pointer ptr)
     {
-        if (*ptr->get_executor() == *exec) {
+        if (ptr->get_executor()->memory_accessible(exec)) {
             // just use the object we already have
             handle_ = handle_type(ptr, null_deleter<T>());
         } else {
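The `temporary_clone` change means a copy is only made when the object's memory is not reachable from the target executor. A short usage sketch; `make_temporary_clone`, `lend`, and the OpenMP/Reference pairing are taken from the existing API and the test above:

auto omp = gko::OmpExecutor::create();
auto ref = gko::ReferenceExecutor::create();
auto vec = gko::matrix::Dense<double>::create(omp, gko::dim<2>{2, 2});

// OmpExecutor and ReferenceExecutor report mutually accessible memory, so no
// allocation or copy happens here; `tmp` wraps the original object.
auto tmp = gko::make_temporary_clone(ref, gko::lend(vec));
// For a CUDA- or HIP-owned object the same call would create a real clone and
// copy the data back, if needed, when `tmp` goes out of scope.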
