From 6499314536705cfd18d70bae58de7f1ab4082b2e Mon Sep 17 00:00:00 2001 From: OuadiElfarouki Date: Thu, 18 Jul 2024 15:55:44 +0100 Subject: [PATCH 1/4] enabled device_info caching improving get_device_info behavior --- sycl/doc/syclcompat/README.md | 2 +- sycl/include/syclcompat/device.hpp | 156 +++++++++++---------- sycl/test-e2e/syclcompat/device/device.cpp | 22 ++- 3 files changed, 103 insertions(+), 77 deletions(-) diff --git a/sycl/doc/syclcompat/README.md b/sycl/doc/syclcompat/README.md index 127df2d17cac9..24d73f42c0176 100644 --- a/sycl/doc/syclcompat/README.md +++ b/sycl/doc/syclcompat/README.md @@ -1034,7 +1034,7 @@ class device_ext : public sycl::device { void get_memory_info(size_t &free_memory, size_t &total_memory) const; void get_device_info(device_info &out) const; - device_info get_device_info() const; + const device_info &get_device_info() const; void reset(bool print_on_async_exceptions = false, bool in_order = true); sycl::queue *default_queue(); diff --git a/sycl/include/syclcompat/device.hpp b/sycl/include/syclcompat/device.hpp index 4017ee177a279..05cfb2a7f53fa 100644 --- a/sycl/include/syclcompat/device.hpp +++ b/sycl/include/syclcompat/device.hpp @@ -425,52 +425,55 @@ class device_ext : public sycl::device { } void get_device_info(device_info &out) const { - device_info prop; - prop.set_name(get_info().c_str()); + if (!_dev_info) { + std::lock_guard lock(m_mutex); + device_info prop; + prop.set_name(get_info().c_str()); - int major, minor; - get_version(major, minor); - prop.set_major_version(major); - prop.set_minor_version(minor); + int major, minor; + get_version(major, minor); + prop.set_major_version(major); + prop.set_minor_version(minor); - prop.set_max_work_item_sizes( + prop.set_max_work_item_sizes( #if (__SYCL_COMPILER_VERSION && __SYCL_COMPILER_VERSION < 20220902) - // oneAPI DPC++ compiler older than 2022/09/02, where - // max_work_item_sizes is an enum class element - get_info()); + // oneAPI DPC++ compiler older than 2022/09/02, where + // max_work_item_sizes is an enum class element + get_info()); #else - // SYCL 2020-conformant code, max_work_item_sizes is a struct templated - // by an int - get_info>()); + // SYCL 2020-conformant code, max_work_item_sizes is a struct + // templated by an int + get_info>()); #endif - prop.set_host_unified_memory(has(sycl::aspect::usm_host_allocations)); + prop.set_host_unified_memory(has(sycl::aspect::usm_host_allocations)); - prop.set_max_clock_frequency( - get_info()); - prop.set_max_compute_units( - get_info()); - prop.set_max_work_group_size( - get_info()); - prop.set_global_mem_size(get_info()); - prop.set_local_mem_size(get_info()); + prop.set_max_clock_frequency( + get_info()); + prop.set_max_compute_units( + get_info()); + prop.set_max_work_group_size( + get_info()); + prop.set_global_mem_size(get_info()); + prop.set_local_mem_size(get_info()); #if (defined(SYCL_EXT_INTEL_DEVICE_INFO) && SYCL_EXT_INTEL_DEVICE_INFO >= 6) - if (has(sycl::aspect::ext_intel_memory_clock_rate)) { - unsigned int tmp = - get_info(); - if (tmp != 0) - prop.set_memory_clock_rate(1000 * tmp); - } - if (has(sycl::aspect::ext_intel_memory_bus_width)) { - prop.set_memory_bus_width( - get_info()); - } - if (has(sycl::aspect::ext_intel_device_id)) { - prop.set_device_id(get_info()); - } - if (has(sycl::aspect::ext_intel_device_info_uuid)) { - prop.set_uuid(get_info()); - } + if (has(sycl::aspect::ext_intel_memory_clock_rate)) { + unsigned int tmp = + get_info(); + if (tmp != 0) + prop.set_memory_clock_rate(1000 * tmp); + } + if (has(sycl::aspect::ext_intel_memory_bus_width)) { + prop.set_memory_bus_width( + get_info()); + } + if (has(sycl::aspect::ext_intel_device_id)) { + prop.set_device_id( + get_info()); + } + if (has(sycl::aspect::ext_intel_device_info_uuid)) { + prop.set_uuid(get_info()); + } #elif defined(_MSC_VER) && !defined(__clang__) #pragma message("get_device_info: querying memory_clock_rate and \ memory_bus_width are not supported by the compiler used. \ @@ -483,23 +486,23 @@ Use 3200000 kHz as memory_clock_rate default value. \ Use 64 bits as memory_bus_width default value." #endif - size_t max_sub_group_size = 1; - std::vector sub_group_sizes = - get_info(); + size_t max_sub_group_size = 1; + std::vector sub_group_sizes = + get_info(); - for (const auto &sub_group_size : sub_group_sizes) { - if (max_sub_group_size < sub_group_size) - max_sub_group_size = sub_group_size; - } + for (const auto &sub_group_size : sub_group_sizes) { + if (max_sub_group_size < sub_group_size) + max_sub_group_size = sub_group_size; + } - prop.set_max_sub_group_size(max_sub_group_size); + prop.set_max_sub_group_size(max_sub_group_size); - prop.set_max_work_items_per_compute_unit( - get_info()); + prop.set_max_work_items_per_compute_unit( + get_info()); #ifdef SYCL_EXT_ONEAPI_MAX_WORK_GROUP_QUERY - prop.set_max_nd_range_size( - get_info>()); + prop.set_max_nd_range_size( + get_info>()); #else #if defined(_MSC_VER) && !defined(__clang__) #pragma message("get_device_info: querying the maximum number \ @@ -508,31 +511,39 @@ Use 64 bits as memory_bus_width default value." #warning "get_device_info: querying the maximum number of \ work groups is not supported." #endif - int max_nd_range_size[] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}; - prop.set_max_nd_range_size(max_nd_range_size); + int max_nd_range_size[] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}; + prop.set_max_nd_range_size(max_nd_range_size); #endif - // Estimates max register size per work group, feel free to update the value - // according to device properties. - prop.set_max_register_size_per_work_group(65536); - - prop.set_global_mem_cache_size( - get_info()); - - prop.set_image1d_max(get_info()); - prop.set_image1d_max(get_info()); - prop.set_image2d_max(get_info(), - get_info()); - prop.set_image3d_max(get_info(), - get_info(), - get_info()); - out = prop; + // Estimates max register size per work group, feel free to update the + // value according to device properties. + prop.set_max_register_size_per_work_group(65536); + + prop.set_global_mem_cache_size( + get_info()); + + prop.set_image1d_max( + get_info()); + prop.set_image1d_max( + get_info()); + prop.set_image2d_max(get_info(), + get_info()); + prop.set_image3d_max(get_info(), + get_info(), + get_info()); + + _dev_info = prop; + out = prop; + } else { + out = *_dev_info; + } } - device_info get_device_info() const { - device_info prop; - get_device_info(prop); - return prop; + const device_info &get_device_info() const { + if (!_dev_info) { + this->get_device_info(*_dev_info); + } + return *_dev_info; } void reset(bool print_on_async_exceptions = false, bool in_order = true) { @@ -683,6 +694,7 @@ Use 64 bits as memory_bus_width default value." std::vector> _queues; mutable std::mutex m_mutex; std::vector _events; + mutable std::optional _dev_info; }; namespace detail { diff --git a/sycl/test-e2e/syclcompat/device/device.cpp b/sycl/test-e2e/syclcompat/device/device.cpp index 0ba1ce6d1e729..723ef5d01e234 100644 --- a/sycl/test-e2e/syclcompat/device/device.cpp +++ b/sycl/test-e2e/syclcompat/device/device.cpp @@ -159,12 +159,26 @@ void test_device_ext_api() { auto major = dev_.get_major_version(); test_major_version(dev_, major); dev_.get_minor_version(); - dev_.get_max_compute_units(); dev_.get_max_clock_frequency(); dev_.get_integrated(); - syclcompat::device_info Info; - dev_.get_device_info(Info); - Info = dev_.get_device_info(); + + int max_cu = dev_.get_max_compute_units(); + int max_wg_size = dev_.get_max_work_group_size(); + size_t global_mem_size = dev_.get_global_mem_size(); + { + syclcompat::device_info Info; + dev_.get_device_info(Info); + assert(Info.get_max_compute_units() == max_cu); + assert(Info.get_max_work_group_size() == max_wg_size); + assert(Info.get_global_mem_size() == global_mem_size); + } + { + syclcompat::device_info Info = dev_.get_device_info(); + assert(Info.get_max_compute_units() == max_cu); + assert(Info.get_max_work_group_size() == max_wg_size); + assert(Info.get_global_mem_size() == global_mem_size); + } + dev_.reset(); auto QueuePtr = dev_.default_queue(); dev_.queues_wait_and_throw(); From 6c34e11cd1120bc79dc1b7db1152b226ffa36688 Mon Sep 17 00:00:00 2001 From: OuadiElfarouki Date: Fri, 19 Jul 2024 16:15:23 +0100 Subject: [PATCH 2/4] simplified previous changes --- sycl/include/syclcompat/device.hpp | 154 ++++++++++++++--------------- 1 file changed, 76 insertions(+), 78 deletions(-) diff --git a/sycl/include/syclcompat/device.hpp b/sycl/include/syclcompat/device.hpp index 05cfb2a7f53fa..12a108c7d93e3 100644 --- a/sycl/include/syclcompat/device.hpp +++ b/sycl/include/syclcompat/device.hpp @@ -425,55 +425,58 @@ class device_ext : public sycl::device { } void get_device_info(device_info &out) const { - if (!_dev_info) { - std::lock_guard lock(m_mutex); - device_info prop; - prop.set_name(get_info().c_str()); + if (_dev_info) { + out = *_dev_info; + return; + } + + std::lock_guard lock(m_mutex); + device_info prop; + prop.set_name(get_info().c_str()); - int major, minor; - get_version(major, minor); - prop.set_major_version(major); - prop.set_minor_version(minor); + int major, minor; + get_version(major, minor); + prop.set_major_version(major); + prop.set_minor_version(minor); - prop.set_max_work_item_sizes( + prop.set_max_work_item_sizes( #if (__SYCL_COMPILER_VERSION && __SYCL_COMPILER_VERSION < 20220902) - // oneAPI DPC++ compiler older than 2022/09/02, where - // max_work_item_sizes is an enum class element - get_info()); + // oneAPI DPC++ compiler older than 2022/09/02, where + // max_work_item_sizes is an enum class element + get_info()); #else - // SYCL 2020-conformant code, max_work_item_sizes is a struct - // templated by an int - get_info>()); + // SYCL 2020-conformant code, max_work_item_sizes is a struct + // templated by an int + get_info>()); #endif - prop.set_host_unified_memory(has(sycl::aspect::usm_host_allocations)); + prop.set_host_unified_memory(has(sycl::aspect::usm_host_allocations)); - prop.set_max_clock_frequency( - get_info()); - prop.set_max_compute_units( - get_info()); - prop.set_max_work_group_size( - get_info()); - prop.set_global_mem_size(get_info()); - prop.set_local_mem_size(get_info()); + prop.set_max_clock_frequency( + get_info()); + prop.set_max_compute_units( + get_info()); + prop.set_max_work_group_size( + get_info()); + prop.set_global_mem_size(get_info()); + prop.set_local_mem_size(get_info()); #if (defined(SYCL_EXT_INTEL_DEVICE_INFO) && SYCL_EXT_INTEL_DEVICE_INFO >= 6) - if (has(sycl::aspect::ext_intel_memory_clock_rate)) { - unsigned int tmp = - get_info(); - if (tmp != 0) - prop.set_memory_clock_rate(1000 * tmp); - } - if (has(sycl::aspect::ext_intel_memory_bus_width)) { - prop.set_memory_bus_width( - get_info()); - } - if (has(sycl::aspect::ext_intel_device_id)) { - prop.set_device_id( - get_info()); - } - if (has(sycl::aspect::ext_intel_device_info_uuid)) { - prop.set_uuid(get_info()); - } + if (has(sycl::aspect::ext_intel_memory_clock_rate)) { + unsigned int tmp = + get_info(); + if (tmp != 0) + prop.set_memory_clock_rate(1000 * tmp); + } + if (has(sycl::aspect::ext_intel_memory_bus_width)) { + prop.set_memory_bus_width( + get_info()); + } + if (has(sycl::aspect::ext_intel_device_id)) { + prop.set_device_id(get_info()); + } + if (has(sycl::aspect::ext_intel_device_info_uuid)) { + prop.set_uuid(get_info()); + } #elif defined(_MSC_VER) && !defined(__clang__) #pragma message("get_device_info: querying memory_clock_rate and \ memory_bus_width are not supported by the compiler used. \ @@ -486,23 +489,23 @@ Use 3200000 kHz as memory_clock_rate default value. \ Use 64 bits as memory_bus_width default value." #endif - size_t max_sub_group_size = 1; - std::vector sub_group_sizes = - get_info(); + size_t max_sub_group_size = 1; + std::vector sub_group_sizes = + get_info(); - for (const auto &sub_group_size : sub_group_sizes) { - if (max_sub_group_size < sub_group_size) - max_sub_group_size = sub_group_size; - } + for (const auto &sub_group_size : sub_group_sizes) { + if (max_sub_group_size < sub_group_size) + max_sub_group_size = sub_group_size; + } - prop.set_max_sub_group_size(max_sub_group_size); + prop.set_max_sub_group_size(max_sub_group_size); - prop.set_max_work_items_per_compute_unit( - get_info()); + prop.set_max_work_items_per_compute_unit( + get_info()); #ifdef SYCL_EXT_ONEAPI_MAX_WORK_GROUP_QUERY - prop.set_max_nd_range_size( - get_info>()); + prop.set_max_nd_range_size( + get_info>()); #else #if defined(_MSC_VER) && !defined(__clang__) #pragma message("get_device_info: querying the maximum number \ @@ -511,32 +514,27 @@ Use 64 bits as memory_bus_width default value." #warning "get_device_info: querying the maximum number of \ work groups is not supported." #endif - int max_nd_range_size[] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}; - prop.set_max_nd_range_size(max_nd_range_size); + int max_nd_range_size[] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}; + prop.set_max_nd_range_size(max_nd_range_size); #endif - // Estimates max register size per work group, feel free to update the - // value according to device properties. - prop.set_max_register_size_per_work_group(65536); - - prop.set_global_mem_cache_size( - get_info()); - - prop.set_image1d_max( - get_info()); - prop.set_image1d_max( - get_info()); - prop.set_image2d_max(get_info(), - get_info()); - prop.set_image3d_max(get_info(), - get_info(), - get_info()); - - _dev_info = prop; - out = prop; - } else { - out = *_dev_info; - } + // Estimates max register size per work group, feel free to update the + // value according to device properties. + prop.set_max_register_size_per_work_group(65536); + + prop.set_global_mem_cache_size( + get_info()); + + prop.set_image1d_max(get_info()); + prop.set_image1d_max(get_info()); + prop.set_image2d_max(get_info(), + get_info()); + prop.set_image3d_max(get_info(), + get_info(), + get_info()); + + _dev_info = prop; + out = prop; } const device_info &get_device_info() const { From 3a7008675421f37d54238f108c30261bd0b470b0 Mon Sep 17 00:00:00 2001 From: OuadiElfarouki Date: Tue, 23 Jul 2024 12:17:04 +0100 Subject: [PATCH 3/4] Switched get_device_info() back to return-by-value --- sycl/doc/syclcompat/README.md | 2 +- sycl/include/syclcompat/device.hpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/doc/syclcompat/README.md b/sycl/doc/syclcompat/README.md index 24d73f42c0176..127df2d17cac9 100644 --- a/sycl/doc/syclcompat/README.md +++ b/sycl/doc/syclcompat/README.md @@ -1034,7 +1034,7 @@ class device_ext : public sycl::device { void get_memory_info(size_t &free_memory, size_t &total_memory) const; void get_device_info(device_info &out) const; - const device_info &get_device_info() const; + device_info get_device_info() const; void reset(bool print_on_async_exceptions = false, bool in_order = true); sycl::queue *default_queue(); diff --git a/sycl/include/syclcompat/device.hpp b/sycl/include/syclcompat/device.hpp index 12a108c7d93e3..4e227a635c44e 100644 --- a/sycl/include/syclcompat/device.hpp +++ b/sycl/include/syclcompat/device.hpp @@ -537,11 +537,11 @@ Use 64 bits as memory_bus_width default value." out = prop; } - const device_info &get_device_info() const { + device_info get_device_info() const { if (!_dev_info) { this->get_device_info(*_dev_info); } - return *_dev_info; + return _dev_info.value(); } void reset(bool print_on_async_exceptions = false, bool in_order = true) { From 576878e7aaca4bf94ddbc2993ba17d242ffc04e5 Mon Sep 17 00:00:00 2001 From: OuadiElfarouki Date: Tue, 23 Jul 2024 16:56:12 +0100 Subject: [PATCH 4/4] removed redundant device_ext tests --- sycl/test-e2e/syclcompat/device/device.cpp | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/sycl/test-e2e/syclcompat/device/device.cpp b/sycl/test-e2e/syclcompat/device/device.cpp index 93fb9fcdd0a02..180db72afc9f8 100644 --- a/sycl/test-e2e/syclcompat/device/device.cpp +++ b/sycl/test-e2e/syclcompat/device/device.cpp @@ -165,19 +165,12 @@ void test_device_ext_api() { int max_cu = dev_.get_max_compute_units(); int max_wg_size = dev_.get_max_work_group_size(); size_t global_mem_size = dev_.get_global_mem_size(); - { - syclcompat::device_info Info; - dev_.get_device_info(Info); - assert(Info.get_max_compute_units() == max_cu); - assert(Info.get_max_work_group_size() == max_wg_size); - assert(Info.get_global_mem_size() == global_mem_size); - } - { - syclcompat::device_info Info = dev_.get_device_info(); - assert(Info.get_max_compute_units() == max_cu); - assert(Info.get_max_work_group_size() == max_wg_size); - assert(Info.get_global_mem_size() == global_mem_size); - } + + syclcompat::device_info Info; + dev_.get_device_info(Info); + assert(Info.get_max_compute_units() == max_cu); + assert(Info.get_max_work_group_size() == max_wg_size); + assert(Info.get_global_mem_size() == global_mem_size); dev_.reset(); auto QueuePtr = dev_.default_queue();