From f7fa6c2a07c943a15fde78a1133e9a3edc73e35c Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Mon, 29 Jul 2024 15:17:29 -0700 Subject: [PATCH 1/6] [SYCL][Doc] Add sycl_ext_oneapi_cache_size draft Adds an extension for querying the availability and size of different levels of cache within a device. Signed-off-by: John Pennycook --- .../sycl_ext_oneapi_cache_size.asciidoc | 144 ++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc new file mode 100644 index 0000000000000..65386b57675af --- /dev/null +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc @@ -0,0 +1,144 @@ += sycl_ext_oneapi_cache_size + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en +:dpcpp: pass:[DPC++] +:endnote: —{nbsp}end{nbsp}note + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + + +== Notice + +[%hardbreaks] +Copyright (C) 2024 Intel Corporation. All rights reserved. + +Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks +of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by +permission by Khronos. + + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm/issues + + +== Dependencies + +This extension is written against the SYCL 2020 revision 8 specification. All +references below to the "core SYCL specification" or to section numbers in the +SYCL specification refer to that revision. + + +== Status + +This is a proposed extension specification, intended to gather community +feedback. Interfaces defined in this specification may not be implemented yet +or may be in a preliminary state. The specification itself may also change in +incompatible ways before it is finalized. *Shipping software products should +not rely on APIs defined in this specification.* + + +== Overview + +SYCL 2020's device partitioning functions acknowledge that devices will +typically have multiple levels of cache (L1, L2, L3 and L4) but its device +queries only allow developers to request information about one (unnamed) level +of cache. + +This extension proposes a mechanism to query the availability and size of +specific levels of cache on individual devices, to help developers with +performance tuning and writing other cache-aware operations. + + +== Specification + +=== Feature test macro + +This extension provides a feature-test macro as described in the core SYCL +specification. An implementation supporting this extension must predefine the +macro `SYCL_EXT_ONEAPI_CACHE_SIZES` to one of the values defined in the table +below. Applications can test for the existence of this macro to determine if +the implementation supports this feature, or applications can test the macro's +value to determine which of the extension's features the implementation +supports. + + +[%header,cols="1,5"] +|=== +|Value +|Description + +|1 +|Initial version of this extension. +|=== + + +=== Cache Levels + +A new `enum` is added to describe the four levels of cache: + +[source,c++] +---- +enum class cache_level : /* unspecified */ +{ + L1 = 1, + L2 = 2, + L3 = 3, + L4 = 4, +}; +---- + + +=== Device Queries + +[%header,cols="1,5,5"] +|=== +|Device Descriptor +|Return Type +|Description + +|`template + ext::oneapi::info::device::cache_size` +|`size_t` +|Return the size in bytes of the cache at the requested `cache_level` for this +device, or 0 if this level of cache does not exist on this device. + +|=== + +The set of cache levels for which a device returns a non-zero value is not +required to be continuous (e.g., a device may report an L1 and L3 cache without +reporting an L2 cache). + +[_Note:_ Although this may seem an unusual choice, there are several real-life +devices that name their cache levels such that there are gaps. This extension +allows for this behavior to minimize the cognitive burden to developers of +shifting between the naming of cache levels in hardware specification sheets +and in SYCL. _{endnote}_] + + +== Implementation notes + +This non-normative section provides information about one possible +implementation of this extension. It is not part of the specification of the +extension's API. + +CUDA exposes an `l2CacheSize` property via the `cudaDeviceProp` struct, which +could be used to implement the size query for `cache_level::L2`. + + +== Issues + +None. From 6b908a6031f5376be4f6266e72bc08eec4d344fb Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 30 Jul 2024 06:50:47 -0700 Subject: [PATCH 2/6] Add note about non-L2 cache sizes on NVIDIA GPUs Signed-off-by: John Pennycook --- .../extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc index 65386b57675af..c903079c1ac2e 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc @@ -136,7 +136,8 @@ implementation of this extension. It is not part of the specification of the extension's API. CUDA exposes an `l2CacheSize` property via the `cudaDeviceProp` struct, which -could be used to implement the size query for `cache_level::L2`. +could be used to implement the size query for `cache_level::L2`. Other sizes +could be derived from the Compute Capability. == Issues From 4b764bc093e256a95e9bc5331085dafb4f4a9c82 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 30 Jul 2024 06:59:29 -0700 Subject: [PATCH 3/6] Add note about unknown/unsupported cache sizes --- .../proposed/sycl_ext_oneapi_cache_size.asciidoc | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc index c903079c1ac2e..e7c260e5c188b 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc @@ -142,4 +142,17 @@ could be derived from the Compute Capability. == Issues -None. +. Should devices be able to signal an "unknown"/"unsupported" cache size? ++ +-- +*UNRESOLVED*: +There are many mechanisms that could be used to signal that an implementation +simply does not know anything about a specific level of cache (e.g., +an exception, a special return value, an orthogonal query). However, requiring +implementations to determine and return an accurate size would make the query +significantly easier for developers to use. + +We should revisit this issue once we have implementation experience across +multiple backends, which should give us a better idea of how hard it is to +return accurate cache sizes in practice. +-- From 229dc3248a1bc86df44647d4b5ac107cd089f0f5 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 30 Jul 2024 14:18:50 -0700 Subject: [PATCH 4/6] Make extension experimental and declare namespace Signed-off-by: John Pennycook --- .../extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc index e7c260e5c188b..20d8f4782a84a 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc @@ -82,7 +82,8 @@ supports. |Description |1 -|Initial version of this extension. +|The APIs of this experimental extension are not versioned, so the + feature-test macro always has this value. |=== @@ -92,6 +93,7 @@ A new `enum` is added to describe the four levels of cache: [source,c++] ---- +namespace sycl::ext::oneapi::experimental { enum class cache_level : /* unspecified */ { L1 = 1, @@ -99,6 +101,7 @@ enum class cache_level : /* unspecified */ L3 = 3, L4 = 4, }; +} // namespace sycl::ext::oneapi::experimental ---- From 6b57068588e82f9f02d56865268a2e847ccefca4 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 30 Jul 2024 14:22:05 -0700 Subject: [PATCH 5/6] Adopt new style for info descriptor Signed-off-by: John Pennycook --- .../sycl_ext_oneapi_cache_size.asciidoc | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc index 20d8f4782a84a..884a588696d58 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc @@ -107,19 +107,20 @@ enum class cache_level : /* unspecified */ === Device Queries -[%header,cols="1,5,5"] -|=== -|Device Descriptor -|Return Type -|Description +[source,c++] +---- +namespace sycl::ext::oneapi::experimental::info::device { +template +struct cache_size { + using return_type = size_t; +}; +} // namespace sycl::ext::oneapi::experimental::info::device +---- -|`template - ext::oneapi::info::device::cache_size` -|`size_t` -|Return the size in bytes of the cache at the requested `cache_level` for this -device, or 0 if this level of cache does not exist on this device. +_Remarks_: Template parameter to `device::get_info`. -|=== +_Returns_: The size in bytes of the cache at the requested `cache_level` for +this device, or 0 if this level of cache does not exist on this device. The set of cache levels for which a device returns a non-zero value is not required to be continuous (e.g., a device may report an L1 and L3 cache without From d77dbd41a4e4f88e565fd573bf8a4872cc96aefc Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 31 Jul 2024 06:48:22 -0700 Subject: [PATCH 6/6] Fix namespace in template argument Signed-off-by: John Pennycook --- .../doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc index 884a588696d58..637acafd29276 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_cache_size.asciidoc @@ -110,7 +110,7 @@ enum class cache_level : /* unspecified */ [source,c++] ---- namespace sycl::ext::oneapi::experimental::info::device { -template +template struct cache_size { using return_type = size_t; };