From 960c9045cb4cc204a5eb80623dcf048149349302 Mon Sep 17 00:00:00 2001 From: Ben Tracy Date: Fri, 21 Mar 2025 15:06:45 +0000 Subject: [PATCH 01/11] [SYCL][Graph] Add spec wording for graph-owned memory allocations - Using sycl_ext_oneapi_async_memory_alloc extension - Spec wording for graph support of the feature - Usage guide guidance for library authors - Usage guide examples of explicit and queue recording usage with and without mem pools --- .../sycl_ext_oneapi_graph.asciidoc | 191 +++++++++++++++++- sycl/doc/syclgraph/SYCLGraphUsageGuide.md | 158 +++++++++++++++ 2 files changed, 344 insertions(+), 5 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index e3b1306ef6d8f..7faf37de84653 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -338,6 +338,8 @@ enum class node_type { memadvise, ext_oneapi_barrier, host_task, + async_malloc, + async_free }; class node { @@ -765,6 +767,8 @@ public: void update(node& node); void update(const std::vector& nodes); void update(const command_graph& graph); + + size_t get_required_mem_size() const; }; } // namespace sycl::ext::oneapi::experimental @@ -987,7 +991,8 @@ conditions: ** Nodes must be added in the same order in the two graphs. Nodes may be added via `command_graph::add`, or for a recorded queue via `queue::submit` or queue shortcut functions. -** Corresponding nodes in each graph must be kernels that have the same type: +** Corresponding kernel nodes in each graph must be kernels that have the same +type: *** When the kernel is defined as a lambda, the lambda must be the same. *** When the kernel is defined as a named function object, the kernel class @@ -1011,6 +1016,148 @@ If a node containing a dynamic parameter is updated through the whole graph update API, then any previous updates to the dynamic parameter will be reflected in the new graph. +==== Graph-Owned Memory Allocations [[graph-memory-allocations]] + +It can be desirable for a graph to own and manage memory allocations for memory +associated with commands in the graph. This is made possible by using the +`async_` and `async_free` commands from the +link:../experimental/sycl_ext_oneapi_async_memory_alloc.asciidoc[sycl_ext_oneapi_async_memory_alloc +] extension. These commands can be added to a graph either via queue recording +or explicit graph creation, which will create allocations which are owned and +managed by that specific `command_graph`, and who's lifetimes are tied to the +lifetime of that graph. + +Pointers returned from allocation nodes can be used in other graph nodes in the +same way as regular USM pointers. + +===== API Usage + +Malloc and free nodes can be added to a graph via both the explicit and queue +recording graph APIs using the `async_` free +functions inside a command-group: + +[source,c++] +---- +void* Ptr = nullptr; +size_t AllocSize = 1024; +auto CGF = [&](handler &CGH){ + Ptr = async_malloc(CGH, usm::alloc::device, AllocSize); +} + +// Explicit graph creation +Graph.add(CGF); + +Graph.add([&](handler &CGH){ + async_free(CGH, Ptr); +}); + +// Queue recording +Graph.begin_recording(Queue); +Queue.submit(CGF); +Queue.submit([&](handler &CGH){ + async_free(CGH, Ptr); +}); +---- + +The `async_*` commands which take a queue can also be used with queue +recording, particularly when recording an in-order queue to specify dependencies +as no SYCL event is returned. + +[source,c++] +---- +void* Ptr = nullptr; +size_t AllocSize = 1024; +queue Queue {syclContext, syclDevice, {property::queue::in_order{}}}; + +Graph.begin_recording(Queue); +Ptr = async_malloc(Queue, usm::alloc::device, AllocSize); +async_free(Queue, Ptr); +---- + +===== Supported Features [[allocation-supported-features]] + +Currently only device allocations are supported in graphs. Attempting to +add allocations of any other type to a graph will result in synchronous errors +being thrown with error code `feature_not_supported`. + +===== Restrictions + +The following restrictions apply to any graph containing async malloc or free +nodes: + +* Only one executable graph instance (created by finalizing the modifiable +graph) can be alive at any time, and all copies of that instance (created via +the {crs}[common reference semantics] of the `command_graph` class) must be +destroyed before the graph can be finalized again. Failing to do so will result +in a sychronous error being thrown with error code `invalid` when attempting to +finalize the graph again. +* The graph cannot be used as a sub-graph in another graph. +* Graph memory allocation nodes cannot be updated, and graphs containing these +nodes cannot be updated via <>. + +===== Allocation Lifetime + +The lifetime of graph-owned allocations are tied to the lifetime of the graph +itself. + +It is only valid to use the pointers returned from graph allocation nodes inside +the graph in which they were allocated. Any nodes using these allocations must +be ordered after the allocation node and before the free node for that +allocation. + +It is invalid to use these pointers outside of the owning graph and doing so +will result in undefined behavior. + +===== Behaviour + +The semantics of `async_malloc` and `async_free` within a graph differ from the +eager SYCL usage described in the +link:../experimental/sycl_ext_oneapi_async_memory_alloc.asciidoc[sycl_ext_oneapi_async_memory_alloc] +extension. + +* Graph memory allocations are not made directly from any default or +user-provided Memory Pool. Each graph containing alloc/free nodes maintains its +own pool of memory from which allocations are made. The following properties of +a default or user-provided memory pool provided in calls to +`async_` will be respected for the associated graph +allocations, all other properties will be ignored: + +** The allocation type specified when creating the pool with +`usm::alloc::`, subject to the limitations in the +<> section. + +** `property::memory_pool::read_only` - Guarantee from the user that memory is +only being read from, the implementation may be able to optimize in this case. + +** `property::memory_pool::zero_init` - Allocated memory will be +zero-initialized only once when first allocated. It will not be zero-initialized +again before or during any subsequent executions of the graph. If that is +required by the application it is the responsibility of the user to add the +appropriate commands to the graph to do this. + +* `node_type::async_malloc` nodes within a graph will return a pointer to an +allocation of the provided size. This pointer can then be used in other graph +nodes ordered after that node in the same way any USM pointer would be. ++ +[Note: Returned pointers are not guaranteed to be unique. An implementation may +return the same pointer as a previous `async_malloc` nodes if that pointer was +previously freed via `async_free` at that point in the graph. -- end note] + +* `node_type::async_free` nodes within a graph indicate that a given allocation +is no longer in use. They must be ordered after the associated allocation node. +Attempting to add a free node for an allocation which does not exist in the +graph will result in a synchronous error being thrown with error code `invalid`. + +* Other nodes which use a given graph allocation must be ordered via +dependencies such that they are ordered after the allocation node and before the +free node for a given allocation. It is the user's responsibility to ensure that +dependencies are correct. Using a pointer in a graph command ordered after it +has been freed via an `async_free` node results in undefined behavior. + +The total amount of memory required for graph allocations by an executable graph +can be queried using the `command_graph::get_required_mem_size()` member +function. + ==== Graph Properties [[graph-properties]] ===== No-Cycle-Check Property @@ -1242,6 +1389,9 @@ Exceptions: property for more information. * Throws with error code `invalid` if the type of the command-group is not a kernel execution and a `dynamic_parameter` was registered inside `cgf`. +* Throws with error code `invalid` if the type of the command-group is +`async_malloc` and the `usm::alloc` type of the associated memory pool is not +`usm::alloc::device`. | [source,c++] @@ -1329,10 +1479,12 @@ finalize(const property_list& propList = {}) const; |Synchronous operation that creates a new graph in the executable state with a fixed topology that can be submitted for execution on any queue sharing the -context associated with the graph. It is valid to call this method multiple times -to create subsequent executable graphs. It is also valid to continue to add new -nodes to the modifiable graph instance after calling this function. It is valid -to finalize an empty graph instance with no recorded commands. +context associated with the graph. It is valid to call this member function +multiple times to create subsequent executable graphs, unless the graph contains +<>. It is also valid +to continue to add new nodes to the modifiable graph instance after calling this +function. It is valid to finalize an empty graph instance with no recorded +commands. Constraints: @@ -1354,6 +1506,12 @@ Exceptions: Returns: A new executable graph object which can be submitted to a queue. +Exceptions: + +* Throws with error code `invalid` if the graph contains +<> and the graph has +previously been finalized. + | [source,c++] ---- @@ -1395,6 +1553,19 @@ std::vector get_root_nodes() const; ---- |Returns a list of all nodes in the graph which have no dependencies. +| +[source,c++] +---- +size_t get_required_mem_size() const; +---- +|Returns the total size in bytes of the memory required for +<> in this graph. + +Constraints: + +* This member function is only available when the `command_graph` state is + `graph_state::executable`. + |=== Table {counter: tableNumber}. Member functions of the `command_graph` class for @@ -1525,6 +1696,9 @@ Exceptions: `property::graph::updatable` was not set when the executable graph was created. +* Throws synchronously with error code `invalid` if the graph contains any + <>. + * If any other exception is thrown the state of the graph nodes is undefined. |=== @@ -2122,6 +2296,13 @@ recording mode, as opposed to throwing. This section defines the interaction of `sycl_ext_oneapi_graph` with other extensions. +==== sycl_ext_oneapi_async_memory_alloc + +The APIs defined in +link:../experimental/sycl_ext_oneapi_async_memory_alloc.asciidoc[sycl_ext_oneapi_async_memory_alloc] +are supported for use in graphs. For further details see the section on +<>. + ==== sycl_ext_codeplay_enqueue_native_command The new methods defined by diff --git a/sycl/doc/syclgraph/SYCLGraphUsageGuide.md b/sycl/doc/syclgraph/SYCLGraphUsageGuide.md index d34a248ae91a2..617d593520976 100644 --- a/sycl/doc/syclgraph/SYCLGraphUsageGuide.md +++ b/sycl/doc/syclgraph/SYCLGraphUsageGuide.md @@ -222,6 +222,90 @@ q.submit([&](sycl::handler &CGH) { }); ``` +### Guidance For Library Authors + +In addition to the general SYCL-graph compatibility guidelines there are some +considerations that are more relevant to library authors to be compatible with +SYCL-Graph and allow seamless capturing of library calls in a graph. + +#### Graph-owned Memory Allocations For Temporary Memory + +A common pattern in libraries with specialized SYCL kernels can involve the +allocation and use of temporary memory for those kernels. One approach is custom +allocators which rely on SYCL events to control the lifetime and re-use of this +temporary memory, but these are not compatible with events returned from queue +submissions which are recorded to a graph. Instead the +[sycl_ext_oneapi_async_memory_alloc](../extensions/experimental/sycl_ext_oneapi_async_memory_alloc.asciidoc) +extension can be used which provides similar functionality for eager SYCL usage +as well as compatibility with graphs. + +When captured in a graph calls to these extension functions create graph-owned +memory allocations which are tied to the lifetime of the graph. These +allocations can be created as needed for library kernels and the SYCL runtime +may be able to re-use memory where appropriate to minimize the memory footprint +of the graph. This can avoid the need for a library to manage the lifetime of +these allocations themselves, or be aware of the library calls being recorded to +a graph. + +It is important to set correct dependencies between allocation commands, kernels +that use those allocations, and the calls to free the memory. This allows the +graph to determine when allocations are in-use at a given point in the graph, +and allow for re-using memory for subsequent allocation nodes if those nodes are +ordered after a free command which is no longer in use. It is important to note +that calling `async_free` will not deallocate memory but simply mark it as free +for re-use. + +The following shows a simple example of how these allocations can be used in a +library function which is recorded to a graph: + +```c++ +using namespace sycl; + +// Library code, this example is assuming an out of order SYCL queue +void launchLibraryKernel(queue& SyclQueue){ + size_t TempMemSize = 1024; + void* Ptr = nullptr; + + // Get a pointer to some temporary memory for use in the kernel + // This call creates an allocation node in the graph if this call is being + // recorded. + event AllocEvent = SyclQueue.submit([&](handler& CGH){ + Ptr = sycl_ext::async_malloc(CGH, usm::alloc::device, TempMemSize); + }); + + // Submit the actual library kernel + event KernelEvent = SyclQueue.submit([&](handler& CGH){ + // Mark the allocation as a dependency so that the temporary memory + // is available + CGH.depends_on(AllocEvent); + // Submit a kernel that uses the temp memory in Ptr + CGH.parallel_for(...); + }); + + // Free the memory back to the pool or graph, indicating that it is free to + // be re-used. Memory will not actually be released back to the OS. + SyclQueue.submit([&](handler& CGH){ + // Mark the kernel as a dependency before freeing + CGH.depends_on(KernelEvent); + sycl_ext::async_free(CGH, Ptr); + }); +} + +// Application code +void recordLibraryCall(queue& SyclQueue, sycl_ext::command_graph& Graph){ + Graph.begin_recording(SyclQueue); + // Call into library to record queue commands to the graph + launchLibraryKernel(SyclQueue); + + Graph.end_recording(SyclQueue); +} +``` + +Please see "graph-owned memory allocations" section of the +[sycl_ext_oneapi_graph +specification](../extensions/experimental/sycl_ext_oneapi_graph.asciidoc) for a +complete description of the feature. + ## Code Examples The examples below demonstrate intended usage of the extension, but may not be @@ -680,3 +764,77 @@ execMainGraph.update(updateGraph); // ptrA myQueue.ext_oneapi_graph(execMainGraph); ``` + +### Graph-Owned Memory Allocations + +#### Explicit Graph Example + +Using default memory pool. + +```c++ +using namespace sycl; +namespace sycl_ext = sycl::ext::oneapi::experimental; + +void* Ptr = nullptr; +size_t AllocSize = 1024; +// Add an async_malloc node and capturing the returned pointer in Ptr +auto AllocNode = Graph.add([&](handler& CGH){ + Ptr = sycl_ext::async_malloc(CGH, usm::alloc::device, AllocSize); +}); + +// Use Ptr in another graph node which depends on AllocNode +auto OtherNodeA = Graph.add(..., {property::graph::depends_on{AllocNode}}); +// Use Ptr in another node which has an indirect dependency on AllocNode +auto OtherNodeB = Graph.add(..., {property::graph::depends_on{OtherNodeA}}); + +// Free Ptr, indicating it is no longer in use at this point in the graph, +// with a dependency on any leaf nodes using Ptr +Graph.add([&](handler& CGH){ + sycl_ext::async_free(CGH, Ptr); +}, {property::graph::depends_on{OtherNodeB}}); +``` + +#### Queue Recording Example + +Using user-provided memory pool. + +```c++ +using namespace sycl; +namespace sycl_ext = sycl::ext::oneapi::experimental; + +void* Ptr = nullptr; +size_t AllocSize = 1024; +queue Queue {syclContext, syclDevice}; + +// Device memory pool with zero init property +sycl_ext::memory_pool MemPool{syclContext, syclDevice, usm::alloc::device, + {sycl_ext::property::memory_pool::zero_init{}}}; +Graph.begin_recording(Queue); +// Add an async_malloc node and capture the returned pointer in Ptr, +// zero_init property and usm::alloc kind of pool will be respected but pool +// is otherwise ignored +event AllocEvent = Queue.submit([&](handler& CGH){ + Ptr = sycl_ext::async_malloc_from_pool(CGH, AllocSize, MemPool); +}); + +// Use Ptr in another graph node which depends on AllocNode +event OtherEventA = Queue.submit([&](handler& CGH){ + CGH.depends_on(AllocEvent); + // Do something with Ptr + CGH.parallel_for(...); +}); +// Use Ptr in another node which has an indirect dependency on AllocNode +event OtherEventB = Queue.submit([&](handler& CGH){ + CGH.depends_on(OtherEventA); + // Do something with Ptr + CGH.parallel_for(...); +}); + +// Free Ptr, indicating it is no longer in use at this point in the graph, +// with a dependency on any leaf nodes using Ptr +Queue.submit([&](handler& CGH){ + sycl_ext::async_free(CGH, Ptr); +}); + +Graph.end_recording(Queue); +``` From 39fb08bb2365ce872fe58fcdd147984ef60a98c6 Mon Sep 17 00:00:00 2001 From: Ben Tracy Date: Tue, 15 Apr 2025 17:12:03 +0100 Subject: [PATCH 02/11] Clarify finalize exception wording --- .../extensions/experimental/sycl_ext_oneapi_graph.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index 7faf37de84653..df7f623262148 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -1509,8 +1509,8 @@ Returns: A new executable graph object which can be submitted to a queue. Exceptions: * Throws with error code `invalid` if the graph contains -<> and the graph has -previously been finalized. +<> and any instance of +an executable graph created from this modifiable graph is still alive. | [source,c++] From 8dc40758764e4f2c9f42a7fad1559eb80175814e Mon Sep 17 00:00:00 2001 From: Ben Tracy Date: Wed, 16 Apr 2025 14:21:09 +0100 Subject: [PATCH 03/11] Addressing review comments - Improve whole-graph update conditions list. - Remove unnecessary non-normative note about returned pointers. - Fix duplicate exception sections in finalize() definition. --- .../sycl_ext_oneapi_graph.asciidoc | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index df7f623262148..e483438e29add 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -983,6 +983,8 @@ Both the source and target graphs for the update must satisfy the following conditions: * Both graphs must have been created with the same device and context. +* Neither graph may have any nodes of type `node_type::async_malloc` or + `node_type::async_free`. * Both graphs must be topologically identical. The graphs are considered topologically identical when: @@ -991,8 +993,9 @@ conditions: ** Nodes must be added in the same order in the two graphs. Nodes may be added via `command_graph::add`, or for a recorded queue via `queue::submit` or queue shortcut functions. -** Corresponding kernel nodes in each graph must be kernels that have the same -type: +** Corresponding nodes in each graph must have the same `node_type`. +** Corresponding nodes of type `node_type::kernel` must have kernels with + identical types: *** When the kernel is defined as a lambda, the lambda must be the same. *** When the kernel is defined as a named function object, the kernel class @@ -1138,10 +1141,6 @@ appropriate commands to the graph to do this. * `node_type::async_malloc` nodes within a graph will return a pointer to an allocation of the provided size. This pointer can then be used in other graph nodes ordered after that node in the same way any USM pointer would be. -+ -[Note: Returned pointers are not guaranteed to be unique. An implementation may -return the same pointer as a previous `async_malloc` nodes if that pointer was -previously freed via `async_free` at that point in the graph. -- end note] * `node_type::async_free` nodes within a graph indicate that a given allocation is no longer in use. They must be ordered after the associated allocation node. @@ -1504,14 +1503,13 @@ Exceptions: * Throws synchronously with error code `feature_not_supported` if the graph contains a command that is not supported by the device. -Returns: A new executable graph object which can be submitted to a queue. - -Exceptions: - -* Throws with error code `invalid` if the graph contains +* Throws synchronously with error code `invalid` if the graph contains <> and any instance of an executable graph created from this modifiable graph is still alive. +Returns: A new executable graph object which can be submitted to a queue. + + | [source,c++] ---- From 36100858a5eb9b603d467e100a9b826e24445585 Mon Sep 17 00:00:00 2001 From: Ben Tracy Date: Tue, 10 Jun 2025 14:23:56 +0100 Subject: [PATCH 04/11] Addressing review comments - Remove references to removed features from async alloc spec - Improve links to async alloc spec - Minor rewording of spec changes - New usage guide example using in-order queue --- .../sycl_ext_oneapi_graph.asciidoc | 39 +++++++------ sycl/doc/syclgraph/SYCLGraphUsageGuide.md | 57 +++++++++++++++---- 2 files changed, 66 insertions(+), 30 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index e483438e29add..63085278e800e 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -1021,14 +1021,16 @@ in the new graph. ==== Graph-Owned Memory Allocations [[graph-memory-allocations]] +:async_alloc_spec: xref:../experimental/sycl_ext_oneapi_async_memory_alloc.asciidoc + It can be desirable for a graph to own and manage memory allocations for memory associated with commands in the graph. This is made possible by using the `async_` and `async_free` commands from the -link:../experimental/sycl_ext_oneapi_async_memory_alloc.asciidoc[sycl_ext_oneapi_async_memory_alloc -] extension. These commands can be added to a graph either via queue recording -or explicit graph creation, which will create allocations which are owned and -managed by that specific `command_graph`, and who's lifetimes are tied to the -lifetime of that graph. +{async_alloc_spec}[sycl_ext_oneapi_async_memory_alloc] extension. These +commands can be added to a graph either via queue recording or explicit graph +creation, which will create allocations which are owned and managed by that +specific `command_graph`, and who's lifetimes are tied to the lifetime of that +graph. Pointers returned from allocation nodes can be used in other graph nodes in the same way as regular USM pointers. @@ -1079,9 +1081,9 @@ async_free(Queue, Ptr); ===== Supported Features [[allocation-supported-features]] -Currently only device allocations are supported in graphs. Attempting to -add allocations of any other type to a graph will result in synchronous errors -being thrown with error code `feature_not_supported`. +Currently only device allocations are supported. Attempting to add allocations +of any other type to a graph will result in synchronous errors being thrown +with error code `feature_not_supported`. ===== Restrictions @@ -1091,13 +1093,14 @@ nodes: * Only one executable graph instance (created by finalizing the modifiable graph) can be alive at any time, and all copies of that instance (created via the {crs}[common reference semantics] of the `command_graph` class) must be -destroyed before the graph can be finalized again. Failing to do so will result -in a sychronous error being thrown with error code `invalid` when attempting to -finalize the graph again. +destroyed before the graph can be finalized again. * The graph cannot be used as a sub-graph in another graph. * Graph memory allocation nodes cannot be updated, and graphs containing these nodes cannot be updated via <>. +Attempting to perform any of the above operations will result in a sychronous +error being thrown with error code `invalid`. + ===== Allocation Lifetime The lifetime of graph-owned allocations are tied to the lifetime of the graph @@ -1106,7 +1109,7 @@ itself. It is only valid to use the pointers returned from graph allocation nodes inside the graph in which they were allocated. Any nodes using these allocations must be ordered after the allocation node and before the free node for that -allocation. +allocation. Failure to do so will result in undefined behavior. It is invalid to use these pointers outside of the owning graph and doing so will result in undefined behavior. @@ -1115,8 +1118,7 @@ will result in undefined behavior. The semantics of `async_malloc` and `async_free` within a graph differ from the eager SYCL usage described in the -link:../experimental/sycl_ext_oneapi_async_memory_alloc.asciidoc[sycl_ext_oneapi_async_memory_alloc] -extension. +{async_alloc_spec}[sycl_ext_oneapi_async_memory_alloc] extension. * Graph memory allocations are not made directly from any default or user-provided Memory Pool. Each graph containing alloc/free nodes maintains its @@ -1129,9 +1131,6 @@ allocations, all other properties will be ignored: `usm::alloc::`, subject to the limitations in the <> section. -** `property::memory_pool::read_only` - Guarantee from the user that memory is -only being read from, the implementation may be able to optimize in this case. - ** `property::memory_pool::zero_init` - Allocated memory will be zero-initialized only once when first allocated. It will not be zero-initialized again before or during any subsequent executions of the graph. If that is @@ -1388,9 +1387,9 @@ Exceptions: property for more information. * Throws with error code `invalid` if the type of the command-group is not a kernel execution and a `dynamic_parameter` was registered inside `cgf`. -* Throws with error code `invalid` if the type of the command-group is -`async_malloc` and the `usm::alloc` type of the associated memory pool is not -`usm::alloc::device`. +* Throws with error code `invalid` if the type of the command contained in the + command-group is `async_malloc` and the `usm::alloc` type of the associated + memory pool is not `usm::alloc::device`. | [source,c++] diff --git a/sycl/doc/syclgraph/SYCLGraphUsageGuide.md b/sycl/doc/syclgraph/SYCLGraphUsageGuide.md index 617d593520976..69f736fd3a3ec 100644 --- a/sycl/doc/syclgraph/SYCLGraphUsageGuide.md +++ b/sycl/doc/syclgraph/SYCLGraphUsageGuide.md @@ -121,7 +121,6 @@ strategy could be employed but instead using a pool of graphs to hide the potential host-synchronization caused when updating and increase device occupancy. - ### Recording Library Calls #### A Note On Library Compatibility @@ -247,15 +246,16 @@ of the graph. This can avoid the need for a library to manage the lifetime of these allocations themselves, or be aware of the library calls being recorded to a graph. -It is important to set correct dependencies between allocation commands, kernels -that use those allocations, and the calls to free the memory. This allows the -graph to determine when allocations are in-use at a given point in the graph, -and allow for re-using memory for subsequent allocation nodes if those nodes are -ordered after a free command which is no longer in use. It is important to note -that calling `async_free` will not deallocate memory but simply mark it as free -for re-use. +It is important to ensure correct dependencies between allocation commands, +kernels that use those allocations, and the calls to free the memory. This +allows the graph to determine when allocations are in-use at a given point in +the graph, and allow for re-using memory for subsequent allocation nodes if +those nodes are ordered after a free command which is no longer in use. + +It is important to note that calling `async_free` will not deallocate memory +but simply mark it as free for re-use. -The following shows a simple example of how these allocations can be used in a +The following shows a simple example of how these allocations can be used in a library function which is recorded to a graph: ```c++ @@ -810,7 +810,7 @@ queue Queue {syclContext, syclDevice}; sycl_ext::memory_pool MemPool{syclContext, syclDevice, usm::alloc::device, {sycl_ext::property::memory_pool::zero_init{}}}; Graph.begin_recording(Queue); -// Add an async_malloc node and capture the returned pointer in Ptr, +// Add an async_malloc node and capture the returned pointer in Ptr, // zero_init property and usm::alloc kind of pool will be respected but pool // is otherwise ignored event AllocEvent = Queue.submit([&](handler& CGH){ @@ -833,8 +833,45 @@ event OtherEventB = Queue.submit([&](handler& CGH){ // Free Ptr, indicating it is no longer in use at this point in the graph, // with a dependency on any leaf nodes using Ptr Queue.submit([&](handler& CGH){ + CGH.depends_on(OtherEventB); sycl_ext::async_free(CGH, Ptr); }); Graph.end_recording(Queue); ``` + +#### In-Order Queue Recording Example + +Using an in-order queue and the event-less async alloc functions. + +```c++ +using namespace sycl; +namespace sycl_ext = sycl::ext::oneapi::experimental; + +void* Ptr = nullptr; +size_t AllocSize = 1024; +queue Queue {syclContext, syclDevice, {property::queue::in_order{}}}; + +Graph.begin_recording(Queue); +// Add an async_malloc node and capturing the returned pointer in Ptr +Ptr = sycl_ext::async_malloc(Queue, usm::alloc::device, AllocSize); + +// Use Ptr in another graph node which has an in-order dependency on the +// allocation node +Queue.submit([&](handler& CGH){ + // Do something with Ptr + CGH.parallel_for(...); +}); +// Use Ptr in another node which has an in-order dependency on the +// previous kernel. +Queue.submit([&](handler& CGH){ + // Do something with Ptr + CGH.parallel_for(...); +}); + +// Free Ptr, indicating it is no longer in use at this point in the graph, +// with an in-order dependency on the previous kernel. +sycl_ext::async_free(Queue, Ptr); + +Graph.end_recording(Queue); +``` From 1e9ae5517638a4c1419dc09c8edd6a39d42a4d7c Mon Sep 17 00:00:00 2001 From: Ben Tracy Date: Tue, 17 Jun 2025 13:44:35 +0100 Subject: [PATCH 05/11] Addressing review comments - Fix broken links to base spec - Make get_required_mem_size noexcept - Minor wording improvements - Add end_recording to example snippets --- .../sycl_ext_oneapi_graph.asciidoc | 40 ++++++++++--------- sycl/doc/syclgraph/SYCLGraphUsageGuide.md | 2 +- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index e10b7435c87f6..9d75277527ddc 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -727,7 +727,7 @@ public: void update(const std::vector& nodes); void update(const command_graph& graph); - size_t get_required_mem_size() const; + size_t get_required_mem_size() const noexcept; }; } // namespace sycl::ext::oneapi::experimental @@ -980,7 +980,7 @@ in the new graph. ==== Graph-Owned Memory Allocations [[graph-memory-allocations]] -:async_alloc_spec: xref:../experimental/sycl_ext_oneapi_async_memory_alloc.asciidoc +:async_alloc_spec: xref:../proposed/sycl_ext_oneapi_async_memory_alloc.asciidoc It can be desirable for a graph to own and manage memory allocations for memory associated with commands in the graph. This is made possible by using the @@ -1021,9 +1021,10 @@ Queue.submit(CGF); Queue.submit([&](handler &CGH){ async_free(CGH, Ptr); }); +Graph.end_recording(Queue); ---- -The `async_*` commands which take a queue can also be used with queue +The `async_*` functions which take a queue can also be used with queue recording, particularly when recording an in-order queue to specify dependencies as no SYCL event is returned. @@ -1036,6 +1037,7 @@ queue Queue {syclContext, syclDevice, {property::queue::in_order{}}}; Graph.begin_recording(Queue); Ptr = async_malloc(Queue, usm::alloc::device, AllocSize); async_free(Queue, Ptr); +Graph.end_recording(Queue); ---- ===== Supported Features [[allocation-supported-features]] @@ -1049,10 +1051,11 @@ with error code `feature_not_supported`. The following restrictions apply to any graph containing async malloc or free nodes: -* Only one executable graph instance (created by finalizing the modifiable -graph) can be alive at any time, and all copies of that instance (created via -the {crs}[common reference semantics] of the `command_graph` class) must be -destroyed before the graph can be finalized again. +* Only one executable graph instance for a given modifiable graph (created by +finalizing the modifiable graph) can be alive at any time, and all copies of +that instance (created via the {crs}[common reference semantics] of the +`command_graph` class) must be destroyed before the graph can be finalized +again. * The graph cannot be used as a sub-graph in another graph. * Graph memory allocation nodes cannot be updated, and graphs containing these nodes cannot be updated via <>. @@ -1076,15 +1079,15 @@ will result in undefined behavior. ===== Behaviour The semantics of `async_malloc` and `async_free` within a graph differ from the -eager SYCL usage described in the +non-graph usage described in the {async_alloc_spec}[sycl_ext_oneapi_async_memory_alloc] extension. * Graph memory allocations are not made directly from any default or -user-provided Memory Pool. Each graph containing alloc/free nodes maintains its -own pool of memory from which allocations are made. The following properties of -a default or user-provided memory pool provided in calls to -`async_` will be respected for the associated graph -allocations, all other properties will be ignored: +user-provided Memory Pool. Each graph containing async mlloc/free nodes +maintains its own pool of memory from which allocations are made. +The following properties of a default or user-provided memory pool provided in +calls to `async_` will be respected for the associated +graph allocations, all other properties will be ignored: ** The allocation type specified when creating the pool with `usm::alloc::`, subject to the limitations in the @@ -1102,8 +1105,9 @@ nodes ordered after that node in the same way any USM pointer would be. * `node_type::async_free` nodes within a graph indicate that a given allocation is no longer in use. They must be ordered after the associated allocation node. -Attempting to add a free node for an allocation which does not exist in the -graph will result in a synchronous error being thrown with error code `invalid`. +The pointer provided to `async_free` must be the address of a memory allocation +allocated by an async malloc node in the same graph. +Violating these preconditions will result in undefined behavior. * Other nodes which use a given graph allocation must be ordered via dependencies such that they are ordered after the allocation node and before the @@ -1425,7 +1429,7 @@ _Returns:_ A list of all nodes in the graph which have no dependencies. [source,c++] ---- -size_t get_required_mem_size() const; +size_t get_required_mem_size() const noexcept; ---- _Constraints:_ This member function is only available when the `command_graph` state is @@ -1528,7 +1532,7 @@ _Throws:_ `property::graph::updatable` was not set when the executable graph was created. -* Synchronously `exception` with error code `invalid` if the graph contains any +* Synchronous `exception` with error code `invalid` if the graph contains any <>. * If any other `exception` is thrown the state of the graph nodes is undefined. @@ -2061,7 +2065,7 @@ extensions. ==== sycl_ext_oneapi_async_memory_alloc The APIs defined in -link:../experimental/sycl_ext_oneapi_async_memory_alloc.asciidoc[sycl_ext_oneapi_async_memory_alloc] +link:../proposed/sycl_ext_oneapi_async_memory_alloc.asciidoc[sycl_ext_oneapi_async_memory_alloc] are supported for use in graphs. For further details see the section on <>. diff --git a/sycl/doc/syclgraph/SYCLGraphUsageGuide.md b/sycl/doc/syclgraph/SYCLGraphUsageGuide.md index 69f736fd3a3ec..f8ee57f71f77d 100644 --- a/sycl/doc/syclgraph/SYCLGraphUsageGuide.md +++ b/sycl/doc/syclgraph/SYCLGraphUsageGuide.md @@ -234,7 +234,7 @@ allocation and use of temporary memory for those kernels. One approach is custom allocators which rely on SYCL events to control the lifetime and re-use of this temporary memory, but these are not compatible with events returned from queue submissions which are recorded to a graph. Instead the -[sycl_ext_oneapi_async_memory_alloc](../extensions/experimental/sycl_ext_oneapi_async_memory_alloc.asciidoc) +[sycl_ext_oneapi_async_memory_alloc](../extensions/proposed/sycl_ext_oneapi_async_memory_alloc.asciidoc) extension can be used which provides similar functionality for eager SYCL usage as well as compatibility with graphs. From 421bfa774978a96edcf3ecfeb732c01aa8efb426 Mon Sep 17 00:00:00 2001 From: Ben Tracy Date: Tue, 17 Jun 2025 16:36:01 +0100 Subject: [PATCH 06/11] Add missing error descriptions for prohibited ops on alloc/free nodes --- .../sycl_ext_oneapi_graph.asciidoc | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index 9d75277527ddc..621f1508fe631 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -1464,6 +1464,8 @@ _Throws:_ created. * An `exception` with error code `invalid` if `node` is not part of the graph. +* An `exception` with error code `invalid` if the type of `node` is either + `node_type::async_malloc` or `node_type::async_free`. * If any other `exception` is thrown the state of the graph node is undefined. [source,c++] @@ -1489,6 +1491,8 @@ _Throws:_ `property::graph::updatable` was not set when the executable graph was created. * An `exception` with error code `invalid` if any node in `nodes` is not part of the graph. +* An `exception` with error code `invalid` if the type of any node in `nodes` is + either `node_type::async_malloc` or `node_type::async_free`. * If any other `exception` is thrown the state of the graph nodes is undefined. [source, c++] @@ -1811,6 +1815,9 @@ as the device and context used on creation of the graph. _Returns:_ An event which represents the command which is submitted to the queue. +_Throws:_ Synchronous `exception` with error code `invalid` if `graph` contains +any <>. + [source,c++] ---- event @@ -1831,6 +1838,9 @@ are the same as the device and context used on creation of the graph. _Returns:_ An event which represents the command which is submitted to the queue. +_Throws:_ Synchronous `exception` with error code `invalid` if `graph` contains +any <>. + [source,c++] ---- event @@ -1851,6 +1861,9 @@ are the same as the device and context used on creation of the graph. _Returns:_ An event which represents the command which is submitted to the queue. +_Throws:_ Synchronous `exception` with error code `invalid` if `graph` contains +any <>. + ==== New Handler Member Functions ===== Additional member functions of the `sycl::handler` class @@ -1866,9 +1879,12 @@ execute at any time. If `graph` is submitted multiple times, dependencies are automatically added by the runtime to prevent concurrent executions of an identical graph. -_Throws:_ Synchronously `exception` with error code `invalid` if the handler +_Throws:_ +* Synchronously `exception` with error code `invalid` if the handler is submitted to a queue which is associated with a device or context that is different from the device and context used on creation of the graph. +* Synchronous `exception` with error code `invalid` if `graph` contains +any <>. [source,c++] ---- From 2612d89f88ed369a644cf4a38cabf99556afb835 Mon Sep 17 00:00:00 2001 From: Ben Tracy Date: Tue, 24 Jun 2025 16:46:54 +0100 Subject: [PATCH 07/11] Addressing review comments - Change error code for host/shared allocation use - Reword finalize to clarify multiple executable graphs --- .../sycl_ext_oneapi_graph.asciidoc | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index 621f1508fe631..04c50884d5553 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -1044,7 +1044,7 @@ Graph.end_recording(Queue); Currently only device allocations are supported. Attempting to add allocations of any other type to a graph will result in synchronous errors being thrown -with error code `feature_not_supported`. +with error code `invalid`. ===== Restrictions @@ -1373,14 +1373,16 @@ command_graph finalize(const property_list& propList = {}) const; ---- -_Effects:_ Synchronous operation that creates a new graph in the executable state with a -fixed topology that can be submitted for execution on any queue sharing the -context associated with the graph. It is valid to call this member function -multiple times to create subsequent executable graphs, unless the graph contains -<>. It is also valid -to continue to add new nodes to the modifiable graph instance after calling this -function. It is valid to finalize an empty graph instance with no recorded -commands. +_Effects:_ Synchronous operation that creates a new graph in the executable +state with a fixed topology that can be submitted for execution on any queue +sharing the context associated with the graph. +It is valid to call this member function to create a new executable graph while +another executable graph created from the same modifiable graph already exists, +unless the graph contains +<>. +It is also valid to continue to add new nodes to the modifiable graph instance +after calling this function. It is valid to finalize an empty graph instance +with no recorded commands. _Constraints:_ This member function is only available when the `command_graph` state is `graph_state::modifiable`. From 00e4bab14fbb0dd8f4c5c24d7c020bcae0e2a624 Mon Sep 17 00:00:00 2001 From: Ben Tracy Date: Wed, 25 Jun 2025 13:23:27 +0100 Subject: [PATCH 08/11] Add open issue for allocations as USM pointers, remove old issues/design --- .../sycl_ext_oneapi_graph.asciidoc | 66 +++++-------------- 1 file changed, 18 insertions(+), 48 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index 04c50884d5553..8ef3671aa320b 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -2302,44 +2302,6 @@ as described in <>. === Features Still in Development -==== Memory Allocation Nodes - -There is no provided interface for users to define a USM allocation/free -operation belonging to the scope of the graph. It would be error prone and -non-performant to allocate or free memory as a node executed during graph -submission. Instead, such a memory allocation API needs to provide a way to -return a pointer which won't be valid until the allocation is made on graph -finalization, as allocating at finalization is the only way to benefit from -the known graph scope for optimal memory allocation, and even optimize to -eliminate some allocations entirely. - -Such a deferred allocation strategy presents challenges however, and as a result -we recommend instead that prior to graph construction users perform core SYCL -USM allocations to be used in the graph submission. Before to coming to this -recommendation we considered the following explicit graph building interfaces -for adding a memory allocation owned by the graph: - -1. Allocation function returning a reference to the raw pointer, i.e. `void*&`, - which will be instantiated on graph finalization with the location of the - allocated USM memory. - -2. Allocation function returning a handle to the allocation. Applications use - the handle in node command-group functions to access memory when allocated. - -3. Allocation function returning a pointer to a virtual allocation, only backed - with an actual allocation when graph is finalized or submitted. - -Design 1) has the drawback of forcing users to keep the user pointer variable -alive so that the reference is valid, which is unintuitive and is likely to -result in bugs. - -Design 2) introduces a handle object which has the advantages of being a less -error prone way to provide the pointer to the deferred allocation. However, it -requires kernel changes and introduces an overhead above the raw pointers that -are the advantage of USM. - -Design 3) needs specific backend support for deferred allocation. - ==== Device Specific Graph A modifiable state `command_graph` contains nodes targeting specific devices, @@ -2396,16 +2358,6 @@ Allow an executable graph to contain nodes targeting different devices. introducing into the extension in later revisions. It has been planned for to the extent that the definition of a graph node is device specific. -=== Memory Allocation API - -We would like to provide an API that allows graph scope memory to be -allocated and used in nodes, such that optimizations can be done on -the allocation. No mechanism is currently provided, but see the -section on <> for -some designs being considered. - -**UNRESOLVED:** Trending "yes". Design is under consideration. - === Device Agnostic Graph Explicit API could support device-agnostic graphs that can be submitted @@ -2435,6 +2387,24 @@ block size. the finalize call either extending the basic command graph proposal or layered as a separate extension proposal. +=== Graph-owned allocations as USM pointers + +Currently we describe graph-owned memory allocations as being able to be used +inside graph nodes in the same way as normal USM pointers and are not valid for +used out of graphs. However, they may not be implemented with actual USM +allocations (could be virtual memory or some other approach). + +It would simplify things if we could say that they are USM pointers but it is +not clear that they would function exactly the same. + +**UNRESOLVED:** +Potential unknowns/issues: + +* Would a virtual address work/be reported as a USM pointer for queries such +as `get_pointer_device` or `get_pointer_type`? +* It is expected to be able to deallocate a USM pointer via `sycl::free` but +this would likely not be valid for other implementation approaches. + == Non-implemented features and known issues The following features are not yet supported, and an exception will be thrown From 4947a746512dbd06327d5578ebbb0bab44488677 Mon Sep 17 00:00:00 2001 From: Ben Tracy Date: Thu, 26 Jun 2025 12:19:02 +0100 Subject: [PATCH 09/11] Fix typos, add missing error wording about alloc nodes in subgraph --- .../extensions/experimental/sycl_ext_oneapi_graph.asciidoc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index 8ef3671aa320b..cd819d72753a0 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -1391,7 +1391,7 @@ _Returns:_ A new executable graph object which can be submitted to a queue. _Throws:_ -* Synchronously `exception` with error code `feature_not_supported` if +* Synchronous `exception` with error code `feature_not_supported` if the graph contains a command that is not supported by the device. * An `exception` with error code `invalid` if the graph contains <> and any instance @@ -1882,11 +1882,12 @@ are automatically added by the runtime to prevent concurrent executions of an identical graph. _Throws:_ -* Synchronously `exception` with error code `invalid` if the handler +* Synchronous `exception` with error code `invalid` if the handler is submitted to a queue which is associated with a device or context that is different from the device and context used on creation of the graph. * Synchronous `exception` with error code `invalid` if `graph` contains -any <>. +any <> and is being +added as a subgraph. [source,c++] ---- From 2d4c3358a1db8c618d64824c8bfa356cca34f39f Mon Sep 17 00:00:00 2001 From: Ben Tracy Date: Wed, 2 Jul 2025 16:42:24 +0100 Subject: [PATCH 10/11] Improve API example, fix queue graph launch errors, fix typos --- .../sycl_ext_oneapi_graph.asciidoc | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index cd819d72753a0..0122484f59bcc 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -1004,12 +1004,11 @@ functions inside a command-group: ---- void* Ptr = nullptr; size_t AllocSize = 1024; -auto CGF = [&](handler &CGH){ - Ptr = async_malloc(CGH, usm::alloc::device, AllocSize); -} // Explicit graph creation -Graph.add(CGF); +Graph.add([&](handler &CGH){ + Ptr = async_malloc(CGH, usm::alloc::device, AllocSize); +}); Graph.add([&](handler &CGH){ async_free(CGH, Ptr); @@ -1017,7 +1016,9 @@ Graph.add([&](handler &CGH){ // Queue recording Graph.begin_recording(Queue); -Queue.submit(CGF); +Queue.submit([&](handler &CGH){ + Ptr = async_malloc(CGH, usm::alloc::device, AllocSize); +}); Queue.submit([&](handler &CGH){ async_free(CGH, Ptr); }); @@ -1025,8 +1026,8 @@ Graph.end_recording(Queue); ---- The `async_*` functions which take a queue can also be used with queue -recording, particularly when recording an in-order queue to specify dependencies -as no SYCL event is returned. +recording, particularly when recording an in-order queue, to specify +dependencies as no SYCL event is returned. [source,c++] ---- @@ -1083,7 +1084,7 @@ non-graph usage described in the {async_alloc_spec}[sycl_ext_oneapi_async_memory_alloc] extension. * Graph memory allocations are not made directly from any default or -user-provided Memory Pool. Each graph containing async mlloc/free nodes +user-provided Memory Pool. Each graph containing async malloc/free nodes maintains its own pool of memory from which allocations are made. The following properties of a default or user-provided memory pool provided in calls to `async_` will be respected for the associated @@ -1841,7 +1842,8 @@ are the same as the device and context used on creation of the graph. _Returns:_ An event which represents the command which is submitted to the queue. _Throws:_ Synchronous `exception` with error code `invalid` if `graph` contains -any <>. +any <> and is being +added as a subgraph. [source,c++] ---- @@ -1864,7 +1866,8 @@ are the same as the device and context used on creation of the graph. _Returns:_ An event which represents the command which is submitted to the queue. _Throws:_ Synchronous `exception` with error code `invalid` if `graph` contains -any <>. +any <> and is being +added as a subgraph. ==== New Handler Member Functions From 211589e2d1af429a42053fdfad9654e8b8355ea9 Mon Sep 17 00:00:00 2001 From: Ben Tracy Date: Fri, 4 Jul 2025 13:25:38 +0100 Subject: [PATCH 11/11] Address review comments - Clarify zero-initialization behavior - Clarify error when recording subgraph with allocations --- .../sycl_ext_oneapi_graph.asciidoc | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index 0122484f59bcc..1bb976276167e 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -1047,7 +1047,7 @@ Currently only device allocations are supported. Attempting to add allocations of any other type to a graph will result in synchronous errors being thrown with error code `invalid`. -===== Restrictions +===== Restrictions [[allocation-restrictions]] The following restrictions apply to any graph containing async malloc or free nodes: @@ -1095,10 +1095,13 @@ graph allocations, all other properties will be ignored: <> section. ** `property::memory_pool::zero_init` - Allocated memory will be -zero-initialized only once when first allocated. It will not be zero-initialized -again before or during any subsequent executions of the graph. If that is -required by the application it is the responsibility of the user to add the -appropriate commands to the graph to do this. +zero-initialized every time a modifiable graph containing allocations with this +property is finalized into an executable graph (see +<> for the restrictions around +finalization for such graphs). It will not be zero-initialized again before or +during any subsequent executions of the graph. If that is required by the +application it is the responsibility of the user to add the appropriate +commands to the graph to do this. * `node_type::async_malloc` nodes within a graph will return a pointer to an allocation of the provided size. This pointer can then be used in other graph @@ -1818,8 +1821,9 @@ as the device and context used on creation of the graph. _Returns:_ An event which represents the command which is submitted to the queue. -_Throws:_ Synchronous `exception` with error code `invalid` if `graph` contains -any <>. +_Throws:_ Synchronous `exception` with error code `invalid` if the queue is +being recorded to a graph and `graph` contains any +<>. [source,c++] ---- @@ -1841,9 +1845,9 @@ are the same as the device and context used on creation of the graph. _Returns:_ An event which represents the command which is submitted to the queue. -_Throws:_ Synchronous `exception` with error code `invalid` if `graph` contains -any <> and is being -added as a subgraph. +_Throws:_ Synchronous `exception` with error code `invalid` if the queue is +being recorded to a graph and `graph` contains any +<>. [source,c++] ---- @@ -1865,9 +1869,9 @@ are the same as the device and context used on creation of the graph. _Returns:_ An event which represents the command which is submitted to the queue. -_Throws:_ Synchronous `exception` with error code `invalid` if `graph` contains -any <> and is being -added as a subgraph. +_Throws:_ Synchronous `exception` with error code `invalid` if the queue is +being recorded to a graph and `graph` contains any +<>. ==== New Handler Member Functions @@ -1888,9 +1892,9 @@ _Throws:_ * Synchronous `exception` with error code `invalid` if the handler is submitted to a queue which is associated with a device or context that is different from the device and context used on creation of the graph. -* Synchronous `exception` with error code `invalid` if `graph` contains -any <> and is being -added as a subgraph. +* Synchronous `exception` with error code `invalid` if the handler is submitted +to a queue which is being recorded to a graph and `graph` contains any +<>. [source,c++] ----