-
Notifications
You must be signed in to change notification settings - Fork 794
[SYCL][Graph] Add e2e test for launching an l0 kernel in a host_task #15170
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
14 changes: 14 additions & 0 deletions
14
sycl/test-e2e/Graph/Explicit/interop-level-zero-launch-kernel.cpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| // REQUIRES: level_zero, level_zero_dev_kit | ||
| // L0 plugin incorrectly reports memory leaks because it doesn't take into | ||
| // account direct calls to the L0 API. | ||
| // UNSUPPORTED: ze_debug | ||
| // RUN: %{build} %level_zero_options -o %t.out | ||
| // RUN: %{run} %t.out %S/../Inputs/Kernels/saxpy.spv | ||
| // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG | ||
| // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/saxpy.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %} | ||
| // Extra run to check for immediate-command-list in Level Zero | ||
| // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/saxpy.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %} | ||
|
|
||
| #define GRAPH_E2E_EXPLICIT | ||
|
|
||
| #include "../Inputs/interop-level-zero-launch-kernel.cpp" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| // Source for saxpy.spv | ||
| // Compiled using dpcpp: clang++ saxpy.cpp -fsycl -o saxpy.cpp.out | ||
| // Extracted using: clang-offload-extract saxpy.cpp.out | ||
|
|
||
| #include <sycl/detail/core.hpp> | ||
| #include <sycl/usm.hpp> | ||
|
|
||
| int main() { | ||
| size_t array_size = 16; | ||
|
|
||
| sycl::queue sycl_queue; | ||
| uint32_t *X = sycl::malloc_device<uint32_t>(array_size, sycl_queue); | ||
| uint32_t *Z = sycl::malloc_device<uint32_t>(array_size, sycl_queue); | ||
|
|
||
| sycl_queue.submit([&](sycl::handler &cgh) { | ||
| cgh.parallel_for<class saxpy>(sycl::range<1>{array_size}, | ||
| [=](sycl::item<1> itemId) { | ||
| constexpr uint32_t A = 2; | ||
| Z[itemId] = X[itemId] * A + Z[itemId]; | ||
| }); | ||
| }); | ||
| return 0; | ||
| } |
Binary file not shown.
178 changes: 178 additions & 0 deletions
178
sycl/test-e2e/Graph/Inputs/interop-level-zero-launch-kernel.cpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,178 @@ | ||
| // Test that launching a kernel using level-zero interop in a graph's host_task | ||
| // works as expected. | ||
|
|
||
| #include "../graph_common.hpp" | ||
| #include <level_zero/ze_api.h> | ||
| #include <sycl/ext/oneapi/backend/level_zero.hpp> | ||
| #include <sycl/interop_handle.hpp> | ||
|
|
||
| bool getDevice(device &OutDevice, backend Backend) { | ||
| auto Platforms = platform::get_platforms(); | ||
| platform L0Platform; | ||
| for (auto &Platform : Platforms) { | ||
| if (Platform.get_backend() == Backend) { | ||
| L0Platform = Platform; | ||
| } | ||
| } | ||
|
|
||
| auto Devices = L0Platform.get_devices(); | ||
| for (auto &Device : Devices) { | ||
| if (Device.get_backend() == Backend) { | ||
| OutDevice = Device; | ||
| return true; | ||
| } | ||
| } | ||
| return false; | ||
| } | ||
|
|
||
| std::vector<uint8_t> loadSpirvFromFile(std::string FileName) { | ||
| std::ifstream SpvStream(FileName, std::ios::binary); | ||
| SpvStream.seekg(0, std::ios::end); | ||
| size_t sz = SpvStream.tellg(); | ||
| SpvStream.seekg(0); | ||
| std::vector<uint8_t> Spv(sz); | ||
| SpvStream.read(reinterpret_cast<char *>(Spv.data()), sz); | ||
|
|
||
| return Spv; | ||
| } | ||
|
|
||
| int main(int, char **argv) { | ||
|
|
||
| device Device; | ||
| if (!getDevice(Device, backend::ext_oneapi_level_zero)) { | ||
| // No suitable device found. | ||
| return 0; | ||
| } | ||
|
|
||
| std::vector<uint8_t> Spirv = loadSpirvFromFile(argv[1]); | ||
|
|
||
| const sycl::context Context{Device}; | ||
| queue Queue{Context, Device}; | ||
|
|
||
| std::vector<uint32_t> HostZ(Size); | ||
| std::vector<uint32_t> HostX(Size); | ||
| std::vector<uint32_t> ReferenceZ(Size); | ||
| std::vector<uint32_t> ReferenceX(Size); | ||
|
|
||
| std::iota(HostZ.begin(), HostZ.end(), 1); | ||
| std::iota(HostX.begin(), HostX.end(), 10); | ||
|
|
||
| for (int i = 0; i < Size; ++i) { | ||
| ReferenceZ[i] = HostX[i] * 2 + HostZ[i]; | ||
| ReferenceX[i] = HostX[i]; | ||
| } | ||
|
|
||
| uint32_t *MemZ = malloc_device<uint32_t>(Size, Queue); | ||
| uint32_t *MemX = malloc_device<uint32_t>(Size, Queue); | ||
|
|
||
| exp_ext::command_graph Graph{Context, Device}; | ||
|
|
||
| auto NodeA = add_node( | ||
| Graph, Queue, [&](handler &CGH) { CGH.copy(HostZ.data(), MemZ, Size); }); | ||
|
|
||
| auto NodeB = add_node( | ||
| Graph, Queue, [&](handler &CGH) { CGH.copy(HostX.data(), MemX, Size); }); | ||
|
|
||
| auto NodeC = add_node( | ||
| Graph, Queue, | ||
| [&](handler &CGH) { | ||
| depends_on_helper(CGH, {NodeA, NodeB}); | ||
| CGH.host_task([&]() { | ||
| auto ZeContext = get_native<backend::ext_oneapi_level_zero>(Context); | ||
| auto ZeDevice = get_native<backend::ext_oneapi_level_zero>(Device); | ||
|
|
||
| ze_result_t status; | ||
| ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC, | ||
| nullptr, | ||
| ZE_MODULE_FORMAT_IL_SPIRV, | ||
| Spirv.size(), | ||
| Spirv.data(), | ||
| nullptr, | ||
| nullptr}; | ||
| ze_module_handle_t ZeModule; | ||
| status = zeModuleCreate(ZeContext, ZeDevice, &moduleDesc, &ZeModule, | ||
| nullptr); | ||
| assert(status == ZE_RESULT_SUCCESS); | ||
|
|
||
| ze_kernel_desc_t kernelDesc = { | ||
| ZE_STRUCTURE_TYPE_KERNEL_DESC, nullptr, 0, | ||
| "_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E5saxpy"}; | ||
| ze_kernel_handle_t ZeKernel; | ||
| status = zeKernelCreate(ZeModule, &kernelDesc, &ZeKernel); | ||
| assert(status == ZE_RESULT_SUCCESS); | ||
|
|
||
| auto ZeCommandQueueDesc = | ||
| ze_command_queue_desc_t{ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, | ||
| nullptr, | ||
| 0, | ||
| 0, | ||
| 0, | ||
| ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS, | ||
| ZE_COMMAND_QUEUE_PRIORITY_NORMAL}; | ||
|
|
||
| ze_command_list_handle_t ZeCommandList; | ||
| status = zeCommandListCreateImmediate( | ||
| ZeContext, ZeDevice, &ZeCommandQueueDesc, &ZeCommandList); | ||
| assert(status == ZE_RESULT_SUCCESS); | ||
|
|
||
| status = zeKernelSetArgumentValue(ZeKernel, 0, | ||
| Size * sizeof(uint32_t), &MemZ); | ||
| assert(status == ZE_RESULT_SUCCESS); | ||
| status = zeKernelSetArgumentValue(ZeKernel, 1, | ||
| Size * sizeof(uint32_t), &MemX); | ||
| assert(status == ZE_RESULT_SUCCESS); | ||
| ze_group_count_t ZeGroupCount{Size, 1, 1}; | ||
|
|
||
| zeKernelSetGroupSize(ZeKernel, 1024, 1, 1); | ||
| assert(status == ZE_RESULT_SUCCESS); | ||
|
|
||
| status = zeCommandListAppendLaunchKernel( | ||
| ZeCommandList, ZeKernel, &ZeGroupCount, nullptr, 0, nullptr); | ||
|
|
||
| assert(status == ZE_RESULT_SUCCESS); | ||
|
|
||
| status = zeCommandListHostSynchronize(ZeCommandList, 0); | ||
| assert(status == ZE_RESULT_SUCCESS); | ||
|
|
||
| status = zeCommandListDestroy(ZeCommandList); | ||
| assert(status == ZE_RESULT_SUCCESS); | ||
|
|
||
| status = zeKernelDestroy(ZeKernel); | ||
| assert(status == ZE_RESULT_SUCCESS); | ||
|
|
||
| status = zeModuleDestroy(ZeModule); | ||
| assert(status == ZE_RESULT_SUCCESS); | ||
| }); | ||
| }, | ||
| NodeA, NodeB); | ||
|
|
||
| auto NodeD = add_node( | ||
| Graph, Queue, | ||
| [&](handler &CGH) { | ||
| depends_on_helper(CGH, NodeC); | ||
| CGH.copy(MemZ, HostZ.data(), Size); | ||
| }, | ||
| NodeC); | ||
|
|
||
| auto NodeE = add_node( | ||
| Graph, Queue, | ||
| [&](handler &CGH) { | ||
| depends_on_helper(CGH, NodeC); | ||
| CGH.copy(MemX, HostX.data(), Size); | ||
| }, | ||
| NodeC); | ||
|
|
||
| auto GraphExec = Graph.finalize(); | ||
| Queue.ext_oneapi_graph(GraphExec); | ||
| Queue.wait_and_throw(); | ||
|
|
||
| sycl::free(MemZ, Context); | ||
| sycl::free(MemX, Context); | ||
|
|
||
| for (uint32_t i = 0; i < Size; ++i) { | ||
| assert(check_value(i, ReferenceZ[i], HostZ[i], "HostZ")); | ||
| assert(check_value(i, ReferenceX[i], HostX[i], "HostX")); | ||
| } | ||
|
|
||
| return 0; | ||
| } | ||
14 changes: 14 additions & 0 deletions
14
sycl/test-e2e/Graph/RecordReplay/interop-level-zero-launch-kernel.cpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| // REQUIRES: level_zero, level_zero_dev_kit | ||
| // L0 plugin incorrectly reports memory leaks because it doesn't take into | ||
| // account direct calls to the L0 API. | ||
| // UNSUPPORTED: ze_debug | ||
| // RUN: %{build} %level_zero_options -o %t.out | ||
| // RUN: %{run} %t.out %S/../Inputs/Kernels/saxpy.spv | ||
| // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG | ||
| // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/saxpy.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %} | ||
| // Extra run to check for immediate-command-list in Level Zero | ||
| // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/saxpy.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %} | ||
|
|
||
| #define GRAPH_E2E_RECORD_REPLAY | ||
|
|
||
| #include "../Inputs/interop-level-zero-launch-kernel.cpp" |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.