From b9101db84d746ce5eb53530ab3bfa7485bd20202 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Thu, 22 Aug 2024 16:38:37 +0100 Subject: [PATCH 1/3] [Graph] Add e2e test for launching l0 kernel in host_task --- .../interop-level-zero-launch-kernel.cpp | 15 ++ sycl/test-e2e/Graph/Inputs/Kernels/saxpy.cpp | 25 +++ sycl/test-e2e/Graph/Inputs/Kernels/saxpy.spv | Bin 0 -> 6484 bytes .../interop-level-zero-launch-kernel.cpp | 169 ++++++++++++++++++ .../interop-level-zero-launch-kernel.cpp | 14 ++ 5 files changed, 223 insertions(+) create mode 100644 sycl/test-e2e/Graph/Explicit/interop-level-zero-launch-kernel.cpp create mode 100644 sycl/test-e2e/Graph/Inputs/Kernels/saxpy.cpp create mode 100644 sycl/test-e2e/Graph/Inputs/Kernels/saxpy.spv create mode 100644 sycl/test-e2e/Graph/Inputs/interop-level-zero-launch-kernel.cpp create mode 100644 sycl/test-e2e/Graph/RecordReplay/interop-level-zero-launch-kernel.cpp diff --git a/sycl/test-e2e/Graph/Explicit/interop-level-zero-launch-kernel.cpp b/sycl/test-e2e/Graph/Explicit/interop-level-zero-launch-kernel.cpp new file mode 100644 index 0000000000000..0cb40a94db77e --- /dev/null +++ b/sycl/test-e2e/Graph/Explicit/interop-level-zero-launch-kernel.cpp @@ -0,0 +1,15 @@ +// REQUIRES: level_zero, level_zero_dev_kit +// L0 plugin incorrectly reports memory leaks because it doesn't take into +// account direct calls to the L0 API. +// UNSUPPORTED: ze_debug +// RUN: %{build} %level_zero_options -o %t.out +// RUN: %{run} %t.out %S/../Inputs/Kernels/saxpy.spv +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/saxpy.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/saxpy.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + + +#define GRAPH_E2E_EXPLICIT + +#include "../Inputs/interop-level-zero-launch-kernel.cpp" diff --git a/sycl/test-e2e/Graph/Inputs/Kernels/saxpy.cpp b/sycl/test-e2e/Graph/Inputs/Kernels/saxpy.cpp new file mode 100644 index 0000000000000..bf9ecc476e780 --- /dev/null +++ b/sycl/test-e2e/Graph/Inputs/Kernels/saxpy.cpp @@ -0,0 +1,25 @@ +// Source for saxpy.spv +// Compiled using dpcpp: clang++ saxpy.cpp -fsycl -o saxpy.cpp.out +// Extracted using: clang-offload-extract saxpy.cpp.out + +#include + + + +int main() { + size_t array_size = 16; + + sycl::queue sycl_queue; + uint32_t *X = sycl::malloc_device(array_size, sycl_queue); + uint32_t *Z = sycl::malloc_device(array_size, sycl_queue); + + + sycl_queue.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::range<1>{array_size}, + [=](sycl::item<1> itemId) { + constexpr uint32_t A = 2; + Z[itemId] = X[itemId] * A + Z[itemId]; }); + }); + return 0; +} diff --git a/sycl/test-e2e/Graph/Inputs/Kernels/saxpy.spv b/sycl/test-e2e/Graph/Inputs/Kernels/saxpy.spv new file mode 100644 index 0000000000000000000000000000000000000000..5d553f9d683a33b97364b4d175d89f7f9d24d1b0 GIT binary patch literal 6484 zcmai%`*&2;6~|A=B!NIhE7TX#0TgX%rvzdkK@@3+5R*ns05z>%CNq;TWpZbjnLvUs ztO(kdZEfqLRq-uoyZT4;=l(ZctL<90{eI59J9Bdr?OkhUzrSanefGI?)466xkIQtp z8(g>hvx}aa+!~jW2=9t{cg)wvd}qw>iTM|sqxiqy;#{w=CXbyAM#hil$AVhD71qMC z;(A?=w~rp32+9lfaJuyaW4y?8D_S+QjbMZab%avU{8a#{Oh%U%w9qV^DTKQYyN= zX2#tw``mohn}|D@h}#SBilJvq0>q__7I#Z1WN_aaV>lZhDZ#p9m9#y;Yhlj89{ zXuprql8Lo4?g{CYnOr&a?}?04iLOy@%{Rg*{we9h{zdWwpRcRwiiN708r4xi_r21A zOQz4W?VNa~u=lt**`s}hl~zu3FsEJnoF3O$&DTd>r(2Lud-v;Kh~zzDUVmC#_xoI= zuXE2z_>#NSY|T$Ar^`L382@tAlt+0P_q+t%I&a{NE~3u{P2RHTFG_EpQN~@8p6uI8 z(vy9=EIrw`E7FsFdl@~|w^!OZ%lh`J>|fTm*H-iSzV*5{ye+-G|8DngA}6Pz@QBlGY+wy2jDs>rqOL)mRz?^xP3SUv9JWms(| zzE4t^=H;wDl`mQAwe&jDsr5P-ocU6%Yu#t*SOfMsJ?`^mIc?{#e(}2G`<5v6`?>0G zJKsISnr(9Uz8O~C$b4(@dt~}|Bxk+8UY-03{P@S&aEUcsWN^PJ0vA1r+arCIx|e?U z`8Y6KVhtA=+}lOqqPKB3_-~F?a94bH81_2l8#Xf7+eKibgT2=48~k_5D%eXk+qa5g z^Id1y$Y9^*u`(X3U3bQRckGmp?@-p}JEXmK#(#e#{kNw4yzhz6h{zvJ>AX{uc)U}o zQ(pl8CH`{Cefo<-5KOH)ex z4I*mhG26E#Ti&iXFXtnOiEs``C(m*YMgEI0v;9bNCk8!x;(F*24>;6!3Pq3MYD9fq zhH+NR&6rCOG)E3IER6MtoK>@zjTYmpC==i1oY0 z@ngq-Z>+QaCS~o7?Ze{KMIP%O6=$#5A@=-Zan|8nz~Z0zVW;;fCnO>dWH#KFK$ zo?*Y9vXlFcY+wGGTQWZ4 zvm%Sv=Y%p>#_y30pT+-0sLWNr2(0a*f62@51O0;kL#IvrfQT4+$j9j&B7EfU7lDCq zp9tT-)i3N@VjVo}^n<<)ir6o>?1%Xo5@BaO&iQYO#eTmC-vN>3ACye*llc!w$8Px- z34|A%-$KJtlAVjv$D5l;`TZ|oKOcF{HMfjD}DonGNf z_LE-N{2h^;7k%M6)Ta(%N3+aKc8Km2(X+?JxhGGGKPBRBT#xrqkj#7bl*rD0S~B;; zX2tBqggxqGG+X6_y(F1)19wXFgoqm8i?Yv%&x)vn8D_3e$GYq2^fEKgd3K6dMAS=P zvZ6;-H}X$J_;~F8oskSKXMju$vCO}%Rh3MP-Kjar*tk=M9ZCk99vXH{GO=JI6O+cS z%a+7$NXK?oWY|r~V3TXuEy=`!jZBPT+dZ9^ZQUyNbFxjx_N=(YpO?&e8P27+Zws=K zgWqBoC3A+v-YBxz3zCV)e^zI;L;PtG_CcM>dhr)T&x$xxYQ%57{;6d8(V=ri=3JR& z=7O1kr)QUWrtdbpFG{xAy(~F7yDv+>B042v=FndfT@ry0x0!oIGWk~9&tjdKLr%`z z&!w+6bH9)sACH~gFC~M^*&!2SJ-4-f701}jy(%4Ba^`+59qi=H{YE;mU?UTg#{R8r ztZ&$_Nye6(x!*|#n_R2=_mYVP8<`lxwz>F&Y{{7$)SX-}+aE=5i0COXw$DFFrWVc{ z+0L6Y;XL5!^B$3n=ctI^+F|jB6Y{8Ja>vB&z4t@OTSfL>8kgK9%Ee^$KO!9;`^t*w z5%|YM;NmBixqeiHo!I2>#zgFk-gi;{q-@lf%s(z2yXD*O#tGTLz;C}BCnb}6NW||3 zI_E$>@rGgheN48zDBU~m-5O2IGvf4aM*O*02IKiyW`7q&^zm(R?$bNs?~0gn^sM+* zk?HSAem~Z^Lmx=~P(%*;N8%rguEzRZ;-84fVK2l|3w7}R#YQ|eC2izfjBOe5^d6Zr z_V~KE?eWc62Inmi`}$Pe_H|9N?ISDsdaT<%K9f$2?c?(}h8ppKpR5r->zK`I{2y9G BV&VV* literal 0 HcmV?d00001 diff --git a/sycl/test-e2e/Graph/Inputs/interop-level-zero-launch-kernel.cpp b/sycl/test-e2e/Graph/Inputs/interop-level-zero-launch-kernel.cpp new file mode 100644 index 0000000000000..0b75567e468ee --- /dev/null +++ b/sycl/test-e2e/Graph/Inputs/interop-level-zero-launch-kernel.cpp @@ -0,0 +1,169 @@ +// Test that launching a kernel using level-zero interop in a graph's host_task +// works as expected. + +#include "../graph_common.hpp" +#include +#include +#include + +bool getDevice(device &OutDevice, backend Backend) { + auto Platforms = platform::get_platforms(); + platform L0Platform; + for (auto &Platform : Platforms) { + if (Platform.get_backend() == Backend) { + L0Platform = Platform; + } + } + + auto Devices = L0Platform.get_devices(); + for (auto &Device : Devices) { + if (Device.get_backend() == Backend) { + OutDevice = Device; + return true; + } + } + return false; +} + +std::vector loadSpirvFromFile(std::string FileName) { + std::ifstream SpvStream(FileName, std::ios::binary); + SpvStream.seekg(0, std::ios::end); + size_t sz = SpvStream.tellg(); + SpvStream.seekg(0); + std::vector Spv(sz); + SpvStream.read(reinterpret_cast(Spv.data()), sz); + + return Spv; +} + +int main(int, char **argv) { + + device Device; + if (!getDevice(Device, backend::ext_oneapi_level_zero)) { + // No suitable device found. + return 0; + } + + std::vector Spirv = loadSpirvFromFile(argv[1]); + + const sycl::context Context{Device}; + queue Queue{Context, Device}; + + std::vector HostZ(Size); + std::vector HostX(Size); + std::vector ReferenceZ(Size); + std::vector ReferenceX(Size); + + std::iota(HostZ.begin(), HostZ.end(), 1); + std::iota(HostX.begin(), HostX.end(), 10); + + for (int i = 0; i < Size; ++i) { + ReferenceZ[i] = HostX[i] * 2 + HostZ[i]; + ReferenceX[i] = HostX[i]; + } + + uint32_t *MemZ = malloc_device(Size, Queue); + uint32_t *MemX = malloc_device(Size, Queue); + + exp_ext::command_graph Graph{Context, Device}; + + auto NodeA = add_node( + Graph, Queue, [&](handler &CGH) { CGH.copy(HostZ.data(), MemZ, Size); }); + + auto NodeB = add_node( + Graph, Queue, [&](handler &CGH) { CGH.copy(HostX.data(), MemX, Size); }); + + auto NodeC = add_node( + Graph, Queue, + [&](handler &CGH) { + depends_on_helper(CGH, {NodeA, NodeB}); + CGH.host_task([&]() { + auto ZeContext = get_native(Context); + auto ZeDevice = get_native(Device); + + ze_result_t status; + ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC, + nullptr, + ZE_MODULE_FORMAT_IL_SPIRV, + Spirv.size(), + Spirv.data(), + nullptr, + nullptr}; + ze_module_handle_t hModule; + status = zeModuleCreate(ZeContext, ZeDevice, &moduleDesc, &hModule, + nullptr); + assert(status == ZE_RESULT_SUCCESS); + + ze_kernel_desc_t kernelDesc = { + ZE_STRUCTURE_TYPE_KERNEL_DESC, nullptr, 0, + "_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E5saxpy"}; + ze_kernel_handle_t hKernel; + status = zeKernelCreate(hModule, &kernelDesc, &hKernel); + assert(status == ZE_RESULT_SUCCESS); + + auto ZeCommandQueueDesc = + ze_command_queue_desc_t{ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, + nullptr, + 0, + 0, + 0, + ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS, + ZE_COMMAND_QUEUE_PRIORITY_NORMAL}; + + ze_command_list_handle_t ZeCommandList; + status = zeCommandListCreateImmediate( + ZeContext, ZeDevice, &ZeCommandQueueDesc, &ZeCommandList); + assert(status == ZE_RESULT_SUCCESS); + + status = zeKernelSetArgumentValue(hKernel, 0, Size * sizeof(uint32_t), + &MemZ); + assert(status == ZE_RESULT_SUCCESS); + status = zeKernelSetArgumentValue(hKernel, 1, Size * sizeof(uint32_t), + &MemX); + assert(status == ZE_RESULT_SUCCESS); + ze_group_count_t ZeGroupCount{Size, 1, 1}; + + zeKernelSetGroupSize(hKernel, 1024, 1, 1); + assert(status == ZE_RESULT_SUCCESS); + + status = zeCommandListAppendLaunchKernel( + ZeCommandList, hKernel, &ZeGroupCount, nullptr, 0, nullptr); + + assert(status == ZE_RESULT_SUCCESS); + + status = zeCommandListHostSynchronize(ZeCommandList, 0); + assert(status == ZE_RESULT_SUCCESS); + }); + }, + NodeA, NodeB); + + auto NodeD = add_node( + Graph, Queue, + [&](handler &CGH) { + depends_on_helper(CGH, NodeC); + CGH.copy(MemZ, HostZ.data(), Size); + }, + NodeC); + + auto NodeE = add_node( + Graph, Queue, + [&](handler &CGH) { + depends_on_helper(CGH, NodeC); + CGH.copy(MemX, HostX.data(), Size); + }, + NodeC); + + auto GraphExec = Graph.finalize(); + Queue.ext_oneapi_graph(GraphExec); + Queue.wait_and_throw(); + + sycl::free(MemZ, Context); + sycl::free(MemX, Context); + + for (uint32_t i = 0; i < Size; ++i) { + assert(check_value(i, ReferenceZ[i], HostZ[i], "HostZ")); + assert(check_value(i, ReferenceX[i], HostX[i], "HostX")); + } + + return 0; +} diff --git a/sycl/test-e2e/Graph/RecordReplay/interop-level-zero-launch-kernel.cpp b/sycl/test-e2e/Graph/RecordReplay/interop-level-zero-launch-kernel.cpp new file mode 100644 index 0000000000000..894c35e995152 --- /dev/null +++ b/sycl/test-e2e/Graph/RecordReplay/interop-level-zero-launch-kernel.cpp @@ -0,0 +1,14 @@ +// REQUIRES: level_zero, level_zero_dev_kit +// L0 plugin incorrectly reports memory leaks because it doesn't take into +// account direct calls to the L0 API. +// UNSUPPORTED: ze_debug +// RUN: %{build} %level_zero_options -o %t.out +// RUN: %{run} %t.out %S/../Inputs/Kernels/saxpy.spv +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/saxpy.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/saxpy.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +#define GRAPH_E2E_RECORD_REPLAY + +#include "../Inputs/interop-level-zero-launch-kernel.cpp" From fdd1ff65b71c9fab00ca5bc55855438fe54e725e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Thu, 22 Aug 2024 17:44:50 +0100 Subject: [PATCH 2/3] Fix CI issues --- .../interop-level-zero-launch-kernel.cpp | 1 - sycl/test-e2e/Graph/Inputs/Kernels/saxpy.cpp | 16 +++++++--------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/sycl/test-e2e/Graph/Explicit/interop-level-zero-launch-kernel.cpp b/sycl/test-e2e/Graph/Explicit/interop-level-zero-launch-kernel.cpp index 0cb40a94db77e..b283697720201 100644 --- a/sycl/test-e2e/Graph/Explicit/interop-level-zero-launch-kernel.cpp +++ b/sycl/test-e2e/Graph/Explicit/interop-level-zero-launch-kernel.cpp @@ -9,7 +9,6 @@ // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/saxpy.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %} - #define GRAPH_E2E_EXPLICIT #include "../Inputs/interop-level-zero-launch-kernel.cpp" diff --git a/sycl/test-e2e/Graph/Inputs/Kernels/saxpy.cpp b/sycl/test-e2e/Graph/Inputs/Kernels/saxpy.cpp index bf9ecc476e780..06c7110877833 100644 --- a/sycl/test-e2e/Graph/Inputs/Kernels/saxpy.cpp +++ b/sycl/test-e2e/Graph/Inputs/Kernels/saxpy.cpp @@ -2,9 +2,8 @@ // Compiled using dpcpp: clang++ saxpy.cpp -fsycl -o saxpy.cpp.out // Extracted using: clang-offload-extract saxpy.cpp.out -#include - - +#include +#include int main() { size_t array_size = 16; @@ -13,13 +12,12 @@ int main() { uint32_t *X = sycl::malloc_device(array_size, sycl_queue); uint32_t *Z = sycl::malloc_device(array_size, sycl_queue); - sycl_queue.submit([&](sycl::handler &cgh) { - cgh.parallel_for( - sycl::range<1>{array_size}, - [=](sycl::item<1> itemId) { - constexpr uint32_t A = 2; - Z[itemId] = X[itemId] * A + Z[itemId]; }); + cgh.parallel_for(sycl::range<1>{array_size}, + [=](sycl::item<1> itemId) { + constexpr uint32_t A = 2; + Z[itemId] = X[itemId] * A + Z[itemId]; + }); }); return 0; } From 97bf31f796da2d12dffa278c590cdffeb09e642f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Wed, 28 Aug 2024 14:00:44 +0100 Subject: [PATCH 3/3] Destroy L0 handles --- .../interop-level-zero-launch-kernel.cpp | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/sycl/test-e2e/Graph/Inputs/interop-level-zero-launch-kernel.cpp b/sycl/test-e2e/Graph/Inputs/interop-level-zero-launch-kernel.cpp index 0b75567e468ee..704c15724b756 100644 --- a/sycl/test-e2e/Graph/Inputs/interop-level-zero-launch-kernel.cpp +++ b/sycl/test-e2e/Graph/Inputs/interop-level-zero-launch-kernel.cpp @@ -89,16 +89,16 @@ int main(int, char **argv) { Spirv.data(), nullptr, nullptr}; - ze_module_handle_t hModule; - status = zeModuleCreate(ZeContext, ZeDevice, &moduleDesc, &hModule, + ze_module_handle_t ZeModule; + status = zeModuleCreate(ZeContext, ZeDevice, &moduleDesc, &ZeModule, nullptr); assert(status == ZE_RESULT_SUCCESS); ze_kernel_desc_t kernelDesc = { ZE_STRUCTURE_TYPE_KERNEL_DESC, nullptr, 0, "_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E5saxpy"}; - ze_kernel_handle_t hKernel; - status = zeKernelCreate(hModule, &kernelDesc, &hKernel); + ze_kernel_handle_t ZeKernel; + status = zeKernelCreate(ZeModule, &kernelDesc, &ZeKernel); assert(status == ZE_RESULT_SUCCESS); auto ZeCommandQueueDesc = @@ -115,24 +115,33 @@ int main(int, char **argv) { ZeContext, ZeDevice, &ZeCommandQueueDesc, &ZeCommandList); assert(status == ZE_RESULT_SUCCESS); - status = zeKernelSetArgumentValue(hKernel, 0, Size * sizeof(uint32_t), - &MemZ); + status = zeKernelSetArgumentValue(ZeKernel, 0, + Size * sizeof(uint32_t), &MemZ); assert(status == ZE_RESULT_SUCCESS); - status = zeKernelSetArgumentValue(hKernel, 1, Size * sizeof(uint32_t), - &MemX); + status = zeKernelSetArgumentValue(ZeKernel, 1, + Size * sizeof(uint32_t), &MemX); assert(status == ZE_RESULT_SUCCESS); ze_group_count_t ZeGroupCount{Size, 1, 1}; - zeKernelSetGroupSize(hKernel, 1024, 1, 1); + zeKernelSetGroupSize(ZeKernel, 1024, 1, 1); assert(status == ZE_RESULT_SUCCESS); status = zeCommandListAppendLaunchKernel( - ZeCommandList, hKernel, &ZeGroupCount, nullptr, 0, nullptr); + ZeCommandList, ZeKernel, &ZeGroupCount, nullptr, 0, nullptr); assert(status == ZE_RESULT_SUCCESS); status = zeCommandListHostSynchronize(ZeCommandList, 0); assert(status == ZE_RESULT_SUCCESS); + + status = zeCommandListDestroy(ZeCommandList); + assert(status == ZE_RESULT_SUCCESS); + + status = zeKernelDestroy(ZeKernel); + assert(status == ZE_RESULT_SUCCESS); + + status = zeModuleDestroy(ZeModule); + assert(status == ZE_RESULT_SUCCESS); }); }, NodeA, NodeB);