diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index da60f8258a0bb..1f740b1b15f9f 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -726,6 +726,7 @@ class __SYCL_EXPORT handler { MNDRDesc.set(std::move(NumWorkItems)); MCGType = detail::CG::KERNEL; extractArgsAndReqs(); + MKernelName = getKernelName(); } #ifdef __SYCL_DEVICE_ONLY__ @@ -1185,6 +1186,7 @@ class __SYCL_EXPORT handler { MKernel = detail::getSyclObjImpl(std::move(Kernel)); MCGType = detail::CG::KERNEL; extractArgsAndReqs(); + MKernelName = getKernelName(); } void parallel_for(range<1> NumWorkItems, kernel Kernel) { @@ -1218,6 +1220,7 @@ class __SYCL_EXPORT handler { MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset)); MCGType = detail::CG::KERNEL; extractArgsAndReqs(); + MKernelName = getKernelName(); } /// Defines and invokes a SYCL kernel function for the specified range and @@ -1238,6 +1241,7 @@ class __SYCL_EXPORT handler { MNDRDesc.set(std::move(NDRange)); MCGType = detail::CG::KERNEL; extractArgsAndReqs(); + MKernelName = getKernelName(); } /// Defines and invokes a SYCL kernel function. @@ -1260,9 +1264,10 @@ class __SYCL_EXPORT handler { MNDRDesc.set(range<1>{1}); MKernel = detail::getSyclObjImpl(std::move(Kernel)); MCGType = detail::CG::KERNEL; - if (!MIsHost && !lambdaAndKernelHaveEqualName()) + if (!MIsHost && !lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); - else + MKernelName = getKernelName(); + } else StoreLambda(std::move(KernelFunc)); #endif } @@ -1300,9 +1305,10 @@ class __SYCL_EXPORT handler { MNDRDesc.set(std::move(NumWorkItems)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); MCGType = detail::CG::KERNEL; - if (!MIsHost && !lambdaAndKernelHaveEqualName()) + if (!MIsHost && !lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); - else + MKernelName = getKernelName(); + } else StoreLambda( std::move(KernelFunc)); #endif @@ -1336,9 +1342,10 @@ class __SYCL_EXPORT handler { MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); MCGType = detail::CG::KERNEL; - if (!MIsHost && !lambdaAndKernelHaveEqualName()) + if (!MIsHost && !lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); - else + MKernelName = getKernelName(); + } else StoreLambda( std::move(KernelFunc)); #endif @@ -1373,9 +1380,10 @@ class __SYCL_EXPORT handler { MNDRDesc.set(std::move(NDRange)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); MCGType = detail::CG::KERNEL; - if (!MIsHost && !lambdaAndKernelHaveEqualName()) + if (!MIsHost && !lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); - else + MKernelName = getKernelName(); + } else StoreLambda( std::move(KernelFunc)); #endif @@ -1791,7 +1799,7 @@ class __SYCL_EXPORT handler { unique_ptr_class MHostKernel; /// Storage for lambda/function when using HostTask unique_ptr_class MHostTask; - detail::OSModuleHandle MOSModuleHandle; + detail::OSModuleHandle MOSModuleHandle = detail::OSUtil::ExeModuleHandle; // Storage for a lambda or function when using InteropTasks std::unique_ptr MInteropTask; /// The list of events that order this operation. diff --git a/sycl/test/basic_tests/handler/handler_set_args.cpp b/sycl/test/basic_tests/handler/handler_set_args.cpp new file mode 100644 index 0000000000000..dcb613ec33627 --- /dev/null +++ b/sycl/test/basic_tests/handler/handler_set_args.cpp @@ -0,0 +1,230 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +//==--------------- handler_set_args.cpp -------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include +#include + +constexpr bool UseOffset = true; +constexpr bool NoOffset = false; +const cl::sycl::range<1> Range = 1; + +using AccessorT = cl::sycl::accessor; + +struct SingleTaskFunctor { + SingleTaskFunctor(AccessorT acc) : MAcc(acc) {} + + void operator()() { MAcc[0] = 10; } + + AccessorT MAcc; +}; + +template struct ParallelForRangeIdFunctor { + ParallelForRangeIdFunctor(AccessorT acc) : MAcc(acc) {} + + void operator()(cl::sycl::id<1> id) { MAcc[0] = 10; } + + AccessorT MAcc; +}; + +template struct ParallelForRangeItemFunctor { + ParallelForRangeItemFunctor(AccessorT acc) : MAcc(acc) {} + + void operator()(cl::sycl::item<1> item) { MAcc[0] = 10; } + + AccessorT MAcc; +}; + +struct ParallelForNdRangeFunctor { + ParallelForNdRangeFunctor(AccessorT acc) : MAcc(acc) {} + + void operator()(cl::sycl::nd_item<1> ndItem) { MAcc[0] = 10; } + + AccessorT MAcc; +}; + +template +cl::sycl::kernel getPrebuiltKernel(cl::sycl::queue &queue) { + cl::sycl::program program(queue.get_context()); + program.build_with_kernel_type(); + return program.get_kernel(); +} + +template +void checkApiCall(cl::sycl::queue &queue, kernel_wrapper &&kernelWrapper) { + int result = 0; + { + auto buf = cl::sycl::buffer(&result, Range); + queue.submit([&](cl::sycl::handler &cgh) { + auto acc = buf.get_access(cgh); + kernelWrapper(cgh, acc); + }); + } + assert(result == 10); +} + +int main() { + cl::sycl::queue Queue; + const cl::sycl::id<1> Offset(0); + const cl::sycl::nd_range<1> NdRange(Range, Range); + + checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) { + cgh.single_task(SingleTaskFunctor(acc)); + }); + + checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) { + cgh.parallel_for(Range, ParallelForRangeIdFunctor(acc)); + }); + + checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) { + cgh.parallel_for(Range, Offset, ParallelForRangeIdFunctor(acc)); + }); + + checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) { + cgh.parallel_for(Range, ParallelForRangeItemFunctor(acc)); + }); + + checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) { + cgh.parallel_for(Range, Offset, + ParallelForRangeItemFunctor(acc)); + }); + + checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) { + cgh.parallel_for(NdRange, ParallelForNdRangeFunctor(acc)); + }); + + { + auto preBuiltKernel = getPrebuiltKernel(Queue); + + checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) { + cgh.set_args(acc); + cgh.single_task(preBuiltKernel); + }); + } + + { + auto preBuiltKernel = + getPrebuiltKernel>(Queue); + + checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) { + cgh.set_args(acc); + cgh.parallel_for(Range, preBuiltKernel); + }); + } + + { + auto preBuiltKernel = + getPrebuiltKernel>(Queue); + + checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) { + cgh.set_args(acc); + cgh.parallel_for(Range, Offset, preBuiltKernel); + }); + } + + { + auto preBuiltKernel = + getPrebuiltKernel>(Queue); + + checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) { + cgh.set_args(acc); + cgh.parallel_for(Range, preBuiltKernel); + }); + } + + { + auto preBuiltKernel = + getPrebuiltKernel>(Queue); + + checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) { + cgh.set_args(acc); + cgh.parallel_for(Range, Offset, preBuiltKernel); + }); + } + + { + auto preBuiltKernel = getPrebuiltKernel(Queue); + + checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) { + cgh.set_args(acc); + cgh.parallel_for(NdRange, preBuiltKernel); + }); + } + + { + auto preBuiltKernel = getPrebuiltKernel(Queue); + + checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) { + cgh.set_args(acc); + cgh.single_task(preBuiltKernel, + [=]() { acc[0] = 10; }); + }); + } + + { + auto preBuiltKernel = + getPrebuiltKernel>(Queue); + + checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) { + cgh.set_args(acc); + cgh.parallel_for( + preBuiltKernel, Range, [=](cl::sycl::id<1> id) { acc[0] = 10; }); + }); + } + + { + auto preBuiltKernel = + getPrebuiltKernel>(Queue); + + checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) { + cgh.set_args(acc); + cgh.parallel_for( + preBuiltKernel, Range, Offset, + [=](cl::sycl::id<1> id) { acc[0] = 10; }); + }); + } + + { + auto preBuiltKernel = + getPrebuiltKernel>(Queue); + + checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) { + cgh.set_args(acc); + cgh.parallel_for( + preBuiltKernel, Range, [=](cl::sycl::item<1> item) { acc[0] = 10; }); + }); + } + + { + auto preBuiltKernel = + getPrebuiltKernel>(Queue); + + checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) { + cgh.set_args(acc); + cgh.parallel_for( + preBuiltKernel, Range, Offset, + [=](cl::sycl::item<1> item) { acc[0] = 10; }); + }); + } + + { + auto preBuiltKernel = getPrebuiltKernel(Queue); + + checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) { + cgh.set_args(acc); + cgh.parallel_for( + preBuiltKernel, NdRange, + [=](cl::sycl::nd_item<1> ndItem) { acc[0] = 10; }); + }); + } + + return 0; +}