diff --git a/SYCL/Basic/kernel_invoke_runtime_properties.cpp b/SYCL/Basic/kernel_invoke_runtime_properties.cpp
new file mode 100644
index 0000000000..a6a5871b68
--- /dev/null
+++ b/SYCL/Basic/kernel_invoke_runtime_properties.cpp
@@ -0,0 +1,226 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t2.out
+// RUN: %CPU_RUN_PLACEHOLDER %t2.out
+// RUN: %GPU_RUN_PLACEHOLDER %t2.out
+// RUN: %ACC_RUN_PLACEHOLDER %t2.out
+
+#include <CL/sycl.hpp>
+
+using namespace cl::sycl;
+
+class k0;
+class k1;
+class k2;
+class k3;
+class k4;
+class k5;
+class k6;
+class k7;
+
+int main() {
+  queue myQueue;
+  property_list Props = {};
+
+  const range<1> globalRange(2);
+  const range<1> localRange(2);
+  const id<1> globalOffset(0);
+  const nd_range<1> ndRange(globalRange, localRange, globalOffset);
+
+  cl::sycl::program Prg0(myQueue.get_context());
+  Prg0.build_with_kernel_type<k0>();
+  cl::sycl::kernel Krn0 = Prg0.get_kernel<k0>();
+
+  assert(Prg0.has_kernel<k0>());
+
+  myQueue.submit([&](cl::sycl::handler &Cgh) {
+    Cgh.single_task<k0>(
+        Krn0, [=]() {}, Props);
+  });
+
+  myQueue.submit([&](cl::sycl::handler &Cgh) { Cgh.single_task(Krn0, Props); });
+
+  cl::sycl::program Prg1(myQueue.get_context());
+  Prg1.build_with_kernel_type<k1>();
+  cl::sycl::kernel Krn1 = Prg1.get_kernel<k1>();
+
+  assert(Prg1.has_kernel<k1>());
+
+  myQueue.submit([&](cl::sycl::handler &Cgh) {
+    Cgh.parallel_for<k1>(
+        Krn1, range<1>(2), [=](id<1> index) {}, Props);
+  });
+
+  myQueue.submit([&](cl::sycl::handler &Cgh) {
+    Cgh.parallel_for(range<1>(2), Krn1, Props);
+  });
+
+  cl::sycl::program Prg2(myQueue.get_context());
+  Prg2.build_with_kernel_type<k2>();
+  cl::sycl::kernel Krn2 = Prg2.get_kernel<k2>();
+
+  assert(Prg2.has_kernel<k2>());
+
+  myQueue.submit([&](cl::sycl::handler &Cgh) {
+    Cgh.parallel_for<k2>(
+        Krn2, range<2>(2, 2), [=](id<2> index) {}, Props);
+  });
+
+  myQueue.submit([&](cl::sycl::handler &Cgh) {
+    Cgh.parallel_for(range<2>(2, 2), Krn2, Props);
+  });
+
+  cl::sycl::program Prg3(myQueue.get_context());
+  Prg3.build_with_kernel_type<k3>();
+  cl::sycl::kernel Krn3 = Prg3.get_kernel<k3>();
+
+  assert(Prg3.has_kernel<k3>());
+
+  myQueue.submit([&](cl::sycl::handler &Cgh) {
+    Cgh.parallel_for<k3>(
+        Krn3, range<3>(2, 2, 2), [=](id<3> index) {}, Props);
+  });
+
+  myQueue.submit([&](cl::sycl::handler &Cgh) {
+    Cgh.parallel_for(range<3>(2, 2, 2), Krn3, Props);
+  });
+  cl::sycl::program Prg4(myQueue.get_context());
+  Prg4.build_with_kernel_type<k4>();
+  cl::sycl::kernel Krn4 = Prg4.get_kernel<k4>();
+
+  assert(Prg4.has_kernel<k4>());
+
+  myQueue.submit([&](handler &cgh) {
+    cgh.parallel_for<k4>(
+        Krn4, range<1>(2), id<1>(0), [=](item<1> index) {}, Props);
+  });
+
+  myQueue.submit([&](handler &cgh) {
+    cgh.parallel_for(range<1>(2), id<1>(0), Krn4, Props);
+  });
+
+  cl::sycl::program Prg5(myQueue.get_context());
+  Prg5.build_with_kernel_type<k5>();
+  cl::sycl::kernel Krn5 = Prg5.get_kernel<k5>();
+
+  assert(Prg5.has_kernel<k5>());
+
+  myQueue.submit([&](handler &cgh) {
+    cgh.parallel_for<k5>(
+        Krn5, ndRange, [=](nd_item<1> index) {}, Props);
+  });
+
+  myQueue.submit([&](handler &cgh) { cgh.parallel_for(ndRange, Krn5, Props); });
+
+  cl::sycl::program Prg6(myQueue.get_context());
+  Prg6.build_with_kernel_type<k6>();
+  cl::sycl::kernel Krn6 = Prg6.get_kernel<k6>();
+
+  assert(Prg6.has_kernel<k6>());
+
+  myQueue.submit([&](handler &cgh) {
+    cgh.parallel_for_work_group<k6>(
+        Krn6, range<1>(2), [=](group<1> g) {}, Props);
+  });
+
+  cl::sycl::program Prg7(myQueue.get_context());
+  Prg7.build_with_kernel_type<k7>();
+  cl::sycl::kernel Krn7 = Prg7.get_kernel<k7>();
+
+  assert(Prg7.has_kernel<k7>());
+
+  myQueue.submit([&](handler &cgh) {
+    cgh.parallel_for_work_group<k7>(
+        Krn7, range<1>(2), range<1>(2), [=](group<1> g) {}, Props);
+  });
+
+  myQueue.submit(
+      [&](handler &cgh) { cgh.single_task([=]() {}, Props); });
+
+  myQueue.submit([&](handler &cgh) {
+    cgh.parallel_for(
+        range<1>(2), [=](id<1> index) {}, Props);
+  });
+  myQueue.submit([&](handler &cgh) {
+    cgh.parallel_for(
+        range<2>(2, 2), [=](id<2> index) {}, Props);
+  });
+  myQueue.submit([&](handler &cgh) {
+    cgh.parallel_for(
+        range<3>(2, 2, 2), [=](id<3> index) {}, Props);
+  });
+  myQueue.submit([&](handler &cgh) {
+    cgh.parallel_for(
+        range<1>(2), id<1>(0), [=](item<1> index) {}, Props);
+  });
+
+  myQueue.submit([&](handler &cgh) {
+    cgh.parallel_for(
+        ndRange, [=](nd_item<1> index) {}, Props);
+  });
+
+  myQueue.submit([&](handler &cgh) {
+    cgh.parallel_for_work_group(
+        range<1>(2),
+        [=](group<1> g) {
+          g.parallel_for_work_item(range<1>(2), [&](h_item<1> i) {});
+        },
+        Props);
+  });
+
+  myQueue.submit([&](handler &cgh) {
+    cgh.parallel_for_work_group(
+        range<1>(2), range<1>(2),
+        [=](group<1> g) {
+          g.parallel_for_work_item(range<1>(2), [&](h_item<1> i) {});
+        },
+        Props);
+  });
+
+  // Reductions currently don't work on Level Zero because of a GPU driver
+  // issue.
+  // TODO: enable when fixed.
+  if (myQueue.get_context().get_platform().get_backend() !=
+      backend::level_zero) {
+    buffer<int> InBuf1(2);
+    buffer<int> InBuf2(2);
+    buffer<int> OutBuf1(1);
+    buffer<int> OutBuf2(1);
+
+    myQueue
+        .submit([&](handler &CGH) {
+          auto In1 = InBuf1.get_access(CGH);
+
+          auto Out1 = OutBuf1.get_access(CGH);
+
+          auto Lambda = [=](nd_item<1> NDIt, auto &Sum1) {};
+
+          auto Redu1 =
+              ONEAPI::reduction(Out1, 0, std::plus<int>{});
+
+          auto NDR = nd_range<1>{range<1>(2), range<1>{2}};
+          CGH.parallel_for(NDR, Redu1, Lambda, Props);
+        })
+        .wait();
+
+    myQueue
+        .submit([&](handler &CGH) {
+          auto In1 = InBuf1.get_access(CGH);
+          auto In2 = InBuf2.get_access(CGH);
+
+          auto Out1 = OutBuf1.get_access(CGH);
+          auto Out2 = OutBuf2.get_access(CGH);
+
+          auto Lambda = [=](nd_item<1> NDIt, auto &Sum1, auto &Sum2) {};
+
+          auto Redu1 =
+              ONEAPI::reduction(Out1, 0, std::plus<int>{});
+          auto Redu2 =
+              ONEAPI::reduction(Out2, 0, std::plus<int>{});
+
+          auto NDR = nd_range<1>{range<1>(2), range<1>{2}};
+          CGH.parallel_for(NDR, Redu1, Redu2, Lambda, Props);
+        })
+        .wait();
+  }
+
+  return 0;
+}