diff --git a/SYCL/Basic/accessor/accessor.cpp b/SYCL/Basic/accessor/accessor.cpp index 9c5f0bf62b..cee11ad89f 100644 --- a/SYCL/Basic/accessor/accessor.cpp +++ b/SYCL/Basic/accessor/accessor.cpp @@ -107,7 +107,8 @@ template void TestAccSizeFuncs(const std::vector &vec) { q.submit([&](sycl::handler &cgh) { sycl::accessor accRes(bufRes, cgh); sycl::local_accessor locAcc(vec.size(), cgh); - cgh.single_task([=]() { test(accRes, locAcc); }); + cgh.parallel_for(sycl::nd_range<1>{1, 1}, + [=](sycl::nd_item<1>) { test(accRes, locAcc); }); }); q.wait(); } @@ -120,7 +121,7 @@ template void testLocalAccItersImpl(sycl::handler &cgh, GlobAcc &globAcc, LocAcc &locAcc, bool testConstIter) { if (testConstIter) { - cgh.single_task([=]() { + cgh.parallel_for(sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1>) { size_t Idx = 0; for (auto &It : locAcc) { It = globAcc[Idx++]; @@ -133,7 +134,7 @@ void testLocalAccItersImpl(sycl::handler &cgh, GlobAcc &globAcc, LocAcc &locAcc, globAcc[Idx--] += *It; }); } else { - cgh.single_task([=]() { + cgh.parallel_for(sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1>) { size_t Idx = 0; for (auto It = locAcc.begin(); It != locAcc.end(); It++) *It = globAcc[Idx++] * 2; @@ -991,10 +992,11 @@ int main() { sycl::accessor acc1(buf1, cgh); sycl::accessor acc2(buf2, cgh); acc1.swap(acc2); - cgh.single_task([=]() { - acc1[15] = 4; - acc2[7] = 4; - }); + cgh.parallel_for(sycl::nd_range<1>{1, 1}, + [=](sycl::nd_item<1>) { + acc1[15] = 4; + acc2[7] = 4; + }); }); } assert(vec1[7] == 4 && vec2[15] == 4); @@ -1012,10 +1014,11 @@ int main() { sycl::accessor acc2(buf2, cgh); sycl::local_accessor locAcc1(8, cgh), locAcc2(16, cgh); locAcc1.swap(locAcc2); - cgh.single_task([=]() { - acc1[0] = locAcc1.size(); - acc2[0] = locAcc2.size(); - }); + cgh.parallel_for(sycl::nd_range<1>{1, 1}, + [=](sycl::nd_item<1>) { + acc1[0] = locAcc1.size(); + acc2[0] = locAcc2.size(); + }); }); } assert(size1 == 16 && size2 == 8); @@ -1082,19 +1085,54 @@ int main() { // Explicit block to prompt copy-back to Data { sycl::buffer DataBuffer(&Data, sycl::range<1>(1)); - Queue.submit([&](sycl::handler &CGH) { sycl::accessor Acc(DataBuffer, CGH); sycl::local_accessor LocalAcc(CGH); - CGH.single_task([=]() { - LocalAcc = 64; - Acc = LocalAcc; - }); + CGH.parallel_for(sycl::nd_range<1>{1, 1}, + [=](sycl::nd_item<1>) { + LocalAcc = 64; + Acc = LocalAcc; + }); }); } assert(Data == 64); } + // Throws exception on local_accessors used in single_task + { + constexpr static int size = 1; + sycl::queue Queue; + + try { + Queue.submit([&](sycl::handler &cgh) { + auto local_acc = sycl::local_accessor({size}, cgh); + cgh.single_task([=]() { (void)local_acc; }); + }); + assert(0 && "local accessor must not be used in single task."); + } catch (sycl::exception e) { + std::cout << "SYCL exception caught: " << e.what() << std::endl; + } + } + + // Throws exception on local_accessors used in parallel_for taking a range + // parameter. + { + constexpr static int size = 1; + sycl::queue Queue; + + try { + Queue.submit([&](sycl::handler &cgh) { + auto local_acc = sycl::local_accessor({size}, cgh); + cgh.parallel_for( + sycl::range<1>{size}, [=](sycl::id<1> ID) { (void)local_acc; }); + }); + assert(0 && + "local accessor must not be used in parallel for with range."); + } catch (sycl::exception e) { + std::cout << "SYCL exception caught: " << e.what() << std::endl; + } + } + std::cout << "Test passed" << std::endl; } diff --git a/SYCL/Basic/multi_ptr.hpp b/SYCL/Basic/multi_ptr.hpp index 889f9c0890..502a1180f4 100644 --- a/SYCL/Basic/multi_ptr.hpp +++ b/SYCL/Basic/multi_ptr.hpp @@ -104,11 +104,11 @@ template void testMultPtr() { local_accessor localAccessor(numOfItems, cgh); cgh.parallel_for>(range<1>{10}, [=](id<1> wiID) { + T, IsDecorated>>(nd_range<1>{10, 10}, [=](nd_item<1> wiID) { T private_data[10]; for (size_t i = 0; i < 10; ++i) private_data[i] = 0; - localAccessor[wiID] = 0; + localAccessor[wiID.get_local_id()] = 0; auto ptr_1 = multi_ptr( @@ -166,8 +166,8 @@ template void testMultPtr() { global_ptr ptr_12 = global_ptr(ptr_11); - innerFunc(wiID.get(0), ptr_1, ptr_2, ptr_3, ptr_4, - ptr_5, local_ptr, priv_ptr); + innerFunc(wiID.get_local_id().get(0), ptr_1, ptr_2, + ptr_3, ptr_4, ptr_5, local_ptr, priv_ptr); }); }); } @@ -201,8 +201,8 @@ void testMultPtrArrowOperator() { access::placeholder::false_t> accessorData_3(bufferData_3, cgh); - cgh.single_task>( - [=]() { + cgh.parallel_for>( + sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1>) { point private_val = 0; auto ptr_1 = diff --git a/SYCL/Basic/multi_ptr_legacy.hpp b/SYCL/Basic/multi_ptr_legacy.hpp index 979c9b7f6c..b6161d3390 100644 --- a/SYCL/Basic/multi_ptr_legacy.hpp +++ b/SYCL/Basic/multi_ptr_legacy.hpp @@ -62,50 +62,51 @@ template void testMultPtr() { accessorData_2(bufferData_2, cgh); local_accessor localAccessor(numOfItems, cgh); - cgh.parallel_for>(range<1>{10}, [=](id<1> - wiID) { - auto ptr_1 = - make_ptr(accessorData_1.get_pointer()); - auto ptr_2 = - make_ptr(accessorData_2.get_pointer()); - auto local_ptr = - make_ptr(localAccessor.get_pointer()); - - // Construct extension pointer from accessors. - auto dev_ptr = - multi_ptr( - accessorData_1); - static_assert( - std::is_same_v, decltype(dev_ptr)>, - "Incorrect type for dev_ptr."); - - // General conversions in multi_ptr class - T *RawPtr = nullptr; - global_ptr ptr_4(RawPtr); - ptr_4 = RawPtr; - - global_ptr ptr_5(accessorData_1); - - global_ptr ptr_6((void *)RawPtr); - - ptr_6 = (void *)RawPtr; - - // Explicit conversions for device_ptr/host_ptr to global_ptr - ext::intel::device_ptr ptr_7((void *)RawPtr); - global_ptr ptr_8 = global_ptr(ptr_7); - ext::intel::host_ptr ptr_9((void *)RawPtr); - global_ptr ptr_10 = global_ptr(ptr_9); - // TODO: need propagation of a7b763b26 patch to acl tool before - // testing these conversions - otherwise the test would fail on - // accelerator device during reversed translation from SPIR-V to - // LLVM IR device_ptr ptr_11(accessorData_1); global_ptr - // ptr_12 = global_ptr(ptr_11); - - innerFunc(wiID.get(0), ptr_1, ptr_2, local_ptr); - }); + cgh.parallel_for>( + nd_range<1>{10, 10}, [=](nd_item<1> wiID) { + auto ptr_1 = make_ptr( + accessorData_1.get_pointer()); + auto ptr_2 = make_ptr( + accessorData_2.get_pointer()); + auto local_ptr = make_ptr( + localAccessor.get_pointer()); + + // Construct extension pointer from accessors. + auto dev_ptr = + multi_ptr( + accessorData_1); + static_assert( + std::is_same_v, decltype(dev_ptr)>, + "Incorrect type for dev_ptr."); + + // General conversions in multi_ptr class + T *RawPtr = nullptr; + global_ptr ptr_4(RawPtr); + ptr_4 = RawPtr; + + global_ptr ptr_5(accessorData_1); + + global_ptr ptr_6((void *)RawPtr); + + ptr_6 = (void *)RawPtr; + + // Explicit conversions for device_ptr/host_ptr to global_ptr + ext::intel::device_ptr ptr_7((void *)RawPtr); + global_ptr ptr_8 = global_ptr(ptr_7); + ext::intel::host_ptr ptr_9((void *)RawPtr); + global_ptr ptr_10 = global_ptr(ptr_9); + // TODO: need propagation of a7b763b26 patch to acl tool before + // testing these conversions - otherwise the test would fail on + // accelerator device during reversed translation from SPIR-V to + // LLVM IR device_ptr ptr_11(accessorData_1); global_ptr + // ptr_12 = global_ptr(ptr_11); + + innerFunc(wiID.get_local_id().get(0), ptr_1, ptr_2, local_ptr); + }); }); } for (size_t i = 0; i < 10; ++i) { @@ -141,35 +142,38 @@ template void testMultPtrArrowOperator() { access::placeholder::false_t> accessorData_4(bufferData_4, cgh); - cgh.single_task>([=]() { - auto ptr_1 = - make_ptr, access::address_space::global_space, - access::decorated::legacy>(accessorData_1.get_pointer()); - auto ptr_2 = - make_ptr, access::address_space::constant_space, - access::decorated::legacy>(accessorData_2.get_pointer()); - auto ptr_3 = - make_ptr, access::address_space::local_space, - access::decorated::legacy>(accessorData_3.get_pointer()); - auto ptr_4 = - make_ptr, - access::address_space::ext_intel_global_device_space, - access::decorated::legacy>(accessorData_4.get_pointer()); - - auto x1 = ptr_1->x; - auto x2 = ptr_2->x; - auto x3 = ptr_3->x; - auto x4 = ptr_4->x; - - static_assert(std::is_same::value, - "Expected decltype(ptr_1->x) == T"); - static_assert(std::is_same::value, - "Expected decltype(ptr_2->x) == T"); - static_assert(std::is_same::value, - "Expected decltype(ptr_3->x) == T"); - static_assert(std::is_same::value, - "Expected decltype(ptr_4->x) == T"); - }); + cgh.parallel_for>( + sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1>) { + auto ptr_1 = make_ptr, access::address_space::global_space, + access::decorated::legacy>( + accessorData_1.get_pointer()); + auto ptr_2 = + make_ptr, access::address_space::constant_space, + access::decorated::legacy>( + accessorData_2.get_pointer()); + auto ptr_3 = make_ptr, access::address_space::local_space, + access::decorated::legacy>( + accessorData_3.get_pointer()); + auto ptr_4 = + make_ptr, + access::address_space::ext_intel_global_device_space, + access::decorated::legacy>( + accessorData_4.get_pointer()); + + auto x1 = ptr_1->x; + auto x2 = ptr_2->x; + auto x3 = ptr_3->x; + auto x4 = ptr_4->x; + + static_assert(std::is_same::value, + "Expected decltype(ptr_1->x) == T"); + static_assert(std::is_same::value, + "Expected decltype(ptr_2->x) == T"); + static_assert(std::is_same::value, + "Expected decltype(ptr_3->x) == T"); + static_assert(std::is_same::value, + "Expected decltype(ptr_4->x) == T"); + }); }); } } diff --git a/SYCL/DeviceLib/string_test.cpp b/SYCL/DeviceLib/string_test.cpp index 1cffcf3048..a3b485569a 100644 --- a/SYCL/DeviceLib/string_test.cpp +++ b/SYCL/DeviceLib/string_test.cpp @@ -406,22 +406,23 @@ bool kernel_test_memcpy_addr_space(sycl::queue &deviceQueue) { sycl::access::target::device, sycl::access::placeholder::false_t> dst1_acc(buffer3, cgh); - cgh.single_task([=]() { - // memcpy from constant buffer to local buffer - memcpy(local_acc.get_pointer(), src_acc.get_pointer(), 8); - for (size_t idx = 0; idx < 7; ++idx) - local_acc[idx] += 1; - // memcpy from local buffer to global buffer - memcpy(dst_acc.get_pointer(), local_acc.get_pointer(), 8); - char device_buf[16]; - // memcpy from constant buffer to private memory - memcpy(device_buf, src_acc.get_pointer(), 8); - for (size_t idx = 0; idx < 7; ++idx) { - device_buf[idx] += 2; - // memcpy from private to global buffer - memcpy(dst1_acc.get_pointer(), device_buf, 8); - } - }); + cgh.parallel_for( + sycl::nd_range<1>{16, 16}, [=](sycl::nd_item<1>) { + // memcpy from constant buffer to local buffer + memcpy(local_acc.get_pointer(), src_acc.get_pointer(), 8); + for (size_t idx = 0; idx < 7; ++idx) + local_acc[idx] += 1; + // memcpy from local buffer to global buffer + memcpy(dst_acc.get_pointer(), local_acc.get_pointer(), 8); + char device_buf[16]; + // memcpy from constant buffer to private memory + memcpy(device_buf, src_acc.get_pointer(), 8); + for (size_t idx = 0; idx < 7; ++idx) { + device_buf[idx] += 2; + // memcpy from private to global buffer + memcpy(dst1_acc.get_pointer(), device_buf, 8); + } + }); }); } diff --git a/SYCL/DiscardEvents/discard_events_accessors.cpp b/SYCL/DiscardEvents/discard_events_accessors.cpp index ce3570e6ec..46affede73 100644 --- a/SYCL/DiscardEvents/discard_events_accessors.cpp +++ b/SYCL/DiscardEvents/discard_events_accessors.cpp @@ -55,6 +55,7 @@ int main(int Argc, const char *Argv[]) { sycl::property::queue::in_order{}, sycl::ext::oneapi::property::queue::discard_events{}}; sycl::queue Q(props); + sycl::nd_range<1> NDRange(BUFFER_SIZE, BUFFER_SIZE); sycl::range<1> Range(BUFFER_SIZE); RunKernelHelper(Q, [&](int *Harray) { @@ -63,7 +64,7 @@ int main(int Argc, const char *Argv[]) { sycl::local_accessor LocalAcc(LocalMemSize, CGH); CGH.parallel_for( - Range, [=](sycl::item<1> itemID) { + NDRange, [=](sycl::item<1> itemID) { size_t i = itemID.get_id(0); int *Ptr = LocalAcc.get_pointer(); Ptr[i] = i + 5; diff --git a/SYCL/Regression/local-arg-align.cpp b/SYCL/Regression/local-arg-align.cpp index ecd4fea37e..f37162b025 100644 --- a/SYCL/Regression/local-arg-align.cpp +++ b/SYCL/Regression/local-arg-align.cpp @@ -34,7 +34,7 @@ int main(int argc, char *argv[]) { // argument first and the float4 argument second. If the two arguments are // simply laid out consecutively, the float4 argument will not be // correctly aligned. - h.parallel_for(1, [a, b, ares](sycl::id<1> i) { + h.parallel_for(sycl::nd_range<1>{1, 1}, [a, b, ares](sycl::nd_item<1>) { // Get the addresses of the two local buffers ares[0] = (size_t)&a[0]; ares[1] = (size_t)&b[0]; diff --git a/SYCL/Regression/local_accessor_3d_subscript.cpp b/SYCL/Regression/local_accessor_3d_subscript.cpp index d67e4fcb45..d07eeb52d1 100644 --- a/SYCL/Regression/local_accessor_3d_subscript.cpp +++ b/SYCL/Regression/local_accessor_3d_subscript.cpp @@ -23,9 +23,10 @@ int main() { Q.submit([&](sycl::handler &CGH) { sycl::local_accessor LocalMem(sycl::range<3>(1, 1, 1), CGH); auto Acc = Buf.get_access(CGH); - CGH.parallel_for(1, [=](sycl::item<1> It) { - LocalMem[It][It][It] = 42; - Acc[It] = LocalMem[It][It][It]; + CGH.parallel_for(sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1> It) { + LocalMem[It.get_local_id()][It.get_local_id()][It.get_local_id()] = 42; + Acc[It.get_local_id()] = + LocalMem[It.get_local_id()][It.get_local_id()][It.get_local_id()]; }); }); } diff --git a/SYCL/Regression/zero_size_local_accessor.cpp b/SYCL/Regression/zero_size_local_accessor.cpp index 6298d8b6db..41641f3970 100644 --- a/SYCL/Regression/zero_size_local_accessor.cpp +++ b/SYCL/Regression/zero_size_local_accessor.cpp @@ -17,7 +17,7 @@ int main() { sycl::queue Q; Q.submit([&](sycl::handler &CGH) { sycl::local_accessor ZeroSizeLocalAcc(sycl::range<1>(0), CGH); - CGH.single_task([=]() { + CGH.parallel_for(sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1>) { if (ZeroSizeLocalAcc.get_range()[0]) ZeroSizeLocalAcc[0] = 1; }); diff --git a/SYCL/XPTI/buffer/accessors.cpp b/SYCL/XPTI/buffer/accessors.cpp index 84b9dc7773..f411cd6896 100644 --- a/SYCL/XPTI/buffer/accessors.cpp +++ b/SYCL/XPTI/buffer/accessors.cpp @@ -21,6 +21,7 @@ int main() { sycl::buffer Buf(3); sycl::range<1> Range{Buf.size()}; + sycl::nd_range<1> NDRange(Buf.size(), Buf.size()); Queue.submit([&](sycl::handler &cgh) { // CHECK: {{[0-9]+}}|Construct accessor|[[BUFFERID]]|[[ACCID1:.+]]|2015|1024|{{.*}}accessors.cpp:[[# @LINE + 1]]:15 @@ -35,7 +36,7 @@ int main() { auto A5 = Buf.get_access(cgh); // CHECK: {{[0-9]+}}|Construct accessor|[[BUFFERID]]|[[ACCID6:.*]]|2014|1029|{{.*}}accessors.cpp:[[# @LINE + 1]]:15 auto A6 = Buf.get_access(cgh); - cgh.parallel_for(Range, [=](sycl::id<1> WIid) { + cgh.parallel_for(NDRange, [=](sycl::id<1> WIid) { (void)A1; (void)A2; (void)A3; diff --git a/SYCL/XPTI/kernel/basic.cpp b/SYCL/XPTI/kernel/basic.cpp index e2283aee24..4a58f7934c 100644 --- a/SYCL/XPTI/kernel/basic.cpp +++ b/SYCL/XPTI/kernel/basic.cpp @@ -49,6 +49,7 @@ int main() { // CHECK:{{[0-9]+}}|Create buffer|[[BUFFERID:[0-9,a-f,x]+]]|0x0|{{i(nt)*}}|4|1|{5,0,0}|{{.*}}.cpp:[[# @LINE + 1]]:24 sycl::buffer Buf(5); sycl::range<1> Range{Buf.size()}; + sycl::nd_range<1> NDRange{Buf.size(), Buf.size()}; short Val = Buf.size(); auto PtrDevice = sycl::malloc_device(7, Queue); auto PtrShared = sycl::malloc_shared(8, Queue); @@ -58,22 +59,19 @@ int main() { auto A1 = Buf.get_access(cgh); // CHECK: {{[0-9]+}}|Construct accessor|0x0|[[ACCID2:.*]]|2016|1026|{{.*}}.cpp:[[# @LINE + 1]]:38 sycl::local_accessor A2(Range, cgh); - // CHECK-OPT:Node create|{{.*}}FillBuffer{{.*}}|{{.*}}.cpp:[[# @LINE - 6 ]]:3|{5, 1, 1}, {0, 0, 0}, {0, 0, 0}, 6 - // CHECK-NOOPT:Node create|{{.*}}FillBuffer{{.*}}|{{.*}}.cpp:[[# @LINE - 7 ]]:3|{5, 1, 1}, {0, 0, 0}, {0, 0, 0}, 12 - cgh.parallel_for( - Range, [=](sycl::id<1> WIid, sycl::kernel_handler kh) { - // CHECK-OPT: arg0 : {1, {{[0-9,a-f,x]+}}, 2, 0} - int h = Val; - // CHECK-OPT: arg1 : {1, {{.*}}0, 20, 1} - A2[WIid[0]] = h; - // CHECK-OPT: arg2 : {0, [[ACCID1]], 4062, 2} - // CHECK-OPT: arg3 : {1, [[ACCID1]], 8, 3} - A1[WIid[0]] = A2[WIid[0]]; - // CHECK-OPT: arg4 : {3, {{.*}}, 8, 4} - PtrDevice[WIid[0]] = WIid[0]; - // CHECK-OPT: arg5 : {3, {{.*}}, 8, 5} - PtrShared[WIid[0]] = PtrDevice[WIid[0]]; - }); + cgh.parallel_for(NDRange, [=](sycl::id<1> WIid) { + // CHECK-OPT: arg0 : {1, {{[0-9,a-f,x]+}}, 2, 0} + int h = Val; + // CHECK-OPT: arg1 : {1, {{.*}}0, 20, 1} + A2[WIid[0]] = h; + // CHECK-OPT: arg2 : {0, [[ACCID1]], 4062, 2} + // CHECK-OPT: arg3 : {1, [[ACCID1]], 8, 3} + A1[WIid[0]] = A2[WIid[0]]; + // CHECK-OPT: arg4 : {3, {{.*}}, 8, 4} + PtrDevice[WIid[0]] = WIid[0]; + // CHECK-OPT: arg5 : {3, {{.*}}, 8, 5} + PtrShared[WIid[0]] = PtrDevice[WIid[0]]; + }); }) .wait();