From 5ad7a9cad9828522b25f97e25ca98d397a21be6c Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 5 Apr 2021 14:24:18 +0300 Subject: [PATCH 1/2] [SYCL] Add a test for group local memory w/o device code opt --- SYCL/GroupLocalMemory/no_early_opt.cpp | 62 ++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 SYCL/GroupLocalMemory/no_early_opt.cpp diff --git a/SYCL/GroupLocalMemory/no_early_opt.cpp b/SYCL/GroupLocalMemory/no_early_opt.cpp new file mode 100644 index 0000000000..76c608135c --- /dev/null +++ b/SYCL/GroupLocalMemory/no_early_opt.cpp @@ -0,0 +1,62 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fno-sycl-early-optimizations %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// The test checks that multiple calls to the same template instantiation of a +// group local memory function result in separate allocations, even with device +// code optimizations disabled (the implementation relies on inlining these +// functions regardless of device code optimization settings). + +#include + +#include +#include + +constexpr size_t WgSize = 32; +constexpr size_t WgCount = 4; +constexpr size_t Size = WgSize * WgCount; + +class KernelA; + +using namespace sycl; + +int main() { + queue Q; + { + std::vector VecA(Size, 0); + std::vector VecB(Size, 0); + buffer BufA{VecA.data(), range<1>(Size)}; + buffer BufB{VecB.data(), range<1>(Size)}; + + Q.submit([&](handler &Cgh) { + auto AccA = BufA.get_access(Cgh); + auto AccB = BufB.get_access(Cgh); + Cgh.parallel_for( + nd_range<1>(range<1>(Size), range<1>(WgSize)), [=](nd_item<1> Item) { + multi_ptr PtrA = + group_local_memory_for_overwrite(Item.get_group()); + multi_ptr PtrB = + group_local_memory_for_overwrite(Item.get_group()); + size_t LocalId = Item.get_local_linear_id(); + (*PtrA)[LocalId] = 1; + (*PtrB)[LocalId] = 1; + (*PtrA)[LocalId] *= LocalId; + (*PtrB)[LocalId] *= LocalId; + Item.barrier(); + + size_t LocalIdx = LocalId ^ 1; + size_t GlobalIdx = Item.get_global_linear_id() ^ 1; + AccA[GlobalIdx] = (*PtrA)[LocalIdx]; + AccB[GlobalIdx] = (*PtrB)[LocalIdx]; + }); + }); + + auto AccA = BufA.get_access(); + auto AccB = BufB.get_access(); + for (size_t I = 0; I < Size; ++I) { + assert(AccA[I] == I % WgSize); + assert(AccB[I] == I % WgSize); + } + } +} From f12fcd8db12979d1a6926eadfa2ad4194d2bab36 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 5 Apr 2021 17:53:13 +0300 Subject: [PATCH 2/2] Simplify the test --- SYCL/GroupLocalMemory/no_early_opt.cpp | 37 ++++++++++---------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/SYCL/GroupLocalMemory/no_early_opt.cpp b/SYCL/GroupLocalMemory/no_early_opt.cpp index 76c608135c..00680e7c23 100644 --- a/SYCL/GroupLocalMemory/no_early_opt.cpp +++ b/SYCL/GroupLocalMemory/no_early_opt.cpp @@ -24,39 +24,30 @@ using namespace sycl; int main() { queue Q; { - std::vector VecA(Size, 0); - std::vector VecB(Size, 0); - buffer BufA{VecA.data(), range<1>(Size)}; - buffer BufB{VecB.data(), range<1>(Size)}; + std::vector VecA(Size, 0); + std::vector VecB(Size, 0); + buffer BufA{VecA.data(), range<1>(Size)}; + buffer BufB{VecB.data(), range<1>(Size)}; Q.submit([&](handler &Cgh) { auto AccA = BufA.get_access(Cgh); auto AccB = BufB.get_access(Cgh); Cgh.parallel_for( nd_range<1>(range<1>(Size), range<1>(WgSize)), [=](nd_item<1> Item) { - multi_ptr PtrA = - group_local_memory_for_overwrite(Item.get_group()); - multi_ptr PtrB = - group_local_memory_for_overwrite(Item.get_group()); - size_t LocalId = Item.get_local_linear_id(); - (*PtrA)[LocalId] = 1; - (*PtrB)[LocalId] = 1; - (*PtrA)[LocalId] *= LocalId; - (*PtrB)[LocalId] *= LocalId; - Item.barrier(); - - size_t LocalIdx = LocalId ^ 1; - size_t GlobalIdx = Item.get_global_linear_id() ^ 1; - AccA[GlobalIdx] = (*PtrA)[LocalIdx]; - AccB[GlobalIdx] = (*PtrB)[LocalIdx]; + multi_ptr PtrA = + group_local_memory_for_overwrite(Item.get_group()); + multi_ptr PtrB = + group_local_memory_for_overwrite(Item.get_group()); + + size_t GlobalId = Item.get_global_linear_id(); + AccA[GlobalId] = PtrA; + AccB[GlobalId] = PtrB; }); }); auto AccA = BufA.get_access(); auto AccB = BufB.get_access(); - for (size_t I = 0; I < Size; ++I) { - assert(AccA[I] == I % WgSize); - assert(AccB[I] == I % WgSize); - } + for (size_t I = 0; I < Size; ++I) + assert(AccA[I] != AccB[I]); } }