From ead6e5a1b531b4398d91d97ba00ced7d29409238 Mon Sep 17 00:00:00 2001 From: fineg74 <61437305+fineg74@users.noreply.github.com> Date: Wed, 31 Aug 2022 21:42:26 -0400 Subject: [PATCH 1/5] Add test for lsc predicates --- SYCL/ESIMD/lsc/lsc_predicate.cpp | 200 +++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 SYCL/ESIMD/lsc/lsc_predicate.cpp diff --git a/SYCL/ESIMD/lsc/lsc_predicate.cpp b/SYCL/ESIMD/lsc/lsc_predicate.cpp new file mode 100644 index 0000000000..5edadaab14 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_predicate.cpp @@ -0,0 +1,200 @@ +//==------------ lsc_neg.cpp - DPC++ ESIMD on-device test ------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc || esimd_emulator +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "../esimd_test_utils.hpp" + +#include +#include +#include +#include +#include + +using namespace sycl; +using namespace sycl::ext::intel::esimd; +using namespace sycl::ext::intel::experimental::esimd; + +int testAccessor(queue q) { + auto size = size_t{128}; + auto constexpr SIMDSize = unsigned{4}; + + auto vec_0 = std::vector(size); + auto vec_1 = std::vector(size); + auto vec_2 = std::vector(size); + auto vec_3 = std::vector(size); + + std::iota(vec_0.begin(), vec_0.end(), 0); + std::iota(vec_1.begin(), vec_1.end(), 0); + std::iota(vec_2.begin(), vec_2.end(), 0); + std::iota(vec_3.begin(), vec_3.end(), 0); + auto buf_0 = buffer{vec_0}; + auto buf_1 = buffer{vec_1}; + auto buf_2 = buffer{vec_2}; + auto buf_3 = buffer{vec_3}; + + try { + q.submit([&](handler &h) { + auto access_0 = buf_0.template get_access(h); + auto access_1 = buf_1.template get_access(h); + auto access_2 = buf_2.template get_access(h); + auto access_3 = buf_3.template get_access(h); + + h.parallel_for( + range<1>{size / SIMDSize}, [=](id<1> id) SYCL_ESIMD_KERNEL { + auto offset = id[0] * SIMDSize * sizeof(int); + auto offsets = simd(id * SIMDSize * sizeof(int), + sizeof(int)); + auto pred_enable = simd_mask<1>(1); + auto pred_disable = simd_mask<1>(0); + + lsc_prefetch(access_0, + offset); + auto data_0 = + lsc_block_load(access_0, offset, pred_enable); + lsc_block_store(access_0, offset, data_0 * 2, + pred_enable); + + lsc_prefetch(access_1, + offset); + auto data_1 = + lsc_block_load(access_1, offset, pred_disable); + lsc_block_store(access_1, offset, data_1 * 2, + pred_enable); + + lsc_prefetch(access_2, + offset); + auto data_2 = + lsc_block_load(access_2, offset, pred_enable); + lsc_block_store(access_2, offset, data_2 * 2, + pred_disable); + + lsc_prefetch(access_3, + offset); + auto data_3 = + lsc_block_load(access_3, offset, pred_disable); + lsc_block_store(access_3, offset, data_3 * 2, + pred_disable); + }); + }); + q.wait(); + buf_0.template get_access(); + buf_1.template get_access(); + buf_2.template get_access(); + buf_3.template get_access(); + } catch (sycl::exception e) { + std::cout << "SYCL exception caught: " << e.what(); + return 1; + } + + auto error = 0; + for (auto i = 0; i != size; ++i) { + error += vec_0[i] != 2 * i; + error += vec_1[i] > 0; + error += vec_2[i] != i; + error += vec_3[i] != i; + } + std::cout << "Accessor lsc predicate test "; + std::cout << (error != 0 ? "FAILED" : "passed") << std::endl; + return error; +} + +int testUSM(queue q) { + auto size = size_t{128}; + auto constexpr SIMDSize = unsigned{4}; + + auto *vec_0 = malloc_shared(size, q); + auto *vec_1 = malloc_shared(size, q); + auto *vec_2 = malloc_shared(size, q); + auto *vec_3 = malloc_shared(size, q); + std::iota(vec_0, vec_0 + size, 0); + std::iota(vec_1, vec_1 + size, 0); + std::iota(vec_2, vec_2 + size, 0); + std::iota(vec_3, vec_3 + size, 0); + + try { + q.submit([&](handler &h) { + h.parallel_for(range<1>{size / SIMDSize}, [=](id<1> + id) SYCL_ESIMD_KERNEL { + auto offset = id[0] * SIMDSize; + auto offsets = + simd(id * SIMDSize * sizeof(int), sizeof(int)); + auto pred_enable = simd_mask<1>(1); + auto pred_disable = simd_mask<1>(0); + + lsc_prefetch(vec_0 + offset); + auto data_0 = + lsc_block_load(vec_0 + offset, pred_enable); + lsc_block_store(vec_0 + offset, data_0 * 2, pred_enable); + + lsc_prefetch(vec_1 + offset); + auto data_1 = + lsc_block_load(vec_1 + offset, pred_disable); + lsc_block_store(vec_1 + offset, data_1 * 2, pred_enable); + + lsc_prefetch(vec_2 + offset); + auto data_2 = + lsc_block_load(vec_2 + offset, pred_enable); + lsc_block_store(vec_2 + offset, data_2 * 2, + pred_disable); + + lsc_prefetch(vec_3 + offset); + auto data_3 = + lsc_block_load(vec_3 + offset, pred_disable); + lsc_block_store(vec_3 + offset, data_3 * 2, + pred_disable); + }); + }); + q.wait(); + } catch (sycl::exception e) { + std::cout << "SYCL exception caught: " << e.what(); + sycl::free(vec_0, q); + sycl::free(vec_1, q); + sycl::free(vec_2, q); + sycl::free(vec_3, q); + return 1; + } + + int error = 0; + for (auto i = 0; i != size; ++i) { + error += vec_0[i] != 2 * i; + error += vec_1[i] > 0; + error += vec_2[i] != i; + error += vec_3[i] != i; + } + sycl::free(vec_0, q); + sycl::free(vec_1, q); + sycl::free(vec_2, q); + sycl::free(vec_3, q); + std::cout << "USM lsc predicate test "; + std::cout << (error != 0 ? "FAILED" : "passed") << std::endl; + return error; +} + +int main() { + + auto q = + queue{esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler()}; + auto device = q.get_device(); + std::cout << "Device name: " << device.get_info() + << std::endl; + + int error = testUSM(q); + error += testAccessor(q); + return error; +} From 255c744e78d7bba0821b0c82d1054fe0a7c2cafd Mon Sep 17 00:00:00 2001 From: gregory Date: Thu, 1 Sep 2022 09:49:21 -0700 Subject: [PATCH 2/5] Fix formatting issues --- SYCL/ESIMD/lsc/lsc_predicate.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SYCL/ESIMD/lsc/lsc_predicate.cpp b/SYCL/ESIMD/lsc/lsc_predicate.cpp index 5edadaab14..53a768ebb7 100644 --- a/SYCL/ESIMD/lsc/lsc_predicate.cpp +++ b/SYCL/ESIMD/lsc/lsc_predicate.cpp @@ -39,14 +39,14 @@ int testAccessor(queue q) { auto buf_1 = buffer{vec_1}; auto buf_2 = buffer{vec_2}; auto buf_3 = buffer{vec_3}; - + try { q.submit([&](handler &h) { auto access_0 = buf_0.template get_access(h); auto access_1 = buf_1.template get_access(h); auto access_2 = buf_2.template get_access(h); auto access_3 = buf_3.template get_access(h); - + h.parallel_for( range<1>{size / SIMDSize}, [=](id<1> id) SYCL_ESIMD_KERNEL { auto offset = id[0] * SIMDSize * sizeof(int); @@ -186,7 +186,7 @@ int testUSM(queue q) { return error; } -int main() { +int main() { auto q = queue{esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler()}; From 523ebba6d6b00fcd8f054f8fdb856a0fbe249225 Mon Sep 17 00:00:00 2001 From: gregory Date: Wed, 7 Sep 2022 09:51:36 -0700 Subject: [PATCH 3/5] Address PR comments --- SYCL/ESIMD/lsc/lsc_predicate.cpp | 94 +++++++---------- SYCL/ESIMD/lsc/lsc_predicate_stateless.cpp | 117 +++++++++++++++++++++ 2 files changed, 157 insertions(+), 54 deletions(-) create mode 100644 SYCL/ESIMD/lsc/lsc_predicate_stateless.cpp diff --git a/SYCL/ESIMD/lsc/lsc_predicate.cpp b/SYCL/ESIMD/lsc/lsc_predicate.cpp index 53a768ebb7..ad221c3ae0 100644 --- a/SYCL/ESIMD/lsc/lsc_predicate.cpp +++ b/SYCL/ESIMD/lsc/lsc_predicate.cpp @@ -1,4 +1,4 @@ -//==------------ lsc_neg.cpp - DPC++ ESIMD on-device test ------------------==// +//==------------ lsc_predicate.cpp - DPC++ ESIMD on-device test ------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,10 +6,12 @@ // //===----------------------------------------------------------------------===// // REQUIRES: gpu-intel-pvc || esimd_emulator -// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out +// The test checks functionality of the lsc_block_load, lsc_block_store +// with newly introduced predicate parameter. + #include "../esimd_test_utils.hpp" #include @@ -22,9 +24,8 @@ using namespace sycl; using namespace sycl::ext::intel::esimd; using namespace sycl::ext::intel::experimental::esimd; -int testAccessor(queue q) { +template int testAccessor(queue q) { auto size = size_t{128}; - auto constexpr SIMDSize = unsigned{4}; auto vec_0 = std::vector(size); auto vec_1 = std::vector(size); @@ -55,33 +56,21 @@ int testAccessor(queue q) { auto pred_enable = simd_mask<1>(1); auto pred_disable = simd_mask<1>(0); - lsc_prefetch(access_0, - offset); auto data_0 = lsc_block_load(access_0, offset, pred_enable); lsc_block_store(access_0, offset, data_0 * 2, pred_enable); - lsc_prefetch(access_1, - offset); auto data_1 = lsc_block_load(access_1, offset, pred_disable); lsc_block_store(access_1, offset, data_1 * 2, pred_enable); - lsc_prefetch(access_2, - offset); auto data_2 = lsc_block_load(access_2, offset, pred_enable); lsc_block_store(access_2, offset, data_2 * 2, pred_disable); - lsc_prefetch(access_3, - offset); auto data_3 = lsc_block_load(access_3, offset, pred_disable); lsc_block_store(access_3, offset, data_3 * 2, @@ -110,9 +99,8 @@ int testAccessor(queue q) { return error; } -int testUSM(queue q) { +template int testUSM(queue q) { auto size = size_t{128}; - auto constexpr SIMDSize = unsigned{4}; auto *vec_0 = malloc_shared(size, q); auto *vec_1 = malloc_shared(size, q); @@ -125,40 +113,33 @@ int testUSM(queue q) { try { q.submit([&](handler &h) { - h.parallel_for(range<1>{size / SIMDSize}, [=](id<1> - id) SYCL_ESIMD_KERNEL { - auto offset = id[0] * SIMDSize; - auto offsets = - simd(id * SIMDSize * sizeof(int), sizeof(int)); - auto pred_enable = simd_mask<1>(1); - auto pred_disable = simd_mask<1>(0); - - lsc_prefetch(vec_0 + offset); - auto data_0 = - lsc_block_load(vec_0 + offset, pred_enable); - lsc_block_store(vec_0 + offset, data_0 * 2, pred_enable); - - lsc_prefetch(vec_1 + offset); - auto data_1 = - lsc_block_load(vec_1 + offset, pred_disable); - lsc_block_store(vec_1 + offset, data_1 * 2, pred_enable); - - lsc_prefetch(vec_2 + offset); - auto data_2 = - lsc_block_load(vec_2 + offset, pred_enable); - lsc_block_store(vec_2 + offset, data_2 * 2, - pred_disable); - - lsc_prefetch(vec_3 + offset); - auto data_3 = - lsc_block_load(vec_3 + offset, pred_disable); - lsc_block_store(vec_3 + offset, data_3 * 2, - pred_disable); - }); + h.parallel_for( + range<1>{size / SIMDSize}, [=](id<1> id) SYCL_ESIMD_KERNEL { + auto offset = id[0] * SIMDSize; + auto offsets = simd(id * SIMDSize * sizeof(int), + sizeof(int)); + auto pred_enable = simd_mask<1>(1); + auto pred_disable = simd_mask<1>(0); + + auto data_0 = + lsc_block_load(vec_0 + offset, pred_enable); + lsc_block_store(vec_0 + offset, data_0 * 2, + pred_enable); + + auto data_1 = + lsc_block_load(vec_1 + offset, pred_disable); + lsc_block_store(vec_1 + offset, data_1 * 2, + pred_enable); + + auto data_2 = + lsc_block_load(vec_2 + offset, pred_enable); + lsc_block_store(vec_2 + offset, data_2 * 2, + pred_disable); + auto data_3 = + lsc_block_load(vec_3 + offset, pred_disable); + lsc_block_store(vec_3 + offset, data_3 * 2, + pred_disable); + }); }); q.wait(); } catch (sycl::exception e) { @@ -194,7 +175,12 @@ int main() { std::cout << "Device name: " << device.get_info() << std::endl; - int error = testUSM(q); - error += testAccessor(q); + int error = testUSM<8>(q); + int error = testUSM<16>(q); + int error = testUSM<32>(q); + + error += testAccessor<8>(q); + error += testAccessor<16>(q); + error += testAccessor<32>(q); return error; } diff --git a/SYCL/ESIMD/lsc/lsc_predicate_stateless.cpp b/SYCL/ESIMD/lsc/lsc_predicate_stateless.cpp new file mode 100644 index 0000000000..5b168b7743 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_predicate_stateless.cpp @@ -0,0 +1,117 @@ +//==------------ lsc_predicate_stateless.cpp - DPC++ ESIMD on-device test +//------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc || esimd_emulator +// RUN: %clangxx -fsycl -fsycl-esimd-force-stateless-mem %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// The test checks functionality of the lsc_block_load, lsc_block_store +// accessor - based ESIMD intrinsics when stateless memory accesses are +// enforced, i.e. accessor based accesses are automatically converted to +// stateless accesses with newly introduced predicate parameter. + +#include "../esimd_test_utils.hpp" + +#include +#include +#include +#include +#include + +using namespace sycl; +using namespace sycl::ext::intel::esimd; +using namespace sycl::ext::intel::experimental::esimd; + +template int testAccessor(queue q) { + auto size = size_t{128}; + + auto vec_0 = std::vector(size); + auto vec_1 = std::vector(size); + auto vec_2 = std::vector(size); + auto vec_3 = std::vector(size); + + std::iota(vec_0.begin(), vec_0.end(), 0); + std::iota(vec_1.begin(), vec_1.end(), 0); + std::iota(vec_2.begin(), vec_2.end(), 0); + std::iota(vec_3.begin(), vec_3.end(), 0); + auto buf_0 = buffer{vec_0}; + auto buf_1 = buffer{vec_1}; + auto buf_2 = buffer{vec_2}; + auto buf_3 = buffer{vec_3}; + + try { + q.submit([&](handler &h) { + auto access_0 = buf_0.template get_access(h); + auto access_1 = buf_1.template get_access(h); + auto access_2 = buf_2.template get_access(h); + auto access_3 = buf_3.template get_access(h); + + h.parallel_for( + range<1>{size / SIMDSize}, [=](id<1> id) SYCL_ESIMD_KERNEL { + auto offset = id[0] * SIMDSize * sizeof(int); + auto offsets = simd(id * SIMDSize * sizeof(int), + sizeof(int)); + auto pred_enable = simd_mask<1>(1); + auto pred_disable = simd_mask<1>(0); + + auto data_0 = + lsc_block_load(access_0, offset, pred_enable); + lsc_block_store(access_0, offset, data_0 * 2, + pred_enable); + + auto data_1 = + lsc_block_load(access_1, offset, pred_disable); + lsc_block_store(access_1, offset, data_1 * 2, + pred_enable); + + auto data_2 = + lsc_block_load(access_2, offset, pred_enable); + lsc_block_store(access_2, offset, data_2 * 2, + pred_disable); + + auto data_3 = + lsc_block_load(access_3, offset, pred_disable); + lsc_block_store(access_3, offset, data_3 * 2, + pred_disable); + }); + }); + q.wait(); + buf_0.template get_access(); + buf_1.template get_access(); + buf_2.template get_access(); + buf_3.template get_access(); + } catch (sycl::exception e) { + std::cout << "SYCL exception caught: " << e.what(); + return 1; + } + + auto error = 0; + for (auto i = 0; i != size; ++i) { + error += vec_0[i] != 2 * i; + error += vec_1[i] > 0; + error += vec_2[i] != i; + error += vec_3[i] != i; + } + std::cout << "Accessor lsc predicate test "; + std::cout << (error != 0 ? "FAILED" : "passed") << std::endl; + return error; +} + +int main() { + + auto q = + queue{esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler()}; + auto device = q.get_device(); + std::cout << "Device name: " << device.get_info() + << std::endl; + + error += testAccessor<8>(q); + error += testAccessor<16>(q); + error += testAccessor<32>(q); + return error; +} From 420f2d8f9d90d3f25e92dfe8b50d1939ac230028 Mon Sep 17 00:00:00 2001 From: gregory Date: Wed, 7 Sep 2022 10:02:38 -0700 Subject: [PATCH 4/5] Address build breaks --- SYCL/ESIMD/lsc/lsc_predicate.cpp | 4 ++-- SYCL/ESIMD/lsc/lsc_predicate_stateless.cpp | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/SYCL/ESIMD/lsc/lsc_predicate.cpp b/SYCL/ESIMD/lsc/lsc_predicate.cpp index ad221c3ae0..5f0df4672a 100644 --- a/SYCL/ESIMD/lsc/lsc_predicate.cpp +++ b/SYCL/ESIMD/lsc/lsc_predicate.cpp @@ -176,8 +176,8 @@ int main() { << std::endl; int error = testUSM<8>(q); - int error = testUSM<16>(q); - int error = testUSM<32>(q); + error = testUSM<16>(q); + error = testUSM<32>(q); error += testAccessor<8>(q); error += testAccessor<16>(q); diff --git a/SYCL/ESIMD/lsc/lsc_predicate_stateless.cpp b/SYCL/ESIMD/lsc/lsc_predicate_stateless.cpp index 5b168b7743..48f0cbf3dc 100644 --- a/SYCL/ESIMD/lsc/lsc_predicate_stateless.cpp +++ b/SYCL/ESIMD/lsc/lsc_predicate_stateless.cpp @@ -1,5 +1,4 @@ -//==------------ lsc_predicate_stateless.cpp - DPC++ ESIMD on-device test -//------------==// +//==------------ lsc_predicate_stateless.cpp - DPC++ ESIMD on-device test -==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -110,7 +109,7 @@ int main() { std::cout << "Device name: " << device.get_info() << std::endl; - error += testAccessor<8>(q); + int error += testAccessor<8>(q); error += testAccessor<16>(q); error += testAccessor<32>(q); return error; From 3e221987a1bf2af810c874400ae082fd43064508 Mon Sep 17 00:00:00 2001 From: gregory Date: Wed, 7 Sep 2022 10:10:52 -0700 Subject: [PATCH 5/5] Address build break --- SYCL/ESIMD/lsc/lsc_predicate_stateless.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SYCL/ESIMD/lsc/lsc_predicate_stateless.cpp b/SYCL/ESIMD/lsc/lsc_predicate_stateless.cpp index 48f0cbf3dc..23ab9d3e52 100644 --- a/SYCL/ESIMD/lsc/lsc_predicate_stateless.cpp +++ b/SYCL/ESIMD/lsc/lsc_predicate_stateless.cpp @@ -109,7 +109,7 @@ int main() { std::cout << "Device name: " << device.get_info() << std::endl; - int error += testAccessor<8>(q); + int error = testAccessor<8>(q); error += testAccessor<16>(q); error += testAccessor<32>(q); return error;