From ea13b48dbd55948fc9dda0060453bfd78c459824 Mon Sep 17 00:00:00 2001 From: Vyacheslav N Klochkov Date: Wed, 22 Mar 2023 23:09:06 -0700 Subject: [PATCH 1/2] [ESIMD] Add tests for private/stack memory The tests verify that basic ESIMD API works correctly when the memory for SIMD is allocated on stack. Signed-off-by: Vyacheslav N Klochkov --- SYCL/ESIMD/private_memory.cpp | 179 ++++++++++++++++++++++++++++++ SYCL/ESIMD/private_memory_pvc.cpp | 16 +++ 2 files changed, 195 insertions(+) create mode 100644 SYCL/ESIMD/private_memory.cpp create mode 100644 SYCL/ESIMD/private_memory_pvc.cpp diff --git a/SYCL/ESIMD/private_memory.cpp b/SYCL/ESIMD/private_memory.cpp new file mode 100644 index 0000000000..bcf734df70 --- /dev/null +++ b/SYCL/ESIMD/private_memory.cpp @@ -0,0 +1,179 @@ +//==---------------- private_memory.cpp - DPC++ ESIMD on-device test ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu && !gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// The test verifies that basic ESIMD API works properly with +// private memory allocated on stack. + +#include "esimd_test_utils.hpp" + +#include +#include +#include + +using namespace sycl; +using namespace sycl::ext::intel::esimd; +using namespace sycl::ext::intel::experimental::esimd; + +constexpr specialization_id PrivateArrayLenSC(1); + +template +ESIMD_NOINLINE bool test(queue Q, int PrivateArrayLen) { + std::cout << "Testing T=" << esimd_test::type_name() + << " ArrLen=" << PrivateArrayLen << ", FirstN=" << FirstN + << ", UseConstExpr=" << UseConstExpr << "...\n"; + + int GlobalRange = 2; + int Size = GlobalRange * PrivateArrayLenConst; + + auto DstAsIsUPtr = esimd_test::usm_malloc_shared(Q, Size); + auto DstOddPlus1UPtr = esimd_test::usm_malloc_shared(Q, Size); + auto DstAfterBlockStoreUPtr = esimd_test::usm_malloc_shared(Q, Size); + + T *DstAsIs = DstAsIsUPtr.get(); + T *DstOddPlus1 = DstOddPlus1UPtr.get(); + T *DstAfterBlockStore = DstAfterBlockStoreUPtr.get(); + for (int I = 0; I < Size; I++) { + DstAsIs[I] = DstOddPlus1[I] = DstAfterBlockStore[I] = 0; + } + + T TOne = static_cast(1); + T TTen = static_cast(10); + + Q.submit([&](sycl::handler &CGH) { + CGH.set_specialization_constant(PrivateArrayLen); + CGH.parallel_for( + GlobalRange, [=](id<1> Id, sycl::kernel_handler KH) SYCL_ESIMD_KERNEL { + uint32_t ArrayLen; + if constexpr (UseConstExpr) { + // This declaration masks the declaration of PrivateArrayLen on + // HOST. + ArrayLen = KH.get_specialization_constant(); + } else { + // Simply use PrivateArrayLen declared/initialized on HOST. + ArrayLen = PrivateArrayLen; + } + T *PrivateArray = (T *)__builtin_alloca_with_align( + sizeof(T) * ArrayLen, sizeof(T) * 8 * 16); + + // Initialize private memory + for (int I = 0; I < ArrayLen; I++) { + simd IV(static_cast(Id) * PrivateArrayLen + I); + simd TV = IV; + TV.template copy_to(PrivateArray + I); + } + + simd BigVec(PrivateArray); + BigVec.copy_to(DstAsIs + ArrayLen * Id); + + // Check that scatter() works fine. + auto FirstNOdd = BigVec.template select(1).read(); + FirstNOdd = FirstNOdd + simd(TOne); + simd FirstNOddByteOffsets(sizeof(T), 2 * sizeof(T)); + scatter(PrivateArray, FirstNOddByteOffsets, FirstNOdd); + + simd BigVecOddPlus1(PrivateArray); + BigVecOddPlus1.copy_to(DstOddPlus1 + ArrayLen * Id); + + if constexpr (PrivateArrayLenConst > FirstN && + FirstN * sizeof(T) >= 16 && + FirstN * sizeof(T) <= 8 * 16) { + // Check that block_store() works fine. + BigVec.copy_to(PrivateArray); + simd BigVecFirstN = BigVec.template select(); + BigVecFirstN = BigVecFirstN * simd(TTen); + block_store(PrivateArray, BigVecFirstN); + + simd BigVecAfterBlockStore(PrivateArray); + BigVecAfterBlockStore.copy_to(DstAfterBlockStore + ArrayLen * Id); + } + }); + }).wait(); + + for (int I = 0; I < Size; I++) { + T Expected = I; + if (DstAsIs[I] != Expected) { + std::cout << "Error/DstAsIs[" << I << "]: " << DstAsIs[I] + << " != " << Expected << "(Expected)" << std::endl; + return false; + } + + int CurrentWI = I / PrivateArrayLenConst; + int IndexInWI = I - CurrentWI * PrivateArrayLenConst; + + Expected = I; + if ((IndexInWI & 1) && IndexInWI < FirstN * 2) + Expected = I + 1; + if (DstOddPlus1[I] != Expected) { + std::cout << "Error/DstOddPlus1[" << I << "]: " << DstOddPlus1[I] + << " != " << Expected << "(Expected)" << std::endl; + return false; + } + + if constexpr (PrivateArrayLenConst > FirstN && FirstN * sizeof(T) >= 16 && + FirstN * sizeof(T) <= 8 * 16) { + Expected = I; + if (IndexInWI < FirstN) + // Expected = I * I; + Expected = (T)I * TTen; + if (DstAfterBlockStore[I] != Expected) { + std::cout << "Error/DstAfterBlockStore[" << I + << "]: " << DstAfterBlockStore[I] << " != " << Expected + << "(Expected)" << std::endl; + return false; + } + } + } + + return true; +} + +template bool tests(queue Q) { + constexpr bool UseSpecConst = true; + + bool Passed = true; + Passed &= test(Q, 32); + Passed &= test(Q, 32); + + Passed &= test(Q, 256); + Passed &= test(Q, 256); + + return Passed; +} + +int main() { + queue Q; + std::cout << "Running on " << Q.get_device().get_info() + << "\n"; + + bool Passed = true; + Passed &= tests(Q); + Passed &= tests(Q); + Passed &= tests(Q); + Passed &= tests(Q); + + Passed &= tests(Q); + if (Q.get_device().has(sycl::aspect::fp16)) + Passed &= tests(Q); + if (Q.get_device().has(sycl::aspect::fp64)) + Passed &= tests(Q); + + // TODO: GPU driver reports an error during JIT compilation. + // Report and enable this case when driver is fixed. + // Passed &= tests(Q); + +#ifdef TEST_TFLOAT32 + Passed &= tests(Q); +#endif // TEST_TFLOAT32 + + std::cout << (Passed ? "Passed\n" : "FAILED\n"); + return Passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/private_memory_pvc.cpp b/SYCL/ESIMD/private_memory_pvc.cpp new file mode 100644 index 0000000000..b0a725a551 --- /dev/null +++ b/SYCL/ESIMD/private_memory_pvc.cpp @@ -0,0 +1,16 @@ +//==------------- private_memory_pvc.cpp - DPC++ ESIMD on-device test -----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// The test verifies that basic ESIMD API works properly with +// private memory allocated on stack. + +#define TEST_TFLOAT32 +#include "private_memory.cpp" From 3c940b01ec42944d4342b1c9361a7016489cb548 Mon Sep 17 00:00:00 2001 From: Vyacheslav N Klochkov Date: Fri, 24 Mar 2023 20:25:44 -0700 Subject: [PATCH 2/2] Add XFAIL the test for esimd_emulator Signed-off-by: Vyacheslav N Klochkov --- SYCL/ESIMD/private_memory.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/SYCL/ESIMD/private_memory.cpp b/SYCL/ESIMD/private_memory.cpp index bcf734df70..6e77be2a2e 100644 --- a/SYCL/ESIMD/private_memory.cpp +++ b/SYCL/ESIMD/private_memory.cpp @@ -7,6 +7,10 @@ //===----------------------------------------------------------------------===// // REQUIRES: gpu && !gpu-intel-pvc // UNSUPPORTED: cuda || hip + +// TODO online_compiler check fails for esimd_emulator. +// XFAIL: esimd_emulator + // RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out