diff --git a/llvm/lib/SYCLLowerIR/CMakeLists.txt b/llvm/lib/SYCLLowerIR/CMakeLists.txt index 99cc970843a01..dbed269df7da7 100644 --- a/llvm/lib/SYCLLowerIR/CMakeLists.txt +++ b/llvm/lib/SYCLLowerIR/CMakeLists.txt @@ -13,11 +13,12 @@ endif() if (NOT TARGET LLVMGenXIntrinsics) if (NOT DEFINED LLVMGenXIntrinsics_SOURCE_DIR) set(LLVMGenXIntrinsics_GIT_REPO https://github.com/intel/vc-intrinsics.git) - # commit a9bb6d8040c43404c5fbe3694e59c503d179d19a - # Author: Nikita Rudenko - # Date: Tue Feb 1 14:57:43 2022 +0000 - # Fix attributes are not forwarded for call inst with SEV - set(LLVMGenXIntrinsics_GIT_TAG a9bb6d8040c43404c5fbe3694e59c503d179d19a) + # commit 8b6e209fe1269a2c6470b36dfbaa0e051d2a100f (master) + # Author: Konstantin Vladimirov + # Date: Tue Feb 8 10:47:03 2022 +0000 + # introducing named barrier support in adaptor pass + # named barrier required for DPC++ and other customers + set(LLVMGenXIntrinsics_GIT_TAG 8b6e209fe1269a2c6470b36dfbaa0e051d2a100f) message(STATUS "vc-intrinsics repo is missing. Will try to download it from ${LLVMGenXIntrinsics_GIT_REPO}") include(FetchContent) diff --git a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp index 379cf1000b0dc..732f384a15ae5 100644 --- a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp +++ b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp @@ -23,6 +23,7 @@ #include "llvm/Demangle/Demangle.h" #include "llvm/Demangle/ItaniumDemangle.h" #include "llvm/GenXIntrinsics/GenXIntrinsics.h" +#include "llvm/GenXIntrinsics/GenXMetadata.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" @@ -445,6 +446,9 @@ class ESIMDIntrinDescTable { {"raw_send2_noresult", {"raw.send2.noresult", {a(0), a(1), ai1(2), a(3), a(4), a(5), a(6), a(7)}}}, + {"nbarrier", {"nbarrier", {a(0), a(1), a(2)}}}, + {"raw_send_nbarrier_signal", + {"raw.send.noresult", {a(0), ai1(4), a(1), a(2), a(3)}}}, {"sat", {"sat", {a(0)}}}, {"fptoui_sat", {"fptoui.sat", {a(0)}}}, {"fptosi_sat", {"fptosi.sat", {a(0)}}}, @@ -885,6 +889,34 @@ static void translateUnPackMask(CallInst &CI) { CI.replaceAllUsesWith(TransCI); } +// This function sets VCNamedBarrierCount attribute to set +// the number of named barriers required by a kernel +static void translateNbarrierInit(CallInst &CI) { + auto *F = CI.getFunction(); + + auto *ArgV = CI.getArgOperand(0); + assert(isa(ArgV) && + "integral constant expected for nbarrier count"); + + auto NewVal = cast(ArgV)->getZExtValue(); + assert(NewVal != 0 && "zero nbarrier count being requested"); + + if (llvm::MDNode *Node = getSLMSizeMDNode(F)) { + if (llvm::Value *OldCount = + getVal(Node->getOperand(genx::KernelMDOp::NBarrierCnt))) { + assert(isa(OldCount) && "integer constant expected"); + llvm::Value *NewCount = + llvm::ConstantInt::get(OldCount->getType(), NewVal); + uint64_t OldVal = cast(OldCount)->getZExtValue(); + if (OldVal < NewVal) + Node->replaceOperandWith(genx::KernelMDOp::NBarrierCnt, + getMD(NewCount)); + } + } else { + llvm_unreachable("esimd_nbarrier_init can only be called by a kernel"); + } +} + static bool translateVLoad(CallInst &CI, SmallPtrSet &GVTS) { if (GVTS.find(CI.getType()) != GVTS.end()) return false; @@ -1406,7 +1438,10 @@ void generateKernelMetadata(Module &M) { getMD(llvm::ConstantInt::getNullValue(I32Ty)), // SLM size in bytes getMD(llvm::ConstantInt::getNullValue(I32Ty)), // arg offsets IOKinds, - ArgDescs}; + ArgDescs, + getMD(llvm::ConstantInt::getNullValue(I32Ty)), // named barrier count + getMD(llvm::ConstantInt::getNullValue(I32Ty)) // regular barrier count + }; // Add this kernel to the root. Kernels->addOperand(MDNode::get(Ctx, MDArgs)); @@ -1527,6 +1562,11 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F, ToErase.push_back(CI); continue; } + if (Name.startswith("__esimd_nbarrier_init")) { + translateNbarrierInit(*CI); + ToErase.push_back(CI); + continue; + } if (Name.startswith("__esimd_pack_mask")) { translatePackMask(*CI); ToErase.push_back(CI); diff --git a/llvm/test/SYCLLowerIR/ESIMD/acc_ptr.ll b/llvm/test/SYCLLowerIR/ESIMD/acc_ptr.ll index 0a7d4c5c55c7e..7a61729386bc0 100644 --- a/llvm/test/SYCLLowerIR/ESIMD/acc_ptr.ll +++ b/llvm/test/SYCLLowerIR/ESIMD/acc_ptr.ll @@ -30,7 +30,7 @@ define weak_odr dso_local spir_kernel void @ESIMDKernel(i32 %_arg_, float addrsp ; CHECK: attributes #[[GENX_MAIN]] = { "CMGenxMain" "oclrt"="1" } ; CHECK: !genx.kernels = !{![[GENX_KERNELS:[0-9]+]]} -; CHECK: ![[GENX_KERNELS]] = !{void (i32, float addrspace(1)*, float addrspace(1)*, i32, float addrspace(1)*)* @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_IO_KINDS:[0-9]+]], ![[ARG_DESCS:[0-9]+]]} +; CHECK: ![[GENX_KERNELS]] = !{void (i32, float addrspace(1)*, float addrspace(1)*, i32, float addrspace(1)*)* @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_IO_KINDS:[0-9]+]], ![[ARG_DESCS:[0-9]+]], i32 0, i32 0} ; CHECK: ![[ARG_KINDS]] = !{i32 0, i32 2, i32 2, i32 0, i32 0} ; CHECK: ![[ARG_IO_KINDS]] = !{i32 0, i32 0, i32 0, i32 0, i32 0} ; CHECK: ![[ARG_DESCS]] = !{!"", !"buffer_t", !"buffer_t", !"", !"svmptr_t"} diff --git a/llvm/test/SYCLLowerIR/esimd_lower_nbarriers.ll b/llvm/test/SYCLLowerIR/esimd_lower_nbarriers.ll new file mode 100644 index 0000000000000..9cd1217d105f1 --- /dev/null +++ b/llvm/test/SYCLLowerIR/esimd_lower_nbarriers.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s -LowerESIMD -S | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown-sycldevice" + +; Function Attrs: convergent norecurse mustprogress +define dso_local spir_kernel void @_ZTSZ6calleriE12kernel_esimd() !sycl_explicit_simd !3 { +entry: +; CHECK: call void @llvm.genx.nbarrier(i8 0, i8 2, i8 0) + call spir_func void @_Z16__esimd_nbarrierhhh(i8 zeroext 0, i8 zeroext 2, i8 zeroext 0) + +; CHECK: call void @llvm.genx.raw.send.noresult.i1.v8i32(i32 0, i1 true, i32 3, i32 33554436, <8 x i32> ) + call spir_func void @_Z32__esimd_raw_send_nbarrier_signalIjLi8EEvjjjN2cl4sycl5INTEL3gpu6detail11vector_typeIT_XT0_EE4typeEt(i32 0, i32 3, i32 33554436, <8 x i32> , i16 zeroext 1) + + ret void +} +!3 = !{} + +declare dso_local spir_func void @_Z16__esimd_nbarrierhhh(i8 zeroext, i8 zeroext, i8 zeroext) local_unnamed_addr #1 +declare dso_local spir_func void @_Z32__esimd_raw_send_nbarrier_signalIjLi8EEvjjjN2cl4sycl5INTEL3gpu6detail11vector_typeIT_XT0_EE4typeEt(i32, i32, i32, <8 x i32>, i16 zeroext) diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd/nbarriers-metadata.ll b/llvm/test/tools/sycl-post-link/sycl-esimd/nbarriers-metadata.ll new file mode 100644 index 0000000000000..7a404b64ef7c0 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/sycl-esimd/nbarriers-metadata.ll @@ -0,0 +1,19 @@ +; RUN: sycl-post-link -split-esimd -lower-esimd -S %s -o %t.table +; RUN: FileCheck %s -input-file=%t_esimd_0.ll + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown-sycldevice" + +; Function Attrs: convergent norecurse mustprogress +define dso_local spir_kernel void @_ZTSZ6calleriE12kernel_esimd() #0 !sycl_explicit_simd !3 { +entry: + tail call spir_func void @_Z21__esimd_nbarrier_inith(i8 zeroext 7) + ret void +} + +!3 = !{} + +declare dso_local spir_func void @_Z21__esimd_nbarrier_inith(i8 zeroext) +; CHECK: attributes #0 = { {{.*}}"VCNamedBarrierCount"="7"{{.*}} } + +attributes #0 = { "sycl-module-id"="a.cpp" } diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp index 6c2fbf1cf1efe..0e787120cfa3d 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp @@ -927,4 +927,62 @@ __ESIMD_INTRIN void __esimd_raw_send2_noresult( } #endif // __SYCL_DEVICE_ONLY__ +/// Represents named barrier synchronization for a subgroup of threads. +/// Available only on PVC +/// +/// @param mode - is wait(0) or signal(1) +/// +/// @param id - barrier id +/// +/// @param thread_count - number of threads, ignored in 'wait' mode +__ESIMD_INTRIN void __esimd_nbarrier(uint8_t mode, uint8_t id, + uint8_t thread_count) +#ifdef __SYCL_DEVICE_ONLY__ + ; +#else // __SYCL_DEVICE_ONLY__ +{ + throw cl::sycl::feature_not_supported(); +} +#endif // __SYCL_DEVICE_ONLY__ + +/// Initialize number of named barriers for a kernel +/// Available only on PVC +/// +/// @param count - number of named barriers +__ESIMD_INTRIN void __esimd_nbarrier_init(uint8_t count) +#ifdef __SYCL_DEVICE_ONLY__ + ; +#else // __SYCL_DEVICE_ONLY__ +{ + throw cl::sycl::feature_not_supported(); +} +#endif // __SYCL_DEVICE_ONLY__ + +/// Raw send signal to perform signal operation on named barriers +/// Available only on PVC +/// @tparam Ty - message element type +/// +/// @tparam N - message length +/// +/// @param is_sendc - is sendc +/// +/// @param extended_descriptor - extended message descriptor +/// +/// @param descriptor - message descriptor +/// +/// @param msg_var - source operand of send message +/// +/// @param pred - predicate for enabled channels +template +__ESIMD_INTRIN void __esimd_raw_send_nbarrier_signal( + uint32_t is_sendc, uint32_t extended_descriptor, uint32_t descriptor, + __SEIEED::vector_type_t msg_var, uint16_t pred = 1) +#ifdef __SYCL_DEVICE_ONLY__ + ; +#else // __SYCL_DEVICE_ONLY__ +{ + throw cl::sycl::feature_not_supported(); +} +#endif // __SYCL_DEVICE_ONLY__ + /// @endcond ESIMD_DETAIL diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp index 731c2d09df1a3..d5a359a232364 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp @@ -1252,8 +1252,63 @@ raw_send_store(simd msgSrc0, uint32_t exDesc, uint32_t msgDesc, numSrc0, sfid, exDesc, msgDesc, msgSrc0.data()); } + /// @} sycl_esimd_raw_send +/// @defgroup sycl_esimd_memory_nbarrier Named barrier APIs. +/// @ingroup sycl_esimd_memory + +/// @addtogroup sycl_esimd_memory_nbarrier +/// @{ + +/// Wait on a named barrier +/// Available only on PVC +/// +/// @param id - named barrier id +__ESIMD_API void nbarrier_wait(uint8_t id) { + __esimd_nbarrier(0 /*wait*/, id, 0 /*thread count*/); +} + +/// Initialize number of named barriers for a kernel +/// Available only on PVC +/// +/// @tparam NbarCount - number of named barriers +template __ESIMD_API void nbarrier_init() { + __esimd_nbarrier_init(NbarCount); +} + +/// Perform signal operation for the given named barrier +/// Available only on PVC +/// +/// @param barrier_id - named barrier id +/// +/// @param producer_consumer_mode - 2-bit flag to indicate if it's producer +/// mode (0x1) or consumer mode (0x2). User must ensure the input value is set +/// correctly and higher order bits are cleared. +/// +/// @param num_producers - number of producers +/// +/// @param num_consumers - number of consumers +__ESIMD_API void nbarrier_signal(uint8_t barrier_id, + uint8_t producer_consumer_mode, + uint32_t num_producers, + uint32_t num_consumers) { + constexpr uint32_t gateway = 3; + constexpr uint32_t barrier = 4; + constexpr uint32_t descriptor = 1 << 25 | // Message length: 1 register + 0 << 12 | // Fence Data Ports: No fence + barrier; // Barrier subfunction + + detail::vector_type_t payload = 0; + payload[2] = (num_consumers & 0xff) << 24 | (num_producers & 0xff) << 16 | + producer_consumer_mode << 14 | (barrier_id & 0b11111) << 0; + + __esimd_raw_send_nbarrier_signal( + 0 /*sendc*/, gateway, descriptor, payload, 1 /*pred*/); +} + +/// @} sycl_esimd_memory_nbarrier + #undef __ESIMD_GET_SURF_HANDLE /// @cond EXCLUDE diff --git a/sycl/test/esimd/nbarriers.cpp b/sycl/test/esimd/nbarriers.cpp new file mode 100644 index 0000000000000..4ede16040c950 --- /dev/null +++ b/sycl/test/esimd/nbarriers.cpp @@ -0,0 +1,19 @@ +// RUN: %clangxx -fsycl -c -fsycl-device-only -Xclang -emit-llvm %s -o %t + +#include +#include + +using namespace sycl::ext::intel::experimental::esimd; + +template +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { + kernelFunc(); +} + +void caller(int x) { + kernel([=]() SYCL_ESIMD_KERNEL { + nbarrier_init<7>(); + nbarrier_wait(2); + nbarrier_signal(0, 0, 4, 4); + }); +}