Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL][ESIMD] Add support for named barrier APIs #5583

Merged
merged 2 commits into from Feb 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 6 additions & 5 deletions llvm/lib/SYCLLowerIR/CMakeLists.txt
Expand Up @@ -13,11 +13,12 @@ endif()
if (NOT TARGET LLVMGenXIntrinsics)
if (NOT DEFINED LLVMGenXIntrinsics_SOURCE_DIR)
set(LLVMGenXIntrinsics_GIT_REPO https://github.com/intel/vc-intrinsics.git)
# commit a9bb6d8040c43404c5fbe3694e59c503d179d19a
# Author: Nikita Rudenko <nikita.rudenko@intel.com>
# Date: Tue Feb 1 14:57:43 2022 +0000
# Fix attributes are not forwarded for call inst with SEV
set(LLVMGenXIntrinsics_GIT_TAG a9bb6d8040c43404c5fbe3694e59c503d179d19a)
# commit 8b6e209fe1269a2c6470b36dfbaa0e051d2a100f (master)
# Author: Konstantin Vladimirov <konstantin.vladimirov@intel.com>
# Date: Tue Feb 8 10:47:03 2022 +0000
# introducing named barrier support in adaptor pass
# named barrier required for DPC++ and other customers
set(LLVMGenXIntrinsics_GIT_TAG 8b6e209fe1269a2c6470b36dfbaa0e051d2a100f)

message(STATUS "vc-intrinsics repo is missing. Will try to download it from ${LLVMGenXIntrinsics_GIT_REPO}")
include(FetchContent)
Expand Down
42 changes: 41 additions & 1 deletion llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Expand Up @@ -23,6 +23,7 @@
#include "llvm/Demangle/Demangle.h"
#include "llvm/Demangle/ItaniumDemangle.h"
#include "llvm/GenXIntrinsics/GenXIntrinsics.h"
#include "llvm/GenXIntrinsics/GenXMetadata.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
Expand Down Expand Up @@ -445,6 +446,9 @@ class ESIMDIntrinDescTable {
{"raw_send2_noresult",
{"raw.send2.noresult",
{a(0), a(1), ai1(2), a(3), a(4), a(5), a(6), a(7)}}},
{"nbarrier", {"nbarrier", {a(0), a(1), a(2)}}},
{"raw_send_nbarrier_signal",
{"raw.send.noresult", {a(0), ai1(4), a(1), a(2), a(3)}}},
{"sat", {"sat", {a(0)}}},
{"fptoui_sat", {"fptoui.sat", {a(0)}}},
{"fptosi_sat", {"fptosi.sat", {a(0)}}},
Expand Down Expand Up @@ -885,6 +889,34 @@ static void translateUnPackMask(CallInst &CI) {
CI.replaceAllUsesWith(TransCI);
}

// This function sets VCNamedBarrierCount attribute to set
// the number of named barriers required by a kernel
static void translateNbarrierInit(CallInst &CI) {
auto *F = CI.getFunction();

auto *ArgV = CI.getArgOperand(0);
assert(isa<ConstantInt>(ArgV) &&
"integral constant expected for nbarrier count");

auto NewVal = cast<llvm::ConstantInt>(ArgV)->getZExtValue();
assert(NewVal != 0 && "zero nbarrier count being requested");

if (llvm::MDNode *Node = getSLMSizeMDNode(F)) {
if (llvm::Value *OldCount =
getVal(Node->getOperand(genx::KernelMDOp::NBarrierCnt))) {
assert(isa<llvm::ConstantInt>(OldCount) && "integer constant expected");
llvm::Value *NewCount =
llvm::ConstantInt::get(OldCount->getType(), NewVal);
uint64_t OldVal = cast<llvm::ConstantInt>(OldCount)->getZExtValue();
if (OldVal < NewVal)
Node->replaceOperandWith(genx::KernelMDOp::NBarrierCnt,
getMD(NewCount));
}
} else {
llvm_unreachable("esimd_nbarrier_init can only be called by a kernel");
}
}

static bool translateVLoad(CallInst &CI, SmallPtrSet<Type *, 4> &GVTS) {
if (GVTS.find(CI.getType()) != GVTS.end())
return false;
Expand Down Expand Up @@ -1406,7 +1438,10 @@ void generateKernelMetadata(Module &M) {
getMD(llvm::ConstantInt::getNullValue(I32Ty)), // SLM size in bytes
getMD(llvm::ConstantInt::getNullValue(I32Ty)), // arg offsets
IOKinds,
ArgDescs};
ArgDescs,
getMD(llvm::ConstantInt::getNullValue(I32Ty)), // named barrier count
getMD(llvm::ConstantInt::getNullValue(I32Ty)) // regular barrier count
};

// Add this kernel to the root.
Kernels->addOperand(MDNode::get(Ctx, MDArgs));
Expand Down Expand Up @@ -1527,6 +1562,11 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,
ToErase.push_back(CI);
continue;
}
if (Name.startswith("__esimd_nbarrier_init")) {
translateNbarrierInit(*CI);
ToErase.push_back(CI);
continue;
}
if (Name.startswith("__esimd_pack_mask")) {
translatePackMask(*CI);
ToErase.push_back(CI);
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/SYCLLowerIR/ESIMD/acc_ptr.ll
Expand Up @@ -30,7 +30,7 @@ define weak_odr dso_local spir_kernel void @ESIMDKernel(i32 %_arg_, float addrsp

; CHECK: attributes #[[GENX_MAIN]] = { "CMGenxMain" "oclrt"="1" }
; CHECK: !genx.kernels = !{![[GENX_KERNELS:[0-9]+]]}
; CHECK: ![[GENX_KERNELS]] = !{void (i32, float addrspace(1)*, float addrspace(1)*, i32, float addrspace(1)*)* @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_IO_KINDS:[0-9]+]], ![[ARG_DESCS:[0-9]+]]}
; CHECK: ![[GENX_KERNELS]] = !{void (i32, float addrspace(1)*, float addrspace(1)*, i32, float addrspace(1)*)* @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_IO_KINDS:[0-9]+]], ![[ARG_DESCS:[0-9]+]], i32 0, i32 0}
; CHECK: ![[ARG_KINDS]] = !{i32 0, i32 2, i32 2, i32 0, i32 0}
; CHECK: ![[ARG_IO_KINDS]] = !{i32 0, i32 0, i32 0, i32 0, i32 0}
; CHECK: ![[ARG_DESCS]] = !{!"", !"buffer_t", !"buffer_t", !"", !"svmptr_t"}
Expand Down
21 changes: 21 additions & 0 deletions llvm/test/SYCLLowerIR/esimd_lower_nbarriers.ll
@@ -0,0 +1,21 @@
; RUN: opt < %s -LowerESIMD -S | FileCheck %s

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown-sycldevice"

; Function Attrs: convergent norecurse mustprogress
define dso_local spir_kernel void @_ZTSZ6calleriE12kernel_esimd() !sycl_explicit_simd !3 {
entry:
; CHECK: call void @llvm.genx.nbarrier(i8 0, i8 2, i8 0)
call spir_func void @_Z16__esimd_nbarrierhhh(i8 zeroext 0, i8 zeroext 2, i8 zeroext 0)

; CHECK: call void @llvm.genx.raw.send.noresult.i1.v8i32(i32 0, i1 true, i32 3, i32 33554436, <8 x i32> <i32 0, i32 0, i32 67371008, i32 0, i32 0, i32 0, i32 0, i32 0>)
call spir_func void @_Z32__esimd_raw_send_nbarrier_signalIjLi8EEvjjjN2cl4sycl5INTEL3gpu6detail11vector_typeIT_XT0_EE4typeEt(i32 0, i32 3, i32 33554436, <8 x i32> <i32 0, i32 0, i32 67371008, i32 0, i32 0, i32
0, i32 0, i32 0>, i16 zeroext 1)

ret void
}
!3 = !{}

declare dso_local spir_func void @_Z16__esimd_nbarrierhhh(i8 zeroext, i8 zeroext, i8 zeroext) local_unnamed_addr #1
declare dso_local spir_func void @_Z32__esimd_raw_send_nbarrier_signalIjLi8EEvjjjN2cl4sycl5INTEL3gpu6detail11vector_typeIT_XT0_EE4typeEt(i32, i32, i32, <8 x i32>, i16 zeroext)
19 changes: 19 additions & 0 deletions llvm/test/tools/sycl-post-link/sycl-esimd/nbarriers-metadata.ll
@@ -0,0 +1,19 @@
; RUN: sycl-post-link -split-esimd -lower-esimd -S %s -o %t.table
; RUN: FileCheck %s -input-file=%t_esimd_0.ll

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown-sycldevice"

; Function Attrs: convergent norecurse mustprogress
define dso_local spir_kernel void @_ZTSZ6calleriE12kernel_esimd() #0 !sycl_explicit_simd !3 {
entry:
tail call spir_func void @_Z21__esimd_nbarrier_inith(i8 zeroext 7)
ret void
}

!3 = !{}

declare dso_local spir_func void @_Z21__esimd_nbarrier_inith(i8 zeroext)
; CHECK: attributes #0 = { {{.*}}"VCNamedBarrierCount"="7"{{.*}} }

attributes #0 = { "sycl-module-id"="a.cpp" }
Expand Up @@ -927,4 +927,62 @@ __ESIMD_INTRIN void __esimd_raw_send2_noresult(
}
#endif // __SYCL_DEVICE_ONLY__

/// Represents named barrier synchronization for a subgroup of threads.
/// Available only on PVC
///
/// @param mode - is wait(0) or signal(1)
///
/// @param id - barrier id
///
/// @param thread_count - number of threads, ignored in 'wait' mode
__ESIMD_INTRIN void __esimd_nbarrier(uint8_t mode, uint8_t id,
uint8_t thread_count)
#ifdef __SYCL_DEVICE_ONLY__
;
#else // __SYCL_DEVICE_ONLY__
{
throw cl::sycl::feature_not_supported();
}
#endif // __SYCL_DEVICE_ONLY__

/// Initialize number of named barriers for a kernel
/// Available only on PVC
///
/// @param count - number of named barriers
__ESIMD_INTRIN void __esimd_nbarrier_init(uint8_t count)
#ifdef __SYCL_DEVICE_ONLY__
;
#else // __SYCL_DEVICE_ONLY__
{
throw cl::sycl::feature_not_supported();
}
#endif // __SYCL_DEVICE_ONLY__

/// Raw send signal to perform signal operation on named barriers
/// Available only on PVC
/// @tparam Ty - message element type
///
/// @tparam N - message length
///
/// @param is_sendc - is sendc
///
/// @param extended_descriptor - extended message descriptor
///
/// @param descriptor - message descriptor
///
/// @param msg_var - source operand of send message
///
/// @param pred - predicate for enabled channels
template <typename Ty, int N>
__ESIMD_INTRIN void __esimd_raw_send_nbarrier_signal(
uint32_t is_sendc, uint32_t extended_descriptor, uint32_t descriptor,
__SEIEED::vector_type_t<Ty, N> msg_var, uint16_t pred = 1)
#ifdef __SYCL_DEVICE_ONLY__
;
#else // __SYCL_DEVICE_ONLY__
{
throw cl::sycl::feature_not_supported();
}
#endif // __SYCL_DEVICE_ONLY__

/// @endcond ESIMD_DETAIL
55 changes: 55 additions & 0 deletions sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp
Expand Up @@ -1252,8 +1252,63 @@ raw_send_store(simd<T1, n1> msgSrc0, uint32_t exDesc, uint32_t msgDesc,
numSrc0, sfid, exDesc, msgDesc,
msgSrc0.data());
}

/// @} sycl_esimd_raw_send

/// @defgroup sycl_esimd_memory_nbarrier Named barrier APIs.
/// @ingroup sycl_esimd_memory

/// @addtogroup sycl_esimd_memory_nbarrier
/// @{

/// Wait on a named barrier
/// Available only on PVC
///
/// @param id - named barrier id
__ESIMD_API void nbarrier_wait(uint8_t id) {
__esimd_nbarrier(0 /*wait*/, id, 0 /*thread count*/);
}

/// Initialize number of named barriers for a kernel
/// Available only on PVC
///
/// @tparam NbarCount - number of named barriers
template <uint8_t NbarCount> __ESIMD_API void nbarrier_init() {
__esimd_nbarrier_init(NbarCount);
}

/// Perform signal operation for the given named barrier
/// Available only on PVC
///
/// @param barrier_id - named barrier id
///
/// @param producer_consumer_mode - 2-bit flag to indicate if it's producer
/// mode (0x1) or consumer mode (0x2). User must ensure the input value is set
/// correctly and higher order bits are cleared.
///
/// @param num_producers - number of producers
///
/// @param num_consumers - number of consumers
__ESIMD_API void nbarrier_signal(uint8_t barrier_id,
uint8_t producer_consumer_mode,
uint32_t num_producers,
uint32_t num_consumers) {
constexpr uint32_t gateway = 3;
constexpr uint32_t barrier = 4;
constexpr uint32_t descriptor = 1 << 25 | // Message length: 1 register
0 << 12 | // Fence Data Ports: No fence
barrier; // Barrier subfunction

detail::vector_type_t<uint32_t, 8> payload = 0;
payload[2] = (num_consumers & 0xff) << 24 | (num_producers & 0xff) << 16 |
producer_consumer_mode << 14 | (barrier_id & 0b11111) << 0;

__esimd_raw_send_nbarrier_signal<uint32_t, 8>(
0 /*sendc*/, gateway, descriptor, payload, 1 /*pred*/);
}

/// @} sycl_esimd_memory_nbarrier

#undef __ESIMD_GET_SURF_HANDLE

/// @cond EXCLUDE
Expand Down
19 changes: 19 additions & 0 deletions sycl/test/esimd/nbarriers.cpp
@@ -0,0 +1,19 @@
// RUN: %clangxx -fsycl -c -fsycl-device-only -Xclang -emit-llvm %s -o %t

#include <CL/sycl.hpp>
#include <sycl/ext/intel/experimental/esimd.hpp>

using namespace sycl::ext::intel::experimental::esimd;

template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel(Func kernelFunc) {
kernelFunc();
}

void caller(int x) {
kernel<class kernel_esimd>([=]() SYCL_ESIMD_KERNEL {
nbarrier_init<7>();
nbarrier_wait(2);
nbarrier_signal(0, 0, 4, 4);
});
}