Skip to content

Commit

Permalink
[SYCL][ESIMD] Add support for named barrier APIs
Browse files Browse the repository at this point in the history
Signed-off-by: Sergey Dmitriev <serguei.n.dmitriev@intel.com>
  • Loading branch information
sndmitriev committed Feb 16, 2022
1 parent 7f5bd40 commit f66adaa
Show file tree
Hide file tree
Showing 8 changed files with 220 additions and 7 deletions.
11 changes: 6 additions & 5 deletions llvm/lib/SYCLLowerIR/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@ endif()
if (NOT TARGET LLVMGenXIntrinsics)
if (NOT DEFINED LLVMGenXIntrinsics_SOURCE_DIR)
set(LLVMGenXIntrinsics_GIT_REPO https://github.com/intel/vc-intrinsics.git)
# commit a9bb6d8040c43404c5fbe3694e59c503d179d19a
# Author: Nikita Rudenko <nikita.rudenko@intel.com>
# Date: Tue Feb 1 14:57:43 2022 +0000
# Fix attributes are not forwarded for call inst with SEV
set(LLVMGenXIntrinsics_GIT_TAG a9bb6d8040c43404c5fbe3694e59c503d179d19a)
# commit 8b6e209fe1269a2c6470b36dfbaa0e051d2a100f (master)
# Author: Konstantin Vladimirov <konstantin.vladimirov@intel.com>
# Date: Tue Feb 8 10:47:03 2022 +0000
# introducing named barrier support in adaptor pass
# named barrier required for DPC++ and other customers
set(LLVMGenXIntrinsics_GIT_TAG 8b6e209fe1269a2c6470b36dfbaa0e051d2a100f)

message(STATUS "vc-intrinsics repo is missing. Will try to download it from ${LLVMGenXIntrinsics_GIT_REPO}")
include(FetchContent)
Expand Down
42 changes: 41 additions & 1 deletion llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "llvm/Demangle/Demangle.h"
#include "llvm/Demangle/ItaniumDemangle.h"
#include "llvm/GenXIntrinsics/GenXIntrinsics.h"
#include "llvm/GenXIntrinsics/GenXMetadata.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
Expand Down Expand Up @@ -445,6 +446,9 @@ class ESIMDIntrinDescTable {
{"raw_send2_noresult",
{"raw.send2.noresult",
{a(0), a(1), ai1(2), a(3), a(4), a(5), a(6), a(7)}}},
{"nbarrier", {"nbarrier", {a(0), a(1), a(2)}}},
{"raw_send_nbarrier_signal",
{"raw.send.noresult", {a(0), ai1(4), a(1), a(2), a(3)}}},
{"sat", {"sat", {a(0)}}},
{"fptoui_sat", {"fptoui.sat", {a(0)}}},
{"fptosi_sat", {"fptosi.sat", {a(0)}}},
Expand Down Expand Up @@ -885,6 +889,34 @@ static void translateUnPackMask(CallInst &CI) {
CI.replaceAllUsesWith(TransCI);
}

// This function sets VCNamedBarrierCount attribute to set
// the number of named barriers required by a kernel
static void translateNbarrierInit(CallInst &CI) {
auto F = CI.getParent()->getParent();

auto *ArgV = CI.getArgOperand(0);
assert(isa<ConstantInt>(ArgV) &&
"integral constant expected for nbarrier count");

auto NewVal = cast<llvm::ConstantInt>(ArgV)->getZExtValue();
assert(NewVal != 0 && "zero nbarrier count being requested");

if (llvm::MDNode *Node = getSLMSizeMDNode(F)) {
if (llvm::Value *OldCount =
getVal(Node->getOperand(genx::KernelMDOp::NBarrierCnt))) {
assert(isa<llvm::ConstantInt>(OldCount) && "integer constant expected");
llvm::Value *NewCount =
llvm::ConstantInt::get(OldCount->getType(), NewVal);
uint64_t OldVal = cast<llvm::ConstantInt>(OldCount)->getZExtValue();
if (OldVal < NewVal)
Node->replaceOperandWith(genx::KernelMDOp::NBarrierCnt,
getMD(NewCount));
}
} else {
llvm_unreachable("esimd_nbarrier_init can only be called by a kernel");
}
}

static bool translateVLoad(CallInst &CI, SmallPtrSet<Type *, 4> &GVTS) {
if (GVTS.find(CI.getType()) != GVTS.end())
return false;
Expand Down Expand Up @@ -1406,7 +1438,10 @@ void generateKernelMetadata(Module &M) {
getMD(llvm::ConstantInt::getNullValue(I32Ty)), // SLM size in bytes
getMD(llvm::ConstantInt::getNullValue(I32Ty)), // arg offsets
IOKinds,
ArgDescs};
ArgDescs,
getMD(llvm::ConstantInt::getNullValue(I32Ty)), // named barrier count
getMD(llvm::ConstantInt::getNullValue(I32Ty)) // regular barrier count
};

// Add this kernel to the root.
Kernels->addOperand(MDNode::get(Ctx, MDArgs));
Expand Down Expand Up @@ -1527,6 +1562,11 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,
ToErase.push_back(CI);
continue;
}
if (Name.startswith("__esimd_nbarrier_init")) {
translateNbarrierInit(*CI);
ToErase.push_back(CI);
continue;
}
if (Name.startswith("__esimd_pack_mask")) {
translatePackMask(*CI);
ToErase.push_back(CI);
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/SYCLLowerIR/ESIMD/acc_ptr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ define weak_odr dso_local spir_kernel void @ESIMDKernel(i32 %_arg_, float addrsp

; CHECK: attributes #[[GENX_MAIN]] = { "CMGenxMain" "oclrt"="1" }
; CHECK: !genx.kernels = !{![[GENX_KERNELS:[0-9]+]]}
; CHECK: ![[GENX_KERNELS]] = !{void (i32, float addrspace(1)*, float addrspace(1)*, i32, float addrspace(1)*)* @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_IO_KINDS:[0-9]+]], ![[ARG_DESCS:[0-9]+]]}
; CHECK: ![[GENX_KERNELS]] = !{void (i32, float addrspace(1)*, float addrspace(1)*, i32, float addrspace(1)*)* @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_IO_KINDS:[0-9]+]], ![[ARG_DESCS:[0-9]+]], i32 0, i32 0}
; CHECK: ![[ARG_KINDS]] = !{i32 0, i32 2, i32 2, i32 0, i32 0}
; CHECK: ![[ARG_IO_KINDS]] = !{i32 0, i32 0, i32 0, i32 0, i32 0}
; CHECK: ![[ARG_DESCS]] = !{!"", !"buffer_t", !"buffer_t", !"", !"svmptr_t"}
Expand Down
21 changes: 21 additions & 0 deletions llvm/test/SYCLLowerIR/esimd_lower_nbarriers.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
; RUN: opt < %s -LowerESIMD -S | FileCheck %s

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown-sycldevice"

; Function Attrs: convergent norecurse mustprogress
define dso_local spir_kernel void @_ZTSZ6calleriE12kernel_esimd() !sycl_explicit_simd !3 {
entry:
; CHECK: call void @llvm.genx.nbarrier(i8 0, i8 2, i8 0)
call spir_func void @_Z16__esimd_nbarrierhhh(i8 zeroext 0, i8 zeroext 2, i8 zeroext 0)

; CHECK: call void @llvm.genx.raw.send.noresult.i1.v8i32(i32 0, i1 true, i32 3, i32 33554436, <8 x i32> <i32 0, i32 0, i32 67371008, i32 0, i32 0, i32 0, i32 0, i32 0>)
call spir_func void @_Z32__esimd_raw_send_nbarrier_signalIjLi8EEvjjjN2cl4sycl5INTEL3gpu6detail11vector_typeIT_XT0_EE4typeEt(i32 0, i32 3, i32 33554436, <8 x i32> <i32 0, i32 0, i32 67371008, i32 0, i32 0, i32
0, i32 0, i32 0>, i16 zeroext 1)

ret void
}
!3 = !{}

declare dso_local spir_func void @_Z16__esimd_nbarrierhhh(i8 zeroext, i8 zeroext, i8 zeroext) local_unnamed_addr #1
declare dso_local spir_func void @_Z32__esimd_raw_send_nbarrier_signalIjLi8EEvjjjN2cl4sycl5INTEL3gpu6detail11vector_typeIT_XT0_EE4typeEt(i32, i32, i32, <8 x i32>, i16 zeroext)
19 changes: 19 additions & 0 deletions llvm/test/tools/sycl-post-link/sycl-esimd/nbarriers-metadata.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
; RUN: sycl-post-link -split-esimd -lower-esimd -S %s -o %t.table
; RUN: FileCheck %s -input-file=%t_esimd_0.ll

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown-sycldevice"

; Function Attrs: convergent norecurse mustprogress
define dso_local spir_kernel void @_ZTSZ6calleriE12kernel_esimd() #0 !sycl_explicit_simd !3 {
entry:
tail call spir_func void @_Z21__esimd_nbarrier_inith(i8 zeroext 7)
ret void
}

!3 = !{}

declare dso_local spir_func void @_Z21__esimd_nbarrier_inith(i8 zeroext)
; CHECK: attributes #0 = { {{.*}}"VCNamedBarrierCount"="7"{{.*}} }

attributes #0 = { "sycl-module-id"="a.cpp" }
Original file line number Diff line number Diff line change
Expand Up @@ -927,4 +927,62 @@ __ESIMD_INTRIN void __esimd_raw_send2_noresult(
}
#endif // __SYCL_DEVICE_ONLY__

/// Represents named barrier synchronization for a subgroup of threads.
/// Available only on PVC
///
/// @param mode - is wait(0) or signal(1)
///
/// @param id - barrier id
///
/// @param thread_count - number of threads, ignored in 'wait' mode
__ESIMD_INTRIN void __esimd_nbarrier(uint8_t mode, uint8_t id,
uint8_t thread_count)
#ifdef __SYCL_DEVICE_ONLY__
;
#else // __SYCL_DEVICE_ONLY__
{
throw cl::sycl::feature_not_supported();
}
#endif // __SYCL_DEVICE_ONLY__

/// Initialize number of named barriers for a kernel
/// Available only on PVC
///
/// @param count - number of named barriers
__ESIMD_INTRIN void __esimd_nbarrier_init(uint8_t count)
#ifdef __SYCL_DEVICE_ONLY__
;
#else // __SYCL_DEVICE_ONLY__
{
throw cl::sycl::feature_not_supported();
}
#endif // __SYCL_DEVICE_ONLY__

/// Raw send signal to perform signal operation on named barriers
/// Available only on PVC
/// @tparam Ty - message element type
///
/// @tparam N - message length
///
/// @param is_sendc - is sendc
///
/// @param extended_descriptor - extended message descriptor
///
/// @param descriptor - message descriptor
///
/// @param msg_var - source operand of send message
///
/// @param pred - predicate for enabled channels
template <typename Ty, int N>
__ESIMD_INTRIN void __esimd_raw_send_nbarrier_signal(
uint32_t is_sendc, uint32_t extended_descriptor, uint32_t descriptor,
__SEIEED::vector_type_t<Ty, N> msg_var, uint16_t pred = 1)
#ifdef __SYCL_DEVICE_ONLY__
;
#else // __SYCL_DEVICE_ONLY__
{
throw cl::sycl::feature_not_supported();
}
#endif // __SYCL_DEVICE_ONLY__

/// @endcond ESIMD_DETAIL
55 changes: 55 additions & 0 deletions sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1252,8 +1252,63 @@ raw_send_store(simd<T1, n1> msgSrc0, uint32_t exDesc, uint32_t msgDesc,
numSrc0, sfid, exDesc, msgDesc,
msgSrc0.data());
}

/// @} sycl_esimd_raw_send

/// @defgroup sycl_esimd_memory_nbarrier Named barrier APIs.
/// @ingroup sycl_esimd_memory

/// @addtogroup sycl_esimd_memory_nbarrier
/// @{

/// Wait on a named barrier
/// Available only on PVC
///
/// @param id - named barrier id
__ESIMD_API void nbarrier_wait(uint8_t id) {
__esimd_nbarrier(0 /*wait*/, id, 0 /*thread count*/);
}

/// Initialize number of named barriers for a kernel
/// Available only on PVC
///
/// @tparam NbarCount - number of named barriers
template <uint8_t NbarCount> __ESIMD_API void nbarrier_init() {
__esimd_nbarrier_init(NbarCount);
}

/// Perform signal operation for the given named barrier
/// Available only on PVC
///
/// @param barrier_id - named barrier id
///
/// @param producer_consumer_mode - 2-bit flag to indicate if it's producer
/// mode (0x1) or consumer mode (0x2). User must ensure the input value is set
/// correctly and higher order bits are cleared.
///
/// @param num_producers - number of producers
///
/// @param num_consumers - number of consumers
__ESIMD_API void nbarrier_signal(uint8_t barrier_id,
uint8_t producer_consumer_mode,
uint32_t num_producers,
uint32_t num_consumers) {
constexpr uint32_t gateway = 3;
constexpr uint32_t barrier = 4;
constexpr uint32_t descriptor = 1 << 25 | // Message length: 1 register
0 << 12 | // Fence Data Ports: No fence
barrier; // Barrier subfunction

detail::vector_type_t<uint32_t, 8> payload = 0;
payload[2] = (num_consumers & 0xff) << 24 | (num_producers & 0xff) << 16 |
producer_consumer_mode << 14 | (barrier_id & 0b11111) << 0;

__esimd_raw_send_nbarrier_signal<uint32_t, 8>(
0 /*sendc*/, gateway, descriptor, payload, 1 /*pred*/);
}

/// @} sycl_esimd_memory_nbarrier

#undef __ESIMD_GET_SURF_HANDLE

/// @cond EXCLUDE
Expand Down
19 changes: 19 additions & 0 deletions sycl/test/esimd/nbarriers.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// RUN: %clangxx -fsycl -c -fsycl-device-only -Xclang -emit-llvm %s -o %t

#include <CL/sycl.hpp>
#include <sycl/ext/intel/experimental/esimd.hpp>

using namespace sycl::ext::intel::experimental::esimd;

template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel(Func kernelFunc) {
kernelFunc();
}

void caller(int x) {
kernel<class kernel_esimd>([=]() SYCL_ESIMD_KERNEL {
nbarrier_init<7>();
nbarrier_wait(2);
nbarrier_signal(0, 0, 4, 4);
});
}

0 comments on commit f66adaa

Please sign in to comment.