From 8fe11b3bec7fade1a264a1abefd5e70032a2fd2c Mon Sep 17 00:00:00 2001 From: Haonan Yang Date: Mon, 28 Feb 2022 22:54:04 +0800 Subject: [PATCH] Add support for split barriers extension SPV_INTEL_split_barrier This backports https://github.com/KhronosGroup/SPIRV-LLVM-Translator/pull/1424 Signed-off-by: Haonan Yang --- ...split-barriers-extension-SPV_INTEL_s.patch | 374 ++++++++++++++++++ 1 file changed, 374 insertions(+) create mode 100644 patches/spirv/0002-Add-support-for-split-barriers-extension-SPV_INTEL_s.patch diff --git a/patches/spirv/0002-Add-support-for-split-barriers-extension-SPV_INTEL_s.patch b/patches/spirv/0002-Add-support-for-split-barriers-extension-SPV_INTEL_s.patch new file mode 100644 index 00000000..7f542832 --- /dev/null +++ b/patches/spirv/0002-Add-support-for-split-barriers-extension-SPV_INTEL_s.patch @@ -0,0 +1,374 @@ +From 6c69a15cbdc37219cbb855652027536c9d1194c4 Mon Sep 17 00:00:00 2001 +From: Haonan Yang +Date: Mon, 28 Feb 2022 18:33:05 +0800 +Subject: [PATCH] Add support for split barriers extension + SPV_INTEL_split_barrier + +Signed-off-by: Haonan Yang +--- + include/LLVMSPIRVExtensions.inc | 1 + + lib/SPIRV/OCL20ToSPIRV.cpp | 37 +++++++++++++++++++++++++++ + lib/SPIRV/OCLUtil.cpp | 23 ++++++++++++++++- + lib/SPIRV/OCLUtil.h | 20 +++++++++++++++ + lib/SPIRV/SPIRVReader.cpp | 3 ++- + lib/SPIRV/SPIRVToOCL.cpp | 4 +++ + lib/SPIRV/SPIRVToOCL.h | 5 ++++ + lib/SPIRV/SPIRVToOCL12.cpp | 17 ++++++++++++ + lib/SPIRV/SPIRVToOCL20.cpp | 26 +++++++++++++++++++ + lib/SPIRV/libSPIRV/SPIRVInstruction.h | 19 +++++++++++++- + lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | 1 + + lib/SPIRV/libSPIRV/SPIRVOpCode.h | 5 ++++ + lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h | 2 ++ + 13 files changed, 160 insertions(+), 3 deletions(-) + +diff --git a/include/LLVMSPIRVExtensions.inc b/include/LLVMSPIRVExtensions.inc +index a5acbffc..2f9866fe 100644 +--- a/include/LLVMSPIRVExtensions.inc ++++ b/include/LLVMSPIRVExtensions.inc +@@ -30,3 +30,4 @@ EXT(SPV_INTEL_optnone) + 
EXT(SPV_INTEL_arbitrary_precision_integers) + EXT(SPV_INTEL_variable_length_array) + EXT(SPV_INTEL_memory_access_aliasing) ++EXT(SPV_INTEL_split_barrier) +diff --git a/lib/SPIRV/OCL20ToSPIRV.cpp b/lib/SPIRV/OCL20ToSPIRV.cpp +index 4e7904a1..5755a152 100644 +--- a/lib/SPIRV/OCL20ToSPIRV.cpp ++++ b/lib/SPIRV/OCL20ToSPIRV.cpp +@@ -278,6 +278,8 @@ public: + void visitSubgroupAVCBuiltinCallWithSampler(CallInst *CI, + StringRef MangledName, + const std::string &DemangledName); ++ /// For cl_intel_split_work_group_barrier built-ins: ++ void visitCallSplitBarrierINTEL(CallInst *CI, StringRef DemangledName); + + void visitCallLdexp(CallInst *CI, StringRef MangledName, + StringRef DemangledName); +@@ -546,6 +548,10 @@ void OCL20ToSPIRV::visitCallInst(CallInst &CI) { + visitSubgroupImageMediaBlockINTEL(&CI, DemangledName); + return; + } ++ if (DemangledName.find(kOCLBuiltinName::SplitBarrierINTELPrefix) == 0) { ++ visitCallSplitBarrierINTEL(&CI, DemangledName); ++ return; ++ } + // Handle 'cl_intel_device_side_avc_motion_estimation' extension built-ins + if (DemangledName.find(kOCLSubgroupsAVCIntel::Prefix) == 0 || + // Workaround for a bug in the extension specification +@@ -1889,6 +1895,37 @@ void OCL20ToSPIRV::visitSubgroupAVCBuiltinCallWithSampler( + &Attrs); + } + ++void OCL20ToSPIRV::visitCallSplitBarrierINTEL(CallInst *CI, ++ StringRef DemangledName) { ++ auto Lit = getBarrierLiterals(CI); ++ AttributeList Attrs = CI->getCalledFunction()->getAttributes(); ++ Op OpCode = ++ StringSwitch(DemangledName) ++ .Case("intel_work_group_barrier_arrive", OpControlBarrierArriveINTEL) ++ .Case("intel_work_group_barrier_wait", OpControlBarrierWaitINTEL) ++ .Default(OpNop); ++ ++ mutateCallInstSPIRV( ++ M, CI, ++ [=](CallInst *, std::vector &Args) { ++ Args.resize(3); ++ // Execution scope ++ Args[0] = addInt32(map(std::get<2>(Lit))); ++ // Memory scope ++ Args[1] = addInt32(map(std::get<1>(Lit))); ++ // Memory semantics ++ // OpControlBarrierArriveINTEL -> Release, ++ // 
OpControlBarrierWaitINTEL -> Acquire ++ unsigned MemFenceFlag = std::get<0>(Lit); ++ OCLMemOrderKind MemOrder = OpCode == OpControlBarrierArriveINTEL ++ ? OCLMO_release ++ : OCLMO_acquire; ++ Args[2] = addInt32(mapOCLMemSemanticToSPIRV(MemFenceFlag, MemOrder)); ++ return getSPIRVFuncName(OpCode); ++ }, ++ &Attrs); ++} ++ + void OCL20ToSPIRV::visitCallLdexp(CallInst *CI, StringRef MangledName, + StringRef DemangledName) { + auto Args = getArguments(CI); +diff --git a/lib/SPIRV/OCLUtil.cpp b/lib/SPIRV/OCLUtil.cpp +index 5032925b..9da1d9cf 100644 +--- a/lib/SPIRV/OCLUtil.cpp ++++ b/lib/SPIRV/OCLUtil.cpp +@@ -472,7 +472,9 @@ public: + } else if (UnmangledName.find("barrier") != std::string::npos) { + addUnsignedArg(0); + if (UnmangledName == "work_group_barrier" || +- UnmangledName == "sub_group_barrier") ++ UnmangledName == "sub_group_barrier" || ++ UnmangledName == "intel_work_group_barrier_arrive" || ++ UnmangledName == "intel_work_group_barrier_wait") + setEnumArg(1, SPIR::PRIMITIVE_MEMORY_SCOPE); + } else if (UnmangledName.find("atomic_work_item_fence") == 0) { + addUnsignedArg(0); +@@ -986,6 +988,25 @@ void insertImageNameAccessQualifier(SPIRVAccessQualifierKind Acc, + } + } // namespace OCLUtil + ++Value *SPIRV::transSPIRVMemorySemanticsIntoOCLMemFenceFlags( ++ Value *MemorySemantics, Instruction *InsertBefore) { ++ if (auto *C = dyn_cast(MemorySemantics)) { ++ return ConstantInt::get(C->getType(), ++ mapSPIRVMemSemanticToOCL(C->getZExtValue()).first); ++ } ++ ++ // TODO: any possible optimizations? 
++ // SPIR-V MemorySemantics contains both OCL mem_fence_flags and mem_order and ++ // therefore, we need to apply mask ++ int Mask = MemorySemanticsWorkgroupMemoryMask | ++ MemorySemanticsCrossWorkgroupMemoryMask | ++ MemorySemanticsImageMemoryMask; ++ return getOrCreateSwitchFunc(kSPIRVName::TranslateSPIRVMemFence, ++ MemorySemantics, ++ OCLMemFenceExtendedMap::getRMap(), ++ /* IsReverse */ true, None, InsertBefore, InsertBefore->getModule(), Mask); ++} ++ + void llvm::mangleOpenClBuiltin(const std::string &UniqName, + ArrayRef ArgTypes, + std::string &MangledName) { +diff --git a/lib/SPIRV/OCLUtil.h b/lib/SPIRV/OCLUtil.h +index c8f20c4b..8f21a6f9 100644 +--- a/lib/SPIRV/OCLUtil.h ++++ b/lib/SPIRV/OCLUtil.h +@@ -238,6 +238,7 @@ const static char SubgroupBlockWriteINTELPrefix[] = + "intel_sub_group_block_write"; + const static char SubgroupImageMediaBlockINTELPrefix[] = + "intel_sub_group_media_block"; ++const static char SplitBarrierINTELPrefix[] = "intel_work_group_barrier_"; + const static char LDEXP[] = "ldexp"; + } // namespace kOCLBuiltinName + +@@ -545,6 +546,22 @@ getOrCreateSwitchFunc(StringRef MapName, Value *V, + return addCallInst(M, MapName, Ty, V, nullptr, InsertPoint); + } + ++/// Performs conversion from SPIR-V Memory Semantics into OpenCL ++/// mem_fence_flags. ++/// ++/// Supports both constant and non-constant values. 
To handle the latter case, ++/// function with switch..case statement will be inserted into module which ++/// \arg InsertBefore belongs to (in order to perform mapping at runtime) ++/// ++/// \param [in] MemorySemantics Memory Semantics value which needs to be ++/// translated ++/// \param [in] InsertBefore insertion point for call into conversion function ++/// which is generated if \arg MemorySemantics is not a constant ++/// \returns \c Value corresponding to OpenCL mem_fence_flags equivalent to ++/// SPIR-V Memory Semantics passed in \arg MemorySemantics ++Value *transSPIRVMemorySemanticsIntoOCLMemFenceFlags(Value *MemorySemantics, ++ Instruction *InsertBefore); ++ + template <> inline void SPIRVMap::init() { + add("reduce", GroupOperationReduce); + add("scan_inclusive", GroupOperationInclusiveScan); +@@ -815,6 +832,9 @@ template <> inline void SPIRVMap::init() { + // cl_khr_subgroup_shuffle_relative + _SPIRV_OP(group_shuffle_up, GroupNonUniformShuffleUp) + _SPIRV_OP(group_shuffle_down, GroupNonUniformShuffleDown) ++ // cl_intel_split_work_group_barrier ++ _SPIRV_OP(intel_work_group_barrier_arrive, ControlBarrierArriveINTEL) ++ _SPIRV_OP(intel_work_group_barrier_wait, ControlBarrierWaitINTEL) + #undef _SPIRV_OP + } + +diff --git a/lib/SPIRV/SPIRVReader.cpp b/lib/SPIRV/SPIRVReader.cpp +index 02f7e52d..8ed593f4 100644 +--- a/lib/SPIRV/SPIRVReader.cpp ++++ b/lib/SPIRV/SPIRVReader.cpp +@@ -2433,7 +2433,8 @@ Instruction *SPIRVToLLVM::transBuiltinFromInst(const std::string &FuncName, + if (isFuncNoUnwind()) + Func->addFnAttr(Attribute::NoUnwind); + auto OC = BI->getOpCode(); +- if (isGroupOpCode(OC) || isIntelSubgroupOpCode(OC)) ++ if (isGroupOpCode(OC) || isIntelSubgroupOpCode(OC) || ++ isSplitBarrierINTELOpCode(OC)) + Func->addFnAttr(Attribute::Convergent); + } + auto Call = +diff --git a/lib/SPIRV/SPIRVToOCL.cpp b/lib/SPIRV/SPIRVToOCL.cpp +index 3da14587..1edba730 100644 +--- a/lib/SPIRV/SPIRVToOCL.cpp ++++ b/lib/SPIRV/SPIRVToOCL.cpp +@@ -107,6 +107,10 @@ void
SPIRVToOCL::visitCallInst(CallInst &CI) { + if (OC == OpControlBarrier) { + visitCallSPIRVControlBarrier(&CI); + } ++ if (isSplitBarrierINTELOpCode(OC)) { ++ visitCallSPIRVSplitBarrierINTEL(&CI, OC); ++ return; ++ } + if (isAtomicOpCode(OC)) { + visitCallSPIRVAtomicBuiltin(&CI, OC); + return; +diff --git a/lib/SPIRV/SPIRVToOCL.h b/lib/SPIRV/SPIRVToOCL.h +index 127e8dd8..3577496b 100644 +--- a/lib/SPIRV/SPIRVToOCL.h ++++ b/lib/SPIRV/SPIRVToOCL.h +@@ -211,6 +211,11 @@ public: + /// - OCL1.2: barrier + virtual void visitCallSPIRVControlBarrier(CallInst *CI) = 0; + ++ /// Transform split __spirv_ControlBarrier barrier to: ++ /// - OCL2.0: overload with a memory_scope argument ++ /// - OCL1.2: overload with no memory_scope argument ++ virtual void visitCallSPIRVSplitBarrierINTEL(CallInst *CI, Op OC) = 0; ++ + /// Transform __spirv_EnqueueKernel to __enqueue_kernel + virtual void visitCallSPIRVEnqueueKernel(CallInst *CI, Op OC) = 0; + +diff --git a/lib/SPIRV/SPIRVToOCL12.cpp b/lib/SPIRV/SPIRVToOCL12.cpp +index 2b8b2967..f8084901 100644 +--- a/lib/SPIRV/SPIRVToOCL12.cpp ++++ b/lib/SPIRV/SPIRVToOCL12.cpp +@@ -60,6 +60,10 @@ public: + /// barrier(flag(sema)) + void visitCallSPIRVControlBarrier(CallInst *CI) override; + ++ /// Transform split __spirv_ControlBarrier barrier to overloads without a ++ /// memory_scope argument. ++ void visitCallSPIRVSplitBarrierINTEL(CallInst *CI, Op OC) override; ++ + /// Transform __spirv_OpAtomic functions. 
It firstly conduct generic + /// mutations for all builtins and then mutate some of them seperately + Instruction *visitCallSPIRVAtomicBuiltin(CallInst *CI, Op OC) override; +@@ -202,6 +206,19 @@ void SPIRVToOCL12::visitCallSPIRVControlBarrier(CallInst *CI) { + &Attrs); + } + ++void SPIRVToOCL12::visitCallSPIRVSplitBarrierINTEL(CallInst *CI, Op OC) { ++ AttributeList Attrs = CI->getCalledFunction()->getAttributes(); ++ mutateCallInstOCL( ++ M, CI, ++ [=](CallInst *, std::vector &Args) { ++ Value *MemFenceFlags = ++ SPIRV::transSPIRVMemorySemanticsIntoOCLMemFenceFlags(Args[2], CI); ++ Args.assign(1, MemFenceFlags); ++ return OCLSPIRVBuiltinMap::rmap(OC); ++ }, ++ &Attrs); ++} ++ + Instruction *SPIRVToOCL12::visitCallSPIRVAtomicIncDec(CallInst *CI, Op OC) { + AttributeList Attrs = CI->getCalledFunction()->getAttributes(); + return mutateCallInstOCL( +diff --git a/lib/SPIRV/SPIRVToOCL20.cpp b/lib/SPIRV/SPIRVToOCL20.cpp +index 8f639488..6f0dae46 100644 +--- a/lib/SPIRV/SPIRVToOCL20.cpp ++++ b/lib/SPIRV/SPIRVToOCL20.cpp +@@ -63,6 +63,10 @@ public: + /// sub_group_barrier(flag(sema), map(memScope)) + void visitCallSPIRVControlBarrier(CallInst *CI) override; + ++ /// Transform split __spirv_ControlBarrier barrier to overloads with a ++ /// memory_scope argument. ++ void visitCallSPIRVSplitBarrierINTEL(CallInst *CI, Op OC) override; ++ + /// Transform __spirv_Atomic* to atomic_*. + /// __spirv_Atomic*(atomic_op, scope, sema, ops, ...) 
=> + /// atomic_*(generic atomic_op, ops, ..., order(sema), map(scope)) +@@ -206,6 +210,28 @@ Instruction *SPIRVToOCL20::visitCallSPIRVAtomicBuiltin(CallInst *CI, Op OC) { + return NewCI; + } + ++void SPIRVToOCL20::visitCallSPIRVSplitBarrierINTEL(CallInst *CI, Op OC) { ++ AttributeList Attrs = CI->getCalledFunction()->getAttributes(); ++ mutateCallInstOCL( ++ M, CI, ++ [=](CallInst *, std::vector &Args) { ++ auto GetArg = [=](unsigned I) { ++ return cast(Args[I])->getZExtValue(); ++ }; ++ Value *MemScope = ++ getInt32(M, rmap(static_cast(GetArg(1)))); ++ Value *MemFenceFlags = ++ SPIRV::transSPIRVMemorySemanticsIntoOCLMemFenceFlags(Args[2], CI); ++ ++ Args.resize(2); ++ Args[0] = MemFenceFlags; ++ Args[1] = MemScope; ++ ++ return OCLSPIRVBuiltinMap::rmap(OC); ++ }, ++ &Attrs); ++} ++ + Instruction *SPIRVToOCL20::visitCallSPIRVAtomicIncDec(CallInst *CI, Op OC) { + AttributeList Attrs = CI->getCalledFunction()->getAttributes(); + return mutateCallInstOCL( +diff --git a/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/lib/SPIRV/libSPIRV/SPIRVInstruction.h +index 85ac0120..9d1dbf6c 100644 +--- a/lib/SPIRV/libSPIRV/SPIRVInstruction.h ++++ b/lib/SPIRV/libSPIRV/SPIRVInstruction.h +@@ -3183,6 +3183,23 @@ _SPIRV_OP(VariableLengthArray, true, 4) + _SPIRV_OP(SaveMemory, true, 3) + _SPIRV_OP(RestoreMemory, false, 2) + #undef _SPIRV_OP +-} // namespace SPIRV + ++class SPIRVSplitBarrierINTELBase : public SPIRVInstTemplateBase { ++protected: ++ SPIRVCapVec getRequiredCapability() const override { ++ return getVec(CapabilitySplitBarrierINTEL); ++ } ++ ++ SPIRVExtSet getRequiredExtensions() const override { ++ return getSet(ExtensionID::SPV_INTEL_split_barrier); ++ } ++}; ++ ++#define _SPIRV_OP(x, ...) 
\ ++ typedef SPIRVInstTemplate \ ++ SPIRV##x; ++_SPIRV_OP(ControlBarrierArriveINTEL, false, 4) ++_SPIRV_OP(ControlBarrierWaitINTEL, false, 4) ++#undef _SPIRV_OP ++} // namespace SPIRV + #endif // SPIRV_LIBSPIRV_SPIRVINSTRUCTION_H +diff --git a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h +index ed9d944f..1cde296a 100644 +--- a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h ++++ b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h +@@ -576,6 +576,7 @@ template <> inline void SPIRVMap::init() { + "ArbitraryPrecisionIntegersINTEL"); + add(internal::CapabilityMemoryAccessAliasingINTEL, + "MemoryAccessAliasingINTEL"); ++ add(CapabilitySplitBarrierINTEL, "SplitBarrierINTEL"); + + } + SPIRV_DEF_NAMEMAP(Capability, SPIRVCapabilityNameMap) +diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCode.h b/lib/SPIRV/libSPIRV/SPIRVOpCode.h +index c0f2b650..2a4012db 100644 +--- a/lib/SPIRV/libSPIRV/SPIRVOpCode.h ++++ b/lib/SPIRV/libSPIRV/SPIRVOpCode.h +@@ -242,6 +242,11 @@ inline bool isEventOpCode(Op OpCode) { + return OpRetainEvent <= OpCode && OpCode <= OpCaptureEventProfilingInfo; + } + ++inline bool isSplitBarrierINTELOpCode(Op OpCode) { ++ return OpCode == OpControlBarrierArriveINTEL || ++ OpCode == OpControlBarrierWaitINTEL; ++} ++ + } // namespace SPIRV + + #endif // SPIRV_LIBSPIRV_SPIRVOPCODE_H +diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h b/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h +index e568bee5..3c9fbb5f 100644 +--- a/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h ++++ b/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h +@@ -489,3 +489,5 @@ _SPIRV_OP(TypeBufferSurfaceINTEL, 6086) + _SPIRV_OP(TypeStructContinuedINTEL, 6090) + _SPIRV_OP(ConstantCompositeContinuedINTEL, 6091) + _SPIRV_OP(SpecConstantCompositeContinuedINTEL, 6092) ++_SPIRV_OP(ControlBarrierArriveINTEL, 6142) ++_SPIRV_OP(ControlBarrierWaitINTEL, 6143) +-- +2.18.1 +