Skip to content

Commit

Permalink
[MLIR] Add the convergent attribute to the barrier and shuffle ops (l…
Browse files Browse the repository at this point in the history
…lvm#97807)

When lowering from the gpu dialect to the llvm dialect for spirv, the
barrier op and shuffle ops need a convergent attribute for correctness.
  • Loading branch information
FMarno committed Jul 9, 2024
1 parent 19cc461 commit 3670e7f
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 14 deletions.
13 changes: 8 additions & 5 deletions mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ namespace mlir {
static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable,
StringRef name,
ArrayRef<Type> paramTypes,
Type resultType) {
Type resultType,
bool isConvergent = false) {
auto func = dyn_cast_or_null<LLVM::LLVMFuncOp>(
SymbolTable::lookupSymbolIn(symbolTable, name));
if (!func) {
Expand All @@ -52,6 +53,7 @@ static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable,
symbolTable->getLoc(), name,
LLVM::LLVMFunctionType::get(resultType, paramTypes));
func.setCConv(LLVM::cconv::CConv::SPIR_FUNC);
func.setConvergent(isConvergent);
}
return func;
}
Expand Down Expand Up @@ -89,8 +91,8 @@ struct GPUBarrierConversion final : ConvertOpToLLVMPattern<gpu::BarrierOp> {
assert(moduleOp && "Expecting module");
Type flagTy = rewriter.getI32Type();
Type voidTy = rewriter.getType<LLVM::LLVMVoidType>();
LLVM::LLVMFuncOp func =
lookupOrCreateSPIRVFn(moduleOp, funcName, flagTy, voidTy);
LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(
moduleOp, funcName, flagTy, voidTy, /*isConvergent=*/true);

// Value used by SPIR-V backend to represent `CLK_LOCAL_MEM_FENCE`.
// See `llvm/lib/Target/SPIRV/SPIRVBuiltins.td`.
Expand Down Expand Up @@ -266,8 +268,9 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
Type valueType = adaptor.getValue().getType();
Type offsetType = adaptor.getOffset().getType();
Type resultType = valueType;
LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(
moduleOp, funcName, {valueType, offsetType}, resultType);
LLVM::LLVMFuncOp func =
lookupOrCreateSPIRVFn(moduleOp, funcName, {valueType, offsetType},
resultType, /*isConvergent=*/true);

Location loc = op->getLoc();
std::array<Value, 2> args{adaptor.getValue(), adaptor.getOffset()};
Expand Down
18 changes: 9 additions & 9 deletions mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ gpu.module @builtins {
// -----

gpu.module @barriers {
// CHECK: llvm.func spir_funccc @_Z7barrierj(i32)
// CHECK: llvm.func spir_funccc @_Z7barrierj(i32) attributes {convergent}

// CHECK-LABEL: gpu_barrier
func.func @gpu_barrier() {
Expand All @@ -120,10 +120,10 @@ gpu.module @barriers {
// Check `gpu.shuffle` conversion with default subgroup size.

gpu.module @shuffles {
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64
// CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64
// CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {convergent}
// CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {convergent}
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {convergent}
// CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {convergent}

// CHECK-LABEL: gpu_shuffles
// CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)
Expand Down Expand Up @@ -155,10 +155,10 @@ gpu.module @shuffles {
gpu.module @shuffles attributes {
spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Kernel, Addresses, GroupNonUniformShuffle, Int64], []>, #spirv.resource_limits<subgroup_size = 16>>
} {
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64
// CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64
// CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {convergent}
// CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {convergent}
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {convergent}
// CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {convergent}

// CHECK-LABEL: gpu_shuffles
// CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)
Expand Down

0 comments on commit 3670e7f

Please sign in to comment.