diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp index 30812a330ef18..ebeb8f803d71d 100644 --- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp +++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp @@ -43,7 +43,8 @@ namespace mlir { static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable, StringRef name, ArrayRef paramTypes, - Type resultType) { + Type resultType, + bool isConvergent = false) { auto func = dyn_cast_or_null( SymbolTable::lookupSymbolIn(symbolTable, name)); if (!func) { @@ -52,6 +53,7 @@ static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable, symbolTable->getLoc(), name, LLVM::LLVMFunctionType::get(resultType, paramTypes)); func.setCConv(LLVM::cconv::CConv::SPIR_FUNC); + func.setConvergent(isConvergent); } return func; } @@ -89,8 +91,8 @@ struct GPUBarrierConversion final : ConvertOpToLLVMPattern { assert(moduleOp && "Expecting module"); Type flagTy = rewriter.getI32Type(); Type voidTy = rewriter.getType(); - LLVM::LLVMFuncOp func = - lookupOrCreateSPIRVFn(moduleOp, funcName, flagTy, voidTy); + LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn( + moduleOp, funcName, flagTy, voidTy, /*isConvergent=*/true); // Value used by SPIR-V backend to represent `CLK_LOCAL_MEM_FENCE`. // See `llvm/lib/Target/SPIRV/SPIRVBuiltins.td`. @@ -266,8 +268,9 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern { Type valueType = adaptor.getValue().getType(); Type offsetType = adaptor.getOffset().getType(); Type resultType = valueType; - LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn( - moduleOp, funcName, {valueType, offsetType}, resultType); + LLVM::LLVMFuncOp func = + lookupOrCreateSPIRVFn(moduleOp, funcName, {valueType, offsetType}, + resultType, /*isConvergent=*/true); Location loc = op->getLoc(); std::array args{adaptor.getValue(), adaptor.getOffset()}; diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir index 654041b8e9aac..1b0f89a9a573e 100644 --- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir +++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir @@ -104,7 +104,7 @@ gpu.module @builtins { // ----- gpu.module @barriers { - // CHECK: llvm.func spir_funccc @_Z7barrierj(i32) + // CHECK: llvm.func spir_funccc @_Z7barrierj(i32) attributes {convergent} // CHECK-LABEL: gpu_barrier func.func @gpu_barrier() { @@ -120,10 +120,10 @@ gpu.module @barriers { // Check `gpu.shuffle` conversion with default subgroup size. gpu.module @shuffles { - // CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 - // CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 - // CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 - // CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 + // CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {convergent} + // CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {convergent} + // CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {convergent} + // CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {convergent} // CHECK-LABEL: gpu_shuffles // CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32) @@ -155,10 +155,10 @@ gpu.module @shuffles { gpu.module @shuffles attributes { spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits> } { - // CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 - // CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 - // CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 - // CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 + // CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {convergent} + // CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {convergent} + // CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {convergent} + // CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {convergent} // CHECK-LABEL: gpu_shuffles // CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)