[NFC][GPU] Move LLVMGPUPipelineOptions to iree_gpu dialect (#18458)

This moves `LLVMGPUPipelineOptions` to `Codegen/Dialect/GPU` so that pipeline options can be set by iree_gpu lowering configuration logic (like `setMatmulLoweringConfig` in `ConfigUtils.cpp`). A new attribute `GPUPipelineOptionsAttr` is added, with optional parameters defined for each existing pipeline option. The assembly of the pipline attributes has changed, since the options are now part of the `iree_gpu` dialect. For the purposes of setting user configurations with transform dialect this PR changes the following: All pipeline option attributes should now be contained in a single `#iree_gpu.pipeline_options<>` - `reorder_workgroups = "none"/"swizzle"/"transpose"` becomes one of the pipeline options `reorder_workgroups_strategy = None/Swizzle/Transpose` - `prefetch_shared_memory` becomes the pipeline option `prefetch_shared_memory = true/false` - `no_reduce_shared_memory_bank_conflicts` becomes the pipeline option `no_reduce_shared_memory_bank_conflicts = true/false` Example: After this change, the translation_info config dict changes from ``` {reorder_workgroups = "swizzle", prefetch_shared_memory} ``` to ``` {gpu_pipeline_options = #iree_gpu.pipeline_options< prefetch_shared_memory = true, reorder_workgroups_strategy = Swizzle >} ``` --------- Signed-off-by: Max Dawkins <max.dawkins@gmail.com>
iree-org · Sep 10, 2024 · 15d58e7 · 15d58e7
1 parent b197555
commit 15d58e7
Show file tree

Hide file tree

Showing 14 changed files with 256 additions and 146 deletions.
diff --git a/build_tools/pkgci/external_test_suite/attention_and_matmul_spec.mlir b/build_tools/pkgci/external_test_suite/attention_and_matmul_spec.mlir
@@ -636,8 +636,8 @@ module attributes { transform.with_named_sequence } {
          workgroup_size = [320, 1, 1] subgroup_size = 64,
           {mma_schedule = #iree_gpu.mma_schedule<
               intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
-              subgroup_m_count = 1, subgroup_n_count = 5>
-           , reorder_workgroups = "transpose"}>
+              subgroup_m_count = 1, subgroup_n_count = 5>,
+           gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = Transpose>}>
       > -> !transform.any_param
     transform.yield %conv, %config : !transform.any_op, !transform.any_param
   }
@@ -656,8 +656,9 @@ module attributes { transform.with_named_sequence } {
          workgroup_size = [256, 1, 1] subgroup_size = 64,
           {mma_schedule = #iree_gpu.mma_schedule<
               intrinsic = #iree_gpu.mma_layout<MFMA_F32_32x32x8_F16>,
-              subgroup_m_count = 1, subgroup_n_count = 4>
-           , reorder_workgroups = "transpose", llvm_func_attrs = {"amdgpu-waves-per-eu" = "1"}}>
+              subgroup_m_count = 1, subgroup_n_count = 4>,
+           gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = Transpose>,
+           llvm_func_attrs = {"amdgpu-waves-per-eu" = "1"}}>
       > -> !transform.any_param
     transform.yield %conv, %config : !transform.any_op, !transform.any_param
   }
@@ -676,8 +677,8 @@ module attributes { transform.with_named_sequence } {
          workgroup_size = [320, 1, 1] subgroup_size = 64,
           {mma_schedule = #iree_gpu.mma_schedule<
               intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
-              subgroup_m_count = 1, subgroup_n_count = 5>
-           , reorder_workgroups = "transpose"}>
+              subgroup_m_count = 1, subgroup_n_count = 5>,
+           gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = Transpose>}>
       > -> !transform.any_param
     transform.yield %conv, %config : !transform.any_op, !transform.any_param
   }
@@ -696,8 +697,8 @@ module attributes { transform.with_named_sequence } {
          workgroup_size = [320, 1, 1] subgroup_size = 64,
           {mma_schedule = #iree_gpu.mma_schedule<
               intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
-              subgroup_m_count = 1, subgroup_n_count = 5>
-           , reorder_workgroups = "transpose"}>
+              subgroup_m_count = 1, subgroup_n_count = 5>,
+           gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = Transpose>}>
       > -> !transform.any_param
     transform.yield %conv, %config : !transform.any_op, !transform.any_param
   }
@@ -716,8 +717,9 @@ module attributes { transform.with_named_sequence } {
          workgroup_size = [128, 4, 1] subgroup_size = 64,
           {mma_schedule = #iree_gpu.mma_schedule<
               intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
-              subgroup_m_count = 4, subgroup_n_count = 2>
-           , reorder_workgroups = "transpose", llvm_func_attrs = {"amdgpu-waves-per-eu" = "1"}}>
+              subgroup_m_count = 4, subgroup_n_count = 2>,
+           gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = Transpose>,
+           llvm_func_attrs = {"amdgpu-waves-per-eu" = "1"}}>
       > -> !transform.any_param
     transform.yield %conv, %config : !transform.any_op, !transform.any_param
   }
@@ -736,8 +738,8 @@ module attributes { transform.with_named_sequence } {
          workgroup_size = [320, 1, 1] subgroup_size = 64,
           {mma_schedule = #iree_gpu.mma_schedule<
               intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
-              subgroup_m_count = 1, subgroup_n_count = 5>
-           , reorder_workgroups = "transpose"}>
+              subgroup_m_count = 1, subgroup_n_count = 5>,
+           gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = Transpose>}>
       > -> !transform.any_param
     transform.yield %conv, %config : !transform.any_op, !transform.any_param
   }

diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.h b/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.h
@@ -101,7 +101,7 @@ createGPUTensorAlloc(GPUPromoteSharedMemPattern promoteSharedMemPattern =
 std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
 createConvertVectorReductionToGPUPass(bool expandSubgroupReduction = true);
 
-enum class ReorderWorkgroupsStrategy { None, Swizzle, Transpose };
+using IREE::GPU::ReorderWorkgroupsStrategy;
 
 /// Reorders workgroup IDs.
 std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>

diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.cpp b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.cpp
@@ -1352,6 +1352,25 @@ bool LaneIdAttr::isLinearMapping() const { return true; }
 
 int64_t LaneIdAttr::getRelativeIndex() const { return getDim(); }
 
+//===----------------------------------------------------------------------===//
+// GPU Pipeline Options
+//===----------------------------------------------------------------------===//
+
+GPUPipelineOptionsAttr GPUPipelineOptionsAttr::get(
+    MLIRContext *context, bool prefetchSharedMemory,
+    bool noReduceSharedMemoryBankConflicts,
+    std::optional<ReorderWorkgroupsStrategy> reorderWorkgroupsStrategy) {
+  auto strategyAttr = ReorderWorkgroupsStrategyAttr();
+  if (reorderWorkgroupsStrategy) {
+    strategyAttr =
+        ReorderWorkgroupsStrategyAttr::get(context, *reorderWorkgroupsStrategy);
+  }
+  Builder b(context);
+  return Base::get(context, b.getBoolAttr(prefetchSharedMemory),
+                   b.getBoolAttr(noReduceSharedMemoryBankConflicts),
+                   strategyAttr);
+}
+
 //===----------------------------------------------------------------------===//
 // Attribute Registration
 //===----------------------------------------------------------------------===//

diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td
@@ -430,4 +430,54 @@ def IREEGPU_LaneIdAttr : AttrDef<IREEGPU_Dialect, "LaneId", [
   }];
 }
 
+//===----------------------------------------------------------------------===//
+// GPU Pipeline Options
+//===----------------------------------------------------------------------===//
+
+def IREEGPU_ReorderWorkgroupsStrategyAttr :
+    EnumAttr<IREEGPU_Dialect, IREEGPU_ReorderWorkgroupsStrategy, ""> {
+  let assemblyFormat = "``$value";
+  let cppNamespace = "::mlir::iree_compiler::IREE::GPU";
+}
+
+def IREEGPU_GPUPipelineOptionsAttr : AttrDef<IREEGPU_Dialect, "GPUPipelineOptions"> {
+  let summary = "GPU pipeline options attribute.";
+  let description = [{
+    This attributes describes lowering pipeline specific configuration options:
+    * prefetch_shared_memory: Boolean option indicating whether or not to run
+      the loop prefetching pass in the lowering pipeline.
+    * no_reduce_shared_memory_bank_conflicts: Boolean option indicating whether
+      or not to skip the bank conflict reduction pass in the lowering pipeline.
+    * reorder_workgroups_strategy: Enum attribute indicating which strategy to
+      choose for the workgroup reordering pass. Options are `None`, `Swizzle`,
+      and `Transpose`.
+  }];
+
+  let mnemonic = "pipeline_options";
+  let cppNamespace = "::mlir::iree_compiler::IREE::GPU";
+
+  let parameters = (ins
+    OptionalParameter<"BoolAttr">:$prefetch_shared_memory,
+    OptionalParameter<"BoolAttr">:$no_reduce_shared_memory_bank_conflicts,
+    OptionalParameter<"ReorderWorkgroupsStrategyAttr">:$reorder_workgroups_strategy
+  );
+
+  let builders = [
+    AttrBuilder<(ins
+        CArg<"bool", "false">:$prefetch_shared_memory,
+        CArg<"bool", "false">:$no_reduce_shared_memory_bank_conflicts,
+        CArg<"std::optional<ReorderWorkgroupsStrategy>", "{}">:$reorder_workgroups_strategy)>
+  ];
+
+  let assemblyFormat = "`<` struct(params) `>`";
+
+  let extraClassDeclaration = [{
+    // Returns the key name for GPUPipelineOptionsAttr in the translation info
+    // config dictionary.
+    static StringRef getDictKeyName() {
+      return "gpu_pipeline_options";
+    }
+  }];
+}
+
 #endif // IREE_COMPILER_CODEGEN_DIALECT_GPU_IREEGPUATTRS
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUEnums.td b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUEnums.td
@@ -168,4 +168,28 @@ def IREEGPU_TilingLevel : IREEGPU_I32MmaEnumAttr<"TilingLevel",
       Lane
     ]>;
 
+//===----------------------------------------------------------------------===//
+// Pipeline options
+//===----------------------------------------------------------------------===//
+
+class IREEGPU_I32PipelineEnumAttr<string name, string summary, list<I32EnumAttrCase> cases>
+    : I32EnumAttr<name, summary, cases> {
+  let cppNamespace = "::mlir::iree_compiler::IREE::GPU";
+  let genSpecializedAttr = 0;
+}
+
+// ReorderWorkgroups EnumAttrCases.
+def ReorderWorkgroupsNone : I32EnumAttrCase<"None", 0>;
+def ReorderWorkgroupsSwizzle : I32EnumAttrCase<"Swizzle", 1>;
+def ReorderWorkgroupsTranspose : I32EnumAttrCase<"Transpose", 2>;
+
+// EnumAttr for workgroup reordering strategy enums.
+def IREEGPU_ReorderWorkgroupsStrategy : IREEGPU_I32PipelineEnumAttr<"ReorderWorkgroupsStrategy",
+    "Strategy for workgroup reordering", [
+      ReorderWorkgroupsNone,
+      ReorderWorkgroupsSwizzle,
+      ReorderWorkgroupsTranspose
+    ]> {
+}
+
 #endif // IREE_COMPILER_CODEGEN_DIALECT_GPU_IREEGPUENUMS
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.cpp b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.cpp
@@ -473,4 +473,73 @@ LogicalResult setTileAndFuseLoweringConfig(IREE::GPU::TargetAttr target,
       workgroupSize, subgroupSize, DictionaryAttr());
 }
 
+//===----------------------------------------------------------------------===//
+// Lowering Config Attributes
+//===----------------------------------------------------------------------===//
+
+GPUPipelineOptions
+getPipelineOptions(FunctionOpInterface funcOp,
+                   IREE::Codegen::TranslationInfoAttr translationInfo) {
+  GPUPipelineOptions pipelineOptions = {};
+  auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(funcOp);
+
+  if (DictionaryAttr config = translationInfo.getConfiguration()) {
+    std::optional<NamedAttribute> maybePipelineOptionsAttr =
+        config.getNamed(GPUPipelineOptionsAttr::getDictKeyName());
+    if (!maybePipelineOptionsAttr.has_value()) {
+      return pipelineOptions;
+    }
+    auto pipelineOptionsAttr =
+        cast<GPUPipelineOptionsAttr>(maybePipelineOptionsAttr->getValue());
+    BoolAttr prefetchSharedMemory =
+        pipelineOptionsAttr.getPrefetchSharedMemory();
+    if (prefetchSharedMemory) {
+      pipelineOptions.prefetchSharedMemory = prefetchSharedMemory.getValue();
+    }
+    BoolAttr noReduceBankConflicts =
+        pipelineOptionsAttr.getNoReduceSharedMemoryBankConflicts();
+    if (noReduceBankConflicts) {
+      pipelineOptions.enableReduceSharedMemoryBankConflicts =
+          !noReduceBankConflicts.getValue();
+    }
+    ReorderWorkgroupsStrategyAttr reorderWorkgroupsStrategy =
+        pipelineOptionsAttr.getReorderWorkgroupsStrategy();
+    if (reorderWorkgroupsStrategy) {
+      pipelineOptions.reorderStrategy = reorderWorkgroupsStrategy.getValue();
+    }
+  }
+
+  pipelineOptions.enableUkernels = targetAttr && hasUkernel(targetAttr);
+
+  LLVM_DEBUG(llvm::dbgs() << "GPU Pipeline Options: " << pipelineOptions
+                          << "\n");
+  return pipelineOptions;
+}
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                              const GPUPipelineOptions &options) {
+  StringRef reorderStr = "<not set>";
+  if (options.reorderStrategy) {
+    switch (options.reorderStrategy.value()) {
+    case ReorderWorkgroupsStrategy::Transpose:
+      reorderStr = "transpose";
+      break;
+    case ReorderWorkgroupsStrategy::Swizzle:
+      reorderStr = "swizzle";
+      break;
+    case ReorderWorkgroupsStrategy::None:
+      reorderStr = "none";
+      break;
+    default:
+      assert(false && "Unhandled reorder option");
+    }
+  }
+
+  return os << "{" << "enableReduceSharedMemoryBankConflicts = "
+            << options.enableReduceSharedMemoryBankConflicts << ", "
+            << ", prefetchSharedMemory = " << options.prefetchSharedMemory
+            << ", reorderWorkgroupsStrategy = " << reorderStr
+            << ", enableUkernels = " << options.enableUkernels << "}";
+}
+
 } // namespace mlir::iree_compiler::IREE::GPU
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.h b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.h
@@ -7,6 +7,7 @@
 #ifndef IREE_COMPILER_CODEGEN_DIALECT_GPU_TARGETUTILS_CONFIGUTILS_H_
 #define IREE_COMPILER_CODEGEN_DIALECT_GPU_TARGETUTILS_CONFIGUTILS_H_
 
+#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h"
 #include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.h"
 #include "mlir/IR/Operation.h"
 #include "mlir/Interfaces/FunctionInterfaces.h"
@@ -26,6 +27,26 @@ LogicalResult setTileAndFuseLoweringConfig(IREE::GPU::TargetAttr target,
                                            mlir::FunctionOpInterface entryPoint,
                                            Operation *op);
 
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Options
+//===----------------------------------------------------------------------===//
+
+using IREE::GPU::ReorderWorkgroupsStrategy;
+
+struct GPUPipelineOptions {
+  bool enableReduceSharedMemoryBankConflicts = true;
+  bool prefetchSharedMemory = false;
+  bool enableUkernels = false;
+  std::optional<ReorderWorkgroupsStrategy> reorderStrategy;
+};
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                              const GPUPipelineOptions &options);
+
+GPUPipelineOptions
+getPipelineOptions(FunctionOpInterface funcOp,
+                   IREE::Codegen::TranslationInfoAttr translationInfo);
+
 } // namespace mlir::iree_compiler::IREE::GPU
 
 #endif // IREE_COMPILER_CODEGEN_DIALECT_GPU_TARGETUTILS_CONFIGUTILS_H_
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
@@ -13,6 +13,7 @@
 #include "iree/compiler/Codegen/Common/GPU/GPUHeuristics.h"
 #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h"
 #include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.h"
+#include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUEnums.h"
 #include "iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.h"
 #include "iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.h"
 #include "iree/compiler/Codegen/Interfaces/UKernelOpInterface.h"
@@ -380,9 +381,14 @@ setConvolutionVectorDistributionConfig(IREE::GPU::TargetAttr target,
 
   // Prefetch shared memory if requested.
   if (clLLVMGPUEnablePrefetch) {
+    auto pipelineOptions = IREE::GPU::GPUPipelineOptionsAttr::get(
+        context, /*prefetchSharedMemory=*/true,
+        /*no_reduce_shared_memory_bank_conflicts=*/false,
+        /*reorder_workgroups_strategy=*/std::nullopt);
     attrs.emplace_back(
-        StringAttr::get(context, LLVMGPUAttrNames::kPrefetchSharedMemory),
-        UnitAttr::get(context));
+        StringAttr::get(context,
+                        IREE::GPU::GPUPipelineOptionsAttr::getDictKeyName()),
+        pipelineOptions);
   }
 
   auto configDict = DictionaryAttr::get(context, attrs);
@@ -610,9 +616,14 @@ setMatmulVectorDistributionConfig(IREE::GPU::TargetAttr target,
 
   // Prefetch shared memory if requested.
   if (clLLVMGPUEnablePrefetch) {
+    auto pipelineOptions = IREE::GPU::GPUPipelineOptionsAttr::get(
+        context, /*prefetchSharedMemory=*/true,
+        /*no_reduce_shared_memory_bank_conflicts=*/false,
+        /*reorder_workgroups_strategy=*/std::nullopt);
     attrs.emplace_back(
-        StringAttr::get(context, LLVMGPUAttrNames::kPrefetchSharedMemory),
-        UnitAttr::get(context));
+        StringAttr::get(context,
+                        IREE::GPU::GPUPipelineOptionsAttr::getDictKeyName()),
+        pipelineOptions);
   }
 
   auto configDict = DictionaryAttr::get(context, attrs);