Skip to content

Commit

Permalink
[NFC][GPU] Move LLVMGPUPipelineOptions to iree_gpu dialect (#18458)
Browse files Browse the repository at this point in the history
This moves `LLVMGPUPipelineOptions` to `Codegen/Dialect/GPU` so that
pipeline options can be set by iree_gpu lowering configuration logic
(like `setMatmulLoweringConfig` in `ConfigUtils.cpp`).

A new attribute `GPUPipelineOptionsAttr` is added, with optional
parameters defined for each existing pipeline option. The assembly of
the pipline attributes has changed, since the options are now part of
the `iree_gpu` dialect. For the purposes of setting user configurations
with transform dialect this PR changes the following:

All pipeline option attributes should now be contained in a single
`#iree_gpu.pipeline_options<>`
- `reorder_workgroups = "none"/"swizzle"/"transpose"` becomes one of the
pipeline options `reorder_workgroups_strategy = None/Swizzle/Transpose`
- `prefetch_shared_memory` becomes the pipeline option
`prefetch_shared_memory = true/false`
- `no_reduce_shared_memory_bank_conflicts` becomes the pipeline option
`no_reduce_shared_memory_bank_conflicts = true/false`

Example:

After this change, the translation_info config dict changes from
```
{reorder_workgroups = "swizzle", prefetch_shared_memory}
```
to
```
{gpu_pipeline_options =
    #iree_gpu.pipeline_options<
        prefetch_shared_memory = true, reorder_workgroups_strategy = Swizzle
    >}
```

---------

Signed-off-by: Max Dawkins <max.dawkins@gmail.com>
  • Loading branch information
Max191 committed Sep 10, 2024
1 parent b197555 commit 15d58e7
Show file tree
Hide file tree
Showing 14 changed files with 256 additions and 146 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -636,8 +636,8 @@ module attributes { transform.with_named_sequence } {
workgroup_size = [320, 1, 1] subgroup_size = 64,
{mma_schedule = #iree_gpu.mma_schedule<
intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
subgroup_m_count = 1, subgroup_n_count = 5>
, reorder_workgroups = "transpose"}>
subgroup_m_count = 1, subgroup_n_count = 5>,
gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = Transpose>}>
> -> !transform.any_param
transform.yield %conv, %config : !transform.any_op, !transform.any_param
}
Expand All @@ -656,8 +656,9 @@ module attributes { transform.with_named_sequence } {
workgroup_size = [256, 1, 1] subgroup_size = 64,
{mma_schedule = #iree_gpu.mma_schedule<
intrinsic = #iree_gpu.mma_layout<MFMA_F32_32x32x8_F16>,
subgroup_m_count = 1, subgroup_n_count = 4>
, reorder_workgroups = "transpose", llvm_func_attrs = {"amdgpu-waves-per-eu" = "1"}}>
subgroup_m_count = 1, subgroup_n_count = 4>,
gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = Transpose>,
llvm_func_attrs = {"amdgpu-waves-per-eu" = "1"}}>
> -> !transform.any_param
transform.yield %conv, %config : !transform.any_op, !transform.any_param
}
Expand All @@ -676,8 +677,8 @@ module attributes { transform.with_named_sequence } {
workgroup_size = [320, 1, 1] subgroup_size = 64,
{mma_schedule = #iree_gpu.mma_schedule<
intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
subgroup_m_count = 1, subgroup_n_count = 5>
, reorder_workgroups = "transpose"}>
subgroup_m_count = 1, subgroup_n_count = 5>,
gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = Transpose>}>
> -> !transform.any_param
transform.yield %conv, %config : !transform.any_op, !transform.any_param
}
Expand All @@ -696,8 +697,8 @@ module attributes { transform.with_named_sequence } {
workgroup_size = [320, 1, 1] subgroup_size = 64,
{mma_schedule = #iree_gpu.mma_schedule<
intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
subgroup_m_count = 1, subgroup_n_count = 5>
, reorder_workgroups = "transpose"}>
subgroup_m_count = 1, subgroup_n_count = 5>,
gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = Transpose>}>
> -> !transform.any_param
transform.yield %conv, %config : !transform.any_op, !transform.any_param
}
Expand All @@ -716,8 +717,9 @@ module attributes { transform.with_named_sequence } {
workgroup_size = [128, 4, 1] subgroup_size = 64,
{mma_schedule = #iree_gpu.mma_schedule<
intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
subgroup_m_count = 4, subgroup_n_count = 2>
, reorder_workgroups = "transpose", llvm_func_attrs = {"amdgpu-waves-per-eu" = "1"}}>
subgroup_m_count = 4, subgroup_n_count = 2>,
gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = Transpose>,
llvm_func_attrs = {"amdgpu-waves-per-eu" = "1"}}>
> -> !transform.any_param
transform.yield %conv, %config : !transform.any_op, !transform.any_param
}
Expand All @@ -736,8 +738,8 @@ module attributes { transform.with_named_sequence } {
workgroup_size = [320, 1, 1] subgroup_size = 64,
{mma_schedule = #iree_gpu.mma_schedule<
intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
subgroup_m_count = 1, subgroup_n_count = 5>
, reorder_workgroups = "transpose"}>
subgroup_m_count = 1, subgroup_n_count = 5>,
gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = Transpose>}>
> -> !transform.any_param
transform.yield %conv, %config : !transform.any_op, !transform.any_param
}
Expand Down
2 changes: 1 addition & 1 deletion compiler/src/iree/compiler/Codegen/Common/GPU/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ createGPUTensorAlloc(GPUPromoteSharedMemPattern promoteSharedMemPattern =
std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
createConvertVectorReductionToGPUPass(bool expandSubgroupReduction = true);

enum class ReorderWorkgroupsStrategy { None, Swizzle, Transpose };
using IREE::GPU::ReorderWorkgroupsStrategy;

/// Reorders workgroup IDs.
std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
Expand Down
19 changes: 19 additions & 0 deletions compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1352,6 +1352,25 @@ bool LaneIdAttr::isLinearMapping() const { return true; }

int64_t LaneIdAttr::getRelativeIndex() const { return getDim(); }

//===----------------------------------------------------------------------===//
// GPU Pipeline Options
//===----------------------------------------------------------------------===//

GPUPipelineOptionsAttr GPUPipelineOptionsAttr::get(
MLIRContext *context, bool prefetchSharedMemory,
bool noReduceSharedMemoryBankConflicts,
std::optional<ReorderWorkgroupsStrategy> reorderWorkgroupsStrategy) {
auto strategyAttr = ReorderWorkgroupsStrategyAttr();
if (reorderWorkgroupsStrategy) {
strategyAttr =
ReorderWorkgroupsStrategyAttr::get(context, *reorderWorkgroupsStrategy);
}
Builder b(context);
return Base::get(context, b.getBoolAttr(prefetchSharedMemory),
b.getBoolAttr(noReduceSharedMemoryBankConflicts),
strategyAttr);
}

//===----------------------------------------------------------------------===//
// Attribute Registration
//===----------------------------------------------------------------------===//
Expand Down
50 changes: 50 additions & 0 deletions compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td
Original file line number Diff line number Diff line change
Expand Up @@ -430,4 +430,54 @@ def IREEGPU_LaneIdAttr : AttrDef<IREEGPU_Dialect, "LaneId", [
}];
}

//===----------------------------------------------------------------------===//
// GPU Pipeline Options
//===----------------------------------------------------------------------===//

def IREEGPU_ReorderWorkgroupsStrategyAttr :
EnumAttr<IREEGPU_Dialect, IREEGPU_ReorderWorkgroupsStrategy, ""> {
let assemblyFormat = "``$value";
let cppNamespace = "::mlir::iree_compiler::IREE::GPU";
}

def IREEGPU_GPUPipelineOptionsAttr : AttrDef<IREEGPU_Dialect, "GPUPipelineOptions"> {
let summary = "GPU pipeline options attribute.";
let description = [{
This attributes describes lowering pipeline specific configuration options:
* prefetch_shared_memory: Boolean option indicating whether or not to run
the loop prefetching pass in the lowering pipeline.
* no_reduce_shared_memory_bank_conflicts: Boolean option indicating whether
or not to skip the bank conflict reduction pass in the lowering pipeline.
* reorder_workgroups_strategy: Enum attribute indicating which strategy to
choose for the workgroup reordering pass. Options are `None`, `Swizzle`,
and `Transpose`.
}];

let mnemonic = "pipeline_options";
let cppNamespace = "::mlir::iree_compiler::IREE::GPU";

let parameters = (ins
OptionalParameter<"BoolAttr">:$prefetch_shared_memory,
OptionalParameter<"BoolAttr">:$no_reduce_shared_memory_bank_conflicts,
OptionalParameter<"ReorderWorkgroupsStrategyAttr">:$reorder_workgroups_strategy
);

let builders = [
AttrBuilder<(ins
CArg<"bool", "false">:$prefetch_shared_memory,
CArg<"bool", "false">:$no_reduce_shared_memory_bank_conflicts,
CArg<"std::optional<ReorderWorkgroupsStrategy>", "{}">:$reorder_workgroups_strategy)>
];

let assemblyFormat = "`<` struct(params) `>`";

let extraClassDeclaration = [{
// Returns the key name for GPUPipelineOptionsAttr in the translation info
// config dictionary.
static StringRef getDictKeyName() {
return "gpu_pipeline_options";
}
}];
}

#endif // IREE_COMPILER_CODEGEN_DIALECT_GPU_IREEGPUATTRS
24 changes: 24 additions & 0 deletions compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUEnums.td
Original file line number Diff line number Diff line change
Expand Up @@ -168,4 +168,28 @@ def IREEGPU_TilingLevel : IREEGPU_I32MmaEnumAttr<"TilingLevel",
Lane
]>;

//===----------------------------------------------------------------------===//
// Pipeline options
//===----------------------------------------------------------------------===//

class IREEGPU_I32PipelineEnumAttr<string name, string summary, list<I32EnumAttrCase> cases>
: I32EnumAttr<name, summary, cases> {
let cppNamespace = "::mlir::iree_compiler::IREE::GPU";
let genSpecializedAttr = 0;
}

// ReorderWorkgroups EnumAttrCases.
def ReorderWorkgroupsNone : I32EnumAttrCase<"None", 0>;
def ReorderWorkgroupsSwizzle : I32EnumAttrCase<"Swizzle", 1>;
def ReorderWorkgroupsTranspose : I32EnumAttrCase<"Transpose", 2>;

// EnumAttr for workgroup reordering strategy enums.
def IREEGPU_ReorderWorkgroupsStrategy : IREEGPU_I32PipelineEnumAttr<"ReorderWorkgroupsStrategy",
"Strategy for workgroup reordering", [
ReorderWorkgroupsNone,
ReorderWorkgroupsSwizzle,
ReorderWorkgroupsTranspose
]> {
}

#endif // IREE_COMPILER_CODEGEN_DIALECT_GPU_IREEGPUENUMS
Original file line number Diff line number Diff line change
Expand Up @@ -473,4 +473,73 @@ LogicalResult setTileAndFuseLoweringConfig(IREE::GPU::TargetAttr target,
workgroupSize, subgroupSize, DictionaryAttr());
}

//===----------------------------------------------------------------------===//
// Lowering Config Attributes
//===----------------------------------------------------------------------===//

GPUPipelineOptions
getPipelineOptions(FunctionOpInterface funcOp,
IREE::Codegen::TranslationInfoAttr translationInfo) {
GPUPipelineOptions pipelineOptions = {};
auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(funcOp);

if (DictionaryAttr config = translationInfo.getConfiguration()) {
std::optional<NamedAttribute> maybePipelineOptionsAttr =
config.getNamed(GPUPipelineOptionsAttr::getDictKeyName());
if (!maybePipelineOptionsAttr.has_value()) {
return pipelineOptions;
}
auto pipelineOptionsAttr =
cast<GPUPipelineOptionsAttr>(maybePipelineOptionsAttr->getValue());
BoolAttr prefetchSharedMemory =
pipelineOptionsAttr.getPrefetchSharedMemory();
if (prefetchSharedMemory) {
pipelineOptions.prefetchSharedMemory = prefetchSharedMemory.getValue();
}
BoolAttr noReduceBankConflicts =
pipelineOptionsAttr.getNoReduceSharedMemoryBankConflicts();
if (noReduceBankConflicts) {
pipelineOptions.enableReduceSharedMemoryBankConflicts =
!noReduceBankConflicts.getValue();
}
ReorderWorkgroupsStrategyAttr reorderWorkgroupsStrategy =
pipelineOptionsAttr.getReorderWorkgroupsStrategy();
if (reorderWorkgroupsStrategy) {
pipelineOptions.reorderStrategy = reorderWorkgroupsStrategy.getValue();
}
}

pipelineOptions.enableUkernels = targetAttr && hasUkernel(targetAttr);

LLVM_DEBUG(llvm::dbgs() << "GPU Pipeline Options: " << pipelineOptions
<< "\n");
return pipelineOptions;
}

llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
const GPUPipelineOptions &options) {
StringRef reorderStr = "<not set>";
if (options.reorderStrategy) {
switch (options.reorderStrategy.value()) {
case ReorderWorkgroupsStrategy::Transpose:
reorderStr = "transpose";
break;
case ReorderWorkgroupsStrategy::Swizzle:
reorderStr = "swizzle";
break;
case ReorderWorkgroupsStrategy::None:
reorderStr = "none";
break;
default:
assert(false && "Unhandled reorder option");
}
}

return os << "{" << "enableReduceSharedMemoryBankConflicts = "
<< options.enableReduceSharedMemoryBankConflicts << ", "
<< ", prefetchSharedMemory = " << options.prefetchSharedMemory
<< ", reorderWorkgroupsStrategy = " << reorderStr
<< ", enableUkernels = " << options.enableUkernels << "}";
}

} // namespace mlir::iree_compiler::IREE::GPU
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#ifndef IREE_COMPILER_CODEGEN_DIALECT_GPU_TARGETUTILS_CONFIGUTILS_H_
#define IREE_COMPILER_CODEGEN_DIALECT_GPU_TARGETUTILS_CONFIGUTILS_H_

#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h"
#include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.h"
#include "mlir/IR/Operation.h"
#include "mlir/Interfaces/FunctionInterfaces.h"
Expand All @@ -26,6 +27,26 @@ LogicalResult setTileAndFuseLoweringConfig(IREE::GPU::TargetAttr target,
mlir::FunctionOpInterface entryPoint,
Operation *op);

//===----------------------------------------------------------------------===//
// Pass Pipeline Options
//===----------------------------------------------------------------------===//

using IREE::GPU::ReorderWorkgroupsStrategy;

struct GPUPipelineOptions {
bool enableReduceSharedMemoryBankConflicts = true;
bool prefetchSharedMemory = false;
bool enableUkernels = false;
std::optional<ReorderWorkgroupsStrategy> reorderStrategy;
};

llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
const GPUPipelineOptions &options);

GPUPipelineOptions
getPipelineOptions(FunctionOpInterface funcOp,
IREE::Codegen::TranslationInfoAttr translationInfo);

} // namespace mlir::iree_compiler::IREE::GPU

#endif // IREE_COMPILER_CODEGEN_DIALECT_GPU_TARGETUTILS_CONFIGUTILS_H_
19 changes: 15 additions & 4 deletions compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "iree/compiler/Codegen/Common/GPU/GPUHeuristics.h"
#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h"
#include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.h"
#include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUEnums.h"
#include "iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.h"
#include "iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.h"
#include "iree/compiler/Codegen/Interfaces/UKernelOpInterface.h"
Expand Down Expand Up @@ -380,9 +381,14 @@ setConvolutionVectorDistributionConfig(IREE::GPU::TargetAttr target,

// Prefetch shared memory if requested.
if (clLLVMGPUEnablePrefetch) {
auto pipelineOptions = IREE::GPU::GPUPipelineOptionsAttr::get(
context, /*prefetchSharedMemory=*/true,
/*no_reduce_shared_memory_bank_conflicts=*/false,
/*reorder_workgroups_strategy=*/std::nullopt);
attrs.emplace_back(
StringAttr::get(context, LLVMGPUAttrNames::kPrefetchSharedMemory),
UnitAttr::get(context));
StringAttr::get(context,
IREE::GPU::GPUPipelineOptionsAttr::getDictKeyName()),
pipelineOptions);
}

auto configDict = DictionaryAttr::get(context, attrs);
Expand Down Expand Up @@ -610,9 +616,14 @@ setMatmulVectorDistributionConfig(IREE::GPU::TargetAttr target,

// Prefetch shared memory if requested.
if (clLLVMGPUEnablePrefetch) {
auto pipelineOptions = IREE::GPU::GPUPipelineOptionsAttr::get(
context, /*prefetchSharedMemory=*/true,
/*no_reduce_shared_memory_bank_conflicts=*/false,
/*reorder_workgroups_strategy=*/std::nullopt);
attrs.emplace_back(
StringAttr::get(context, LLVMGPUAttrNames::kPrefetchSharedMemory),
UnitAttr::get(context));
StringAttr::get(context,
IREE::GPU::GPUPipelineOptionsAttr::getDictKeyName()),
pipelineOptions);
}

auto configDict = DictionaryAttr::get(context, attrs);
Expand Down
Loading

0 comments on commit 15d58e7

Please sign in to comment.