Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add workgroup chipletgroup strategy to workgroup reordering pass #17811

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion compiler/src/iree/compiler/Codegen/Common/GPU/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ createConvertVectorReductionToGPUPass(
bool expandSubgroupReduction = true,
std::function<int(mlir::FunctionOpInterface)> getWarpSize = nullptr);

enum class ReorderWorkgroupsStrategy { None, Swizzle, Transpose };
enum class ReorderWorkgroupsStrategy { None, ChipletGroup, Swizzle, Transpose };

/// Reorders workgroup IDs.
std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
Expand Down
5 changes: 3 additions & 2 deletions compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -199,10 +199,11 @@ def ReorderWorkgroupsPass :
let dependentDialects = ["::mlir::affine::AffineDialect"];
let options = [
Option<"strategy", "strategy", "std::string", /*default=*/"",
"Workgroup reordering strategy, one of: '' (none), 'transpose', 'swizzle'">,
"Workgroup reordering strategy, one of: '' (none), 'transpose', 'swizzle', 'chipletgroup'">,
Option<"logTile", "logTile", "unsigned",
/*default=*/"0",
"The log2 of the tile size used for swizzling. (0: disabled, non-0: swizzling enabled)">,
"The log2 of the tile size used for swizzling and chipletgroup. "
bangtianliu marked this conversation as resolved.
Show resolved Hide resolved
"(0: disabled, non-0: swizzling/chipletgroup enabled)">,
];
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,95 @@ makeSwizzledIds(Location loc, OpBuilder b, Value workgroupIdX,
return {swizzledIdX, swizzledIdY};
}

// Reorders a linearized workgroup ID so that workgroup IDs move slowly between
// chiplet groups.
//
// With N = numChipletsPerGroup and count = workgroupCountX * workgroupCountY,
// the emitted IR computes
//   reorderedId = linearizedId / N + (linearizedId % N) * (count / N)
// and returns `reorderedId` for every ID that is not greater than
// (count - 1) - (count % N). IDs above that bound (the remainder that does not
// fill a whole multiple of N) are returned unchanged.
//
// All arithmetic is emitted as unsigned `arith` ops at `loc` through `b`; the
// returned Value is the result of the final `arith.select`.
//
// NOTE(review): when count < N, `count % N` equals `count`, so
// (count - 1) - (count % N) evaluates to -1, which the unsigned `ugt` compare
// treats as the largest index value. No ID then takes the pass-through branch,
// and since count / N is 0 every ID appears to map to 0. Confirm whether grids
// with fewer than N workgroups can reach this code.
bangtianliu marked this conversation as resolved.
Show resolved Hide resolved
static Value chipletAwareWorkgroupReordering(Location loc, OpBuilder b,
Value linearizedId,
Value workgroupCountX,
Value workgroupCountY,
int64_t numChipletsPerGroup) {
bangtianliu marked this conversation as resolved.
Show resolved Hide resolved
// N as an index constant.
Value numChipletsVal =
b.createOrFold<arith::ConstantIndexOp>(loc, numChipletsPerGroup);
// Total number of workgroups in the 2-D grid.
Value workgroupCount =
b.create<arith::MulIOp>(loc, workgroupCountX, workgroupCountY);
// count / N (unsigned, rounds down).
Value workgroupCountPerChiplet =
b.create<arith::DivUIOp>(loc, workgroupCount, numChipletsVal);
// linearizedId % N selects the chiplet; linearizedId / N is the position
// within that chiplet.
Value chipletId = b.create<arith::RemUIOp>(loc, linearizedId, numChipletsVal);
Value wgIdWithinChiplet =
b.create<arith::DivUIOp>(loc, linearizedId, numChipletsVal);
// Place the IDs of each chiplet in one contiguous range of count / N IDs.
Value reorderedId = b.create<arith::AddIOp>(
loc, wgIdWithinChiplet,
b.create<arith::MulIOp>(loc, chipletId, workgroupCountPerChiplet));

// Handle the remainder: the trailing `count % N` workgroup IDs keep their
// original linearized ID instead of being reordered.
kuhar marked this conversation as resolved.
Show resolved Hide resolved

Value constOne = b.createOrFold<arith::ConstantIndexOp>(loc, 1);
Value lastWorkgroupId =
b.create<arith::SubIOp>(loc, workgroupCount, constOne);
// Largest ID that still takes part in the reordering:
// (count - 1) - (count % N).
Value modulatedLastWorkgroupId = b.create<arith::SubIOp>(
loc, lastWorkgroupId,
b.create<arith::RemUIOp>(loc, workgroupCount, numChipletsVal));
Value isGreaterThanFinalWorkgroupId = b.create<arith::CmpIOp>(
loc, arith::CmpIPredicate::ugt, linearizedId, modulatedLastWorkgroupId);
// Select the original ID for the remainder and the reordered ID otherwise.
linearizedId = b.create<arith::SelectOp>(loc, isGreaterThanFinalWorkgroupId,
bangtianliu marked this conversation as resolved.
Show resolved Hide resolved
linearizedId, reorderedId);

return linearizedId;
}

// Chiplet-aware workgroup reordering strategy: reordering + super-grouping.
// Step 1: Reorder the workgroup grid to move slowly between
// chiplet groups (Function: chipletAwareWorkgroupReordering).
// Step 2: Implement 'super-grouping' of workgroups before switching to the next
// column.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Say what the return value is.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not addressed.

// Returns the reordered workgroup IDs as the pair {newX, newY}.
//
// The 2-D ID (workgroupIdX, workgroupIdY) is linearized as
// workgroupIdY * workgroupCountX + workgroupIdX, passed through
// chipletAwareWorkgroupReordering, and then split again so that consecutive
// linearized IDs first walk down a group of up to `chipletGroupTile` rows
// before moving to the next column.
static std::pair<Value, Value>
makeChipletGroupedIds(Location loc, OpBuilder b, Value workgroupIdX,
Value workgroupIdY, Value workgroupCountX,
Value workgroupCountY, unsigned chipletGroupTile) {
// Create a one-dimensional (linearized) ID for the workgroup.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Create one dimension ID for workgroup
// Create one dimension ID for workgroup.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not addressed

Value linearized =
b.create<arith::MulIOp>(loc, workgroupIdY, workgroupCountX);
linearized = b.create<arith::AddIOp>(loc, linearized, workgroupIdX);

// This value is hardcoded for CDNA3 (MI300X).
bangtianliu marked this conversation as resolved.
Show resolved Hide resolved
int64_t numXCDs = 8;
// Map chiplets to perform a spatially local tile operation.
// Reorder the linearized ID such that every consecutive group of chiplets
// is the slowest-changing dimension in the grid.
// Empirically, treating two chiplets as a group was found to give better
// locality throughout, hence the `numXCDs / 2` divisor passed below.
linearized = chipletAwareWorkgroupReordering(
loc, b, linearized, workgroupCountX, workgroupCountY, numXCDs / 2);

// For a detailed explanation of the idea behind the implementation below,
// see the "L2 Cache Optimizations" subsection in
// https://triton-lang.org/main/getting-started/tutorials/03-matrix-multiplication.html#
bangtianliu marked this conversation as resolved.
Show resolved Hide resolved
// Empirically, rowGroupSize = 16 was found to achieve good performance on
// MI300X; the value actually used here is whatever the caller passes in.
unsigned rowGroupSize = chipletGroupTile;
Value rowGroupSizeVal =
b.createOrFold<arith::ConstantIndexOp>(loc, rowGroupSize);
// Group every `rowGroupSize` rows of workgroups along the Y dimension.
// Number of workgroups in one such row group: rowGroupSize * workgroupCountX.
bangtianliu marked this conversation as resolved.
Show resolved Hide resolved
Value numWorkGroupsPerRowBlock =
b.create<arith::MulIOp>(loc, rowGroupSizeVal, workgroupCountX);

// Index of the row group this workgroup falls into, and that group's first
// row.
Value groupId =
b.create<arith::DivUIOp>(loc, linearized, numWorkGroupsPerRowBlock);
Value firstRowID = b.create<arith::MulIOp>(loc, groupId, rowGroupSizeVal);

// The last row group may hold fewer than `rowGroupSize` rows, so clamp to the
// number of rows remaining.
Value currentRowGroupSize = b.create<arith::MinUIOp>(
loc, b.create<arith::SubIOp>(loc, workgroupCountY, firstRowID),
rowGroupSizeVal);

// Row within the group: linearized % currentRowGroupSize, offset by the
// group's first row.
Value newY = b.create<arith::AddIOp>(
loc, firstRowID,
b.create<arith::RemUIOp>(loc, linearized, currentRowGroupSize));

// Column: position inside the row group divided by the group's row count.
Value newX = b.create<arith::DivUIOp>(
loc, b.create<arith::RemUIOp>(loc, linearized, numWorkGroupsPerRowBlock),
currentRowGroupSize);
return {newX, newY};
}

/// Transpose IDs, i.e., changes the traversal order from left -> right then
/// top -> bottom to top -> bottom then left -> right.
static std::pair<Value, Value> makeTransposedIds(Location loc, OpBuilder b,
Expand Down Expand Up @@ -112,11 +201,11 @@ getWorkgroupCountsXY(OpBuilder &builder, FunctionOpInterface funcOp) {

static LogicalResult reorderWorkgroupsInFunc(FunctionOpInterface funcOp,
ReorderWorkgroupsStrategy strategy,
unsigned swizzleLogTile) {
unsigned logTile) {
assert(strategy != ReorderWorkgroupsStrategy::None &&
"Expected a concrete strategy");

unsigned swizzleTile = 1u << swizzleLogTile;
unsigned reorderWgTileSize = 1u << logTile;
IREE::HAL::InterfaceWorkgroupIDOp oldXId;
IREE::HAL::InterfaceWorkgroupIDOp oldYId;
unsigned numXIdOps = 0;
Expand Down Expand Up @@ -153,7 +242,11 @@ static LogicalResult reorderWorkgroupsInFunc(FunctionOpInterface funcOp,
if (strategy == ReorderWorkgroupsStrategy::Swizzle) {
std::tie(newWorkgroupIdX, newWorkgroupIdY) =
makeSwizzledIds(funcOp.getLoc(), builder, workgroupIdX, workgroupIdY,
workgroupCntX, workgroupCntY, swizzleTile);
workgroupCntX, workgroupCntY, reorderWgTileSize);
} else if (strategy == ReorderWorkgroupsStrategy::ChipletGroup) {
bangtianliu marked this conversation as resolved.
Show resolved Hide resolved
std::tie(newWorkgroupIdX, newWorkgroupIdY) = makeChipletGroupedIds(
funcOp.getLoc(), builder, workgroupIdX, workgroupIdY, workgroupCntX,
workgroupCntY, reorderWgTileSize);
} else {
assert(strategy == ReorderWorkgroupsStrategy::Transpose &&
"Unhandled strategy");
Expand Down Expand Up @@ -186,9 +279,9 @@ namespace {
struct ReorderWorkgroupsPass final
: impl::ReorderWorkgroupsPassBase<ReorderWorkgroupsPass> {
ReorderWorkgroupsPass(
ReorderWorkgroupsStrategy strategy, unsigned logSwizzleTile,
ReorderWorkgroupsStrategy strategy, unsigned logTile,
std::function<LogicalResult(mlir::FunctionOpInterface)> filterFn)
: reorderingStrategy(strategy), logSwizzleTile(logSwizzleTile),
: reorderingStrategy(strategy), reorderWgLogTileSize(logTile),
filterFn(std::move(filterFn)) {}

LogicalResult initializeOptions(
Expand All @@ -197,10 +290,11 @@ struct ReorderWorkgroupsPass final
if (failed(Pass::initializeOptions(options, errorHandler))) {
return failure();
}
logSwizzleTile = logTile;
reorderWgLogTileSize = logTile;
auto selectedStrategy =
llvm::StringSwitch<FailureOr<ReorderWorkgroupsStrategy>>(strategy)
.Case("", ReorderWorkgroupsStrategy::None)
.Case("chipletgroup", ReorderWorkgroupsStrategy::ChipletGroup)
.Case("swizzle", ReorderWorkgroupsStrategy::Swizzle)
.Case("transpose", ReorderWorkgroupsStrategy::Transpose)
.Default(failure());
Expand All @@ -216,7 +310,11 @@ struct ReorderWorkgroupsPass final
return;

if (reorderingStrategy == ReorderWorkgroupsStrategy::Swizzle &&
logSwizzleTile == 0)
reorderWgLogTileSize == 0)
return;

if (reorderingStrategy == ReorderWorkgroupsStrategy::ChipletGroup &&
reorderWgLogTileSize == 0)
return;

FunctionOpInterface funcOp = getOperation();
Expand All @@ -229,7 +327,8 @@ struct ReorderWorkgroupsPass final
llvm::dbgs() << "\n\n";
});

if (failed(reorderWorkgroupsInFunc(funcOp, reorderingStrategy, logTile))) {
if (failed(reorderWorkgroupsInFunc(funcOp, reorderingStrategy,
reorderWgLogTileSize))) {
LLVM_DEBUG(llvm::dbgs() << "Failed to reorder workgroups\n");
return;
}
Expand All @@ -244,7 +343,7 @@ struct ReorderWorkgroupsPass final
private:
ReorderWorkgroupsStrategy reorderingStrategy =
ReorderWorkgroupsStrategy::None;
unsigned logSwizzleTile = 0;
unsigned reorderWgLogTileSize = 0;
std::function<LogicalResult(mlir::FunctionOpInterface)> filterFn;
};
} // namespace
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-reorder-workgroups{strategy=transpose}))" \
// RUN: --split-input-file %s | FileCheck --check-prefix=TRANSPOSE %s

// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-reorder-workgroups{strategy=chipletgroup logTile=3}))" \
// RUN: --split-input-file %s | FileCheck --check-prefix=CHIPLETGROUP %s

func.func @matmul() {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
Expand Down Expand Up @@ -55,6 +58,41 @@ func.func @matmul() {
// SWIZZLE: %[[S13:.*]] = arith.select %[[S12]], %[[WG_X]], %[[S6]] : index
// SWIZZLE: %[[S14:.*]] = arith.select %[[S12]], %[[WG_Y]], %[[S7]] : index

// CHIPLETGROUP-LABEL: func.func @matmul
// CHIPLETGROUP: %[[WG_X:.*]] = hal.interface.workgroup.id[0] : index
// CHIPLETGROUP: %[[WG_Y:.*]] = hal.interface.workgroup.id[1] : index
// CHIPLETGROUP: %[[WG_CNT_X:.*]] = hal.interface.workgroup.count[0] : index
// CHIPLETGROUP: %[[WG_CNT_Y:.*]] = hal.interface.workgroup.count[1] : index
// CHIPLETGROUP: %[[S0:.*]] = arith.muli %[[WG_Y]], %[[WG_CNT_X]] : index
// CHIPLETGROUP: %[[S1:.*]] = arith.addi %[[S0]], %[[WG_X]] : index
// CHIPLETGROUP: %[[CST4:.*]] = arith.constant 4 : index
// CHIPLETGROUP: %[[WG_CNT:.*]] = arith.muli %[[WG_CNT_X]], %[[WG_CNT_Y]] : index
// CHIPLETGROUP: %[[S3:.*]] = arith.divui %[[WG_CNT]], %[[CST4]] : index
// CHIPLETGROUP: %[[S4:.*]] = arith.remui %[[S1]], %[[CST4]] : index
// CHIPLETGROUP: %[[S5:.*]] = arith.divui %[[S1]], %[[CST4]] : index
// CHIPLETGROUP: %[[S6:.*]] = arith.muli %[[S4]], %[[S3]] : index
// CHIPLETGROUP: %[[S7:.*]] = arith.addi %[[S5]], %[[S6]] : index
// CHIPLETGROUP: %[[CST1:.*]] = arith.constant 1 : index
// CHIPLETGROUP: %[[S8:.*]] = arith.subi %[[WG_CNT]], %[[CST1]] : index
// CHIPLETGROUP: %[[S9:.*]] = arith.remui %[[WG_CNT]], %[[CST4]] : index
// CHIPLETGROUP: %[[S10:.*]] = arith.subi %[[S8]], %[[S9]] : index
// CHIPLETGROUP: %[[S11:.*]] = arith.cmpi ugt, %[[S1]], %[[S10]] : index
// CHIPLETGROUP: %[[S12:.*]] = arith.select %[[S11]], %[[S1]], %[[S7]] : index
// CHIPLETGROUP: %[[CST8:.*]] = arith.constant 8 : index
// CHIPLETGROUP: %[[S13:.*]] = arith.muli %[[CST8]], %[[WG_CNT_X]] : index
// CHIPLETGROUP: %[[S14:.*]] = arith.divui %[[S12]], %[[S13]] : index
// CHIPLETGROUP: %[[S15:.*]] = arith.muli %[[S14]], %[[CST8]] : index
// CHIPLETGROUP: %[[S16:.*]] = arith.subi %[[WG_CNT_Y]], %[[S15]] : index
// CHIPLETGROUP: %[[S17:.*]] = arith.minui %[[S16]], %[[CST8]] : index
// CHIPLETGROUP: %[[S18:.*]] = arith.remui %[[S12]], %[[S17]] : index
// CHIPLETGROUP: %[[S19:.*]] = arith.addi %[[S15]], %[[S18]] : index
// CHIPLETGROUP: %[[S20:.*]] = arith.remui %[[S12]], %[[S13]] : index
// CHIPLETGROUP: %[[S21:.*]] = arith.divui %[[S20]], %[[S17]] : index
// CHIPLETGROUP: %26 = affine.apply #map()[%[[S19]]]
// CHIPLETGROUP: %27 = affine.apply #map()[%workgroup_count_y_1]
// CHIPLETGROUP: %28 = affine.apply #map()[%[[S21]]]
// CHIPLETGROUP: %29 = affine.apply #map()[%workgroup_count_x_0]

// TRANSPOSE-LABEL: func.func @matmul
// TRANSPOSE: %[[WG_X:.*]] = hal.interface.workgroup.id[0] : index
// TRANSPOSE: %[[WG_Y:.*]] = hal.interface.workgroup.id[1] : index
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
// RUN: iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-reorder-workgroups{strategy=transpose})))))" \
// RUN: %s | FileCheck --check-prefix=TRANSPOSE %s

// RUN: iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-reorder-workgroups{strategy=chipletgroup logTile=3})))))" \
// RUN: %s | FileCheck --check-prefix=CHIPLETGROUP %s

// Make sure we use static workgroup counts instead of introducing
// `hal.interface.workgroup.count` ops. These are currently not supported on ROCm.

Expand All @@ -18,6 +21,25 @@
// SWIZZLE-DAG: affine.apply #{{.+}}()[%[[SEL_Y]]]
// SWIZZLE: return

// CHIPLETGROUP-LABEL: hal.executable private @main_dispatch_0 {
// CHIPLETGROUP-LABEL: func.func @main_dispatch_0_matmul_transpose_b_32000x32000x4096_f16
// CHIPLETGROUP-DAG: %[[WG_X:.+]] = hal.interface.workgroup.id[0] : index
// CHIPLETGROUP-DAG: %[[WG_Y:.+]] = hal.interface.workgroup.id[1] : index
// CHIPLETGROUP-NOT: hal.interface.workgroup.count
// CHIPLETGROUP-DAG: %[[C250:.+]] = arith.constant 250 : index
// CHIPLETGROUP-DAG: %[[C500:.+]] = arith.constant 500 : index
// CHIPLETGROUP: %[[MUL:.+]] = arith.muli %[[WG_Y]], %[[C250]] : index
// CHIPLETGROUP: %[[ADD:.+]] = arith.addi %[[MUL]], %[[WG_X]] : index
// CHIPLETGROUP: %[[CMP:.+]] = arith.cmpi ugt, %[[ADD]], %{{.+}} : index
// CHIPLETGROUP: %[[SELECT:.+]] = arith.select %[[CMP]], %[[ADD]], %{{.+}} : index
// CHIPLETGROUP: %[[REM:.+]] = arith.remui %[[SELECT]], %{{.+}} : index
// CHIPLETGROUP: %[[ADDI:.+]] = arith.addi %{{.+}}, %[[REM]] : index
// CHIPLETGROUP: %[[REMI:.+]] = arith.remui %[[SELECT]], %{{.+}} : index
// CHIPLETGROUP: %[[DIV:.+]] = arith.divui %[[REMI]], %{{.+}} : index
// CHIPLETGROUP-DAG: affine.apply #{{.+}}()[%[[ADDI]]]
// CHIPLETGROUP-DAG: affine.apply #{{.+}}()[%[[DIV]]]
// CHIPLETGROUP: return

// TRANSPOSE-LABEL: hal.executable private @main_dispatch_0 {
// TRANSPOSE-LABEL: func.func @main_dispatch_0_matmul_transpose_b_32000x32000x4096_f16
// TRANSPOSE-DAG: %[[WG_X:.+]] = hal.interface.workgroup.id[0] : index
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,27 @@ def IREEGPU_MmaScheduleAttr : AttrDef<IREEGPU_Dialect, "MMASchedule"> {
}];
}

//===----------------------------------------------------------------------===//
// Workgroup Reordering Attr

// Attribute carrying the workgroup reordering configuration: which reordering
// strategy to apply plus an optional tile size. It is printed and parsed with
// the `reorder_workgroups` mnemonic, and the pipeline option code refers to
// the C++ class as IREE::GPU::WorkgroupReorderOptionsAttr.
// NOTE(review): the description below is introduced with
// `string description = ...` rather than `let description = ...`; confirm
// this is intended.
def IREEGPU_WorkGroupReorderAttr: AttrDef<IREEGPU_Dialect, "WorkgroupReorderOptions">{
let mnemonic = "reorder_workgroups";
qedawkins marked this conversation as resolved.
Show resolved Hide resolved
let cppNamespace = "::mlir::iree_compiler::IREE::GPU";

string description = [{
options for workgroup reordering strategies to improve L2 cache hit rate
bangtianliu marked this conversation as resolved.
Show resolved Hide resolved
and thus provide performance improvement.
}];

// `reorder_option` selects the strategy (a ReorderWorkgroupEnum value);
// `tileSize` is optional.
// NOTE(review): getPipelineOptions assigns getTileSize() to
// `reorderWgLogTileSize`, which suggests the value is a log2 tile size rather
// than a tile size -- confirm, and consider matching the parameter naming
// style (`reorder_option` vs `tileSize`).
let parameters = (ins
"::mlir::iree_compiler::IREE::GPU::ReorderWorkgroupEnum":$reorder_option,
OptionalParameter<"std::optional<int64_t>", "the tile size to use">:$tileSize
);

// Printed in struct form, i.e. `<name = value, ...>`.
let assemblyFormat = "`<` struct(params) `>`";
}


//===----------------------------------------------------------------------===//
// Workgroup processor level description

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,4 +148,20 @@ def IREEGPU_TilingLevel : IREEGPU_I32MmaEnumAttr<"TilingLevel",
Lane
]>;

//===----------------------------------------------------------------------===//
// Workgroup reordering strategies

// Enum cases for the workgroup reordering strategy. Each case pairs the
// symbol used in C++ and in the textual form with its 32-bit integer value.
// NOTE(review): these record names (`None`, `Transpose`, ...) carry no prefix;
// confirm they cannot clash with other records visible in this file.
def None : I32EnumAttrCase<"none", 0>;
def Transpose :I32EnumAttrCase<"transpose", 1>;
def Swizzle : I32EnumAttrCase<"swizzle", 2>;
def Chipletgroup : I32EnumAttrCase<"chipletgroup", 3>;

// The enum itself; the pipeline option code refers to it as
// IREE::GPU::ReorderWorkgroupEnum (e.g. ReorderWorkgroupEnum::chipletgroup).
def IREEGPU_ReorderWorkgroupEnum : IREEGPU_I32MmaEnumAttr<"ReorderWorkgroupEnum",
"Descriptor for strategies of reordering workgroups on GPUs", [
None,
Transpose,
Swizzle,
Chipletgroup
]>;

#endif // IREE_COMPILER_CODEGEN_DIALECT_GPU_IREEGPUENUMS
Original file line number Diff line number Diff line change
Expand Up @@ -92,21 +92,31 @@ getPipelineOptions(FunctionOpInterface funcOp,
// Get the workgroups reorder config and enable the workgroup reordering.
Attribute reorderWorkgroupOption =
config.get(LLVMGPUAttrNames::kReorderWorkgroups);
if (!isa<StringAttr>(reorderWorkgroupOption))
funcOp.emitOpError() << "'" << LLVMGPUAttrNames::kReorderWorkgroups
<< "' is expected to be a string attribute";
StringRef reorderStr = llvm::cast<StringAttr>(reorderWorkgroupOption);
if (reorderStr == "transpose") {
pipelineOptions.reorderStrategy = ReorderWorkgroupsStrategy::Transpose;
} else if (reorderStr == "swizzle") {
pipelineOptions.reorderStrategy = ReorderWorkgroupsStrategy::Swizzle;
} else {
if (reorderStr != "none")
funcOp.emitOpError()
<< "Unknown " << LLVMGPUAttrNames::kReorderWorkgroups
<< "value: " << reorderWorkgroupOption;
else
if (llvm::isa<IREE::GPU::WorkgroupReorderOptionsAttr>(
bangtianliu marked this conversation as resolved.
Show resolved Hide resolved
reorderWorkgroupOption)) {
IREE::GPU::WorkgroupReorderOptionsAttr ReorderOption =
llvm::dyn_cast<IREE::GPU::WorkgroupReorderOptionsAttr>(
bangtianliu marked this conversation as resolved.
Show resolved Hide resolved
reorderWorkgroupOption);
pipelineOptions.reorderWgLogTileSize = ReorderOption.getTileSize();
switch (ReorderOption.getReorderOption()) {
case IREE::GPU::ReorderWorkgroupEnum::none:
pipelineOptions.reorderStrategy = ReorderWorkgroupsStrategy::None;
break;
case IREE::GPU::ReorderWorkgroupEnum::transpose:
pipelineOptions.reorderStrategy =
ReorderWorkgroupsStrategy::Transpose;
break;
case IREE::GPU::ReorderWorkgroupEnum::swizzle:
pipelineOptions.reorderStrategy = ReorderWorkgroupsStrategy::Swizzle;
break;
case IREE::GPU::ReorderWorkgroupEnum::chipletgroup:
pipelineOptions.reorderStrategy =
ReorderWorkgroupsStrategy::ChipletGroup;
break;
default:
funcOp.emitOpError(
"unsupported workgroup reordering option on GPU target.");
}
}
}
}
Expand Down
Loading