Skip to content

Commit

Permalink
GPU data tiling changes from `shared/gpu-data-tiling-materialize-enco…
Browse files Browse the repository at this point in the history
…ding` (#18492)

This PR is a squashed rebasing of
https://github.com/iree-org/iree/tree/shared/gpu-data-tiling-materialize-encoding
.

This squashes together commits by @hanhanW , @lialan and myself. Here
are all the commits:

40258db...shared/gpu-data-tiling-materialize-encoding

The intent is to continue this work on the `main` branch. The motivation is to pick up
the recent TileAndFuse pipeline.

---------

Signed-off-by: hanhanW <hanhan0912@gmail.com>
Signed-off-by: Alan Li <me@alanli.org>
Signed-off-by: Benoit Jacob <jacob.benoit.1@gmail.com>
Co-authored-by: hanhanW <hanhan0912@gmail.com>
Co-authored-by: Alan Li <me@alanli.org>
  • Loading branch information
3 people committed Sep 11, 2024
1 parent bb82e78 commit 4395c11
Show file tree
Hide file tree
Showing 19 changed files with 1,371 additions and 48 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -464,9 +464,11 @@ materializeFuncOpEncodings(FunctionOpInterface funcOp,
targetAttr);
MaterializeEncodingConversionTarget target(*funcOp.getContext());
auto materializeEncodingValueFn = getMaterializeEncodingValueFn(targetAttr);
populateMaterializeEncodingIntoPackUnPackPatterns(materializeEncodingPattern,
target, typeConverter,
materializeEncodingValueFn);
populateMaterializeEncodingIntoPackUnPackPatterns(
materializeEncodingPattern, typeConverter, materializeEncodingValueFn);
populateIREEMaterializeEncodingIntoPackUnPackPatterns(
materializeEncodingPattern, target, typeConverter,
materializeEncodingValueFn);

if (failed(applyPartialConversion(funcOp, target,
std::move(materializeEncodingPattern)))) {
Expand Down
44 changes: 30 additions & 14 deletions compiler/src/iree/compiler/Codegen/Common/EncodingUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,29 @@ MaterializeEncodingTypeConverter::MaterializeEncodingTypeConverter(
if (failed(maybeEncodingInfo)) {
return dropEncoding(type);
}
return cast<RankedTensorType>(tensor::PackOp::inferPackedType(
auto encodingInfo = *maybeEncodingInfo;
auto packedType = cast<RankedTensorType>(tensor::PackOp::inferPackedType(
tensorType, maybeEncodingInfo->innerTileSizes,
maybeEncodingInfo->innerDimsPos, maybeEncodingInfo->outerDimsPerm));

// There is no swizzle, we are already done. Typically the case on CPU.
if (!encodingInfo.swizzle) {
return packedType;
}

// There is a swizzle, we need to handle it. Typically the case on GPU.
auto swizzle = *encodingInfo.swizzle;
SmallVector<int64_t> newShape(
packedType.getShape().drop_back(encodingInfo.innerTileSizes.size()));
SmallVector<int64_t> swizzledTileShape;
for (auto expandedDimShape : swizzle.expandShape) {
for (int64_t d : expandedDimShape) {
swizzledTileShape.push_back(d);
}
}
applyPermutationToVector(swizzledTileShape, swizzle.permutation);
newShape.append(swizzledTileShape);
return RankedTensorType::get(newShape, packedType.getElementType());
});
}

Expand All @@ -143,19 +163,6 @@ MaterializeEncodingConversionTarget::MaterializeEncodingConversionTarget(
});
}

/// Returns a tensor type that has the shape and element type of the original
/// type recorded on the encoding of `type` (falling back to `type` itself when
/// the encoding records none), with the encoding attached. Types without an
/// encoding are returned unchanged.
RankedTensorType getOriginalTypeWithEncoding(RankedTensorType type) {
  auto encodingAttr = getEncodingAttr(type);
  if (!encodingAttr) {
    return type;
  }
  // Prefer the type stored on the encoding; otherwise keep the input type.
  auto recordedTypeAttr = encodingAttr.getOriginalType();
  RankedTensorType baseType =
      recordedTypeAttr ? cast<RankedTensorType>(recordedTypeAttr.getValue())
                       : type;
  return RankedTensorType::get(baseType.getShape(), baseType.getElementType(),
                               encodingAttr);
}

/// Rebuilds `type` from only its shape and element type, so the returned
/// tensor type carries no encoding.
RankedTensorType dropEncoding(RankedTensorType type) {
  auto shape = type.getShape();
  auto elementType = type.getElementType();
  return RankedTensorType::get(shape, elementType);
}
Expand Down Expand Up @@ -213,4 +220,13 @@ bool isNarrowNResult(EncodingAttr encoding) {
return narrowN && (!narrowM || narrowM.getInt() > narrowN.getInt());
}

/// Flattens `expandShape` by concatenating its inner vectors in order.
/// For example, [[16], [4, 2]] yields [16, 4, 2]. An empty input yields an
/// empty result.
SmallVector<int64_t>
getExpandedTileShape(SmallVector<SmallVector<int64_t>> expandShape) {
  SmallVector<int64_t> result;
  // Bind by const reference: iterating by value would copy every inner vector
  // just to read it.
  for (const auto &expandShapeDim : expandShape) {
    result.append(expandShapeDim);
  }
  return result;
}

} // namespace mlir::iree_compiler
67 changes: 62 additions & 5 deletions compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,47 @@

#include "iree/compiler/Dialect/Encoding/IR/EncodingOps.h"
#include "iree/compiler/Dialect/HAL/IR/HALTypes.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Transforms/DialectConversion.h"

namespace mlir::iree_compiler {

/// Container of information needed to materialize the pack operation.
/// Container of information needed to materialize the layout transformations.
///
/// On CPU, these layout transformations consist of a single `tensor.pack`
/// or `tensor.unpack` op, implementing a tiled layout where each tile is
/// row-major.
///
/// On GPU, there is an additional `swizzle`, which changes the layout inside
/// of the tile. See the comment on the nested Swizzle struct.
struct MaterializeEncodingInfo {
// Metadata for a swizzle, that is, an (expand_shape -> transposition)
// pair of ops performing a change of layout within the tiles. This is used
// on GPU, where the tiles themselves can have an arbitrary layout.
struct Swizzle {
// This vector-of-vectors contains all the information needed to generate
// a `tensor.expand_shape` creating additional internal dimensions into the
// tile. There is one inner vector per original tile dimension, listing the
// sizes that dimension is split into. For example,
// expandShape = [[16], [4, 2]] means that the original tile shape [16, 8]
// gets expanded such that the first dimension 16 is left unchanged, and the
// second dimension 8 gets split into two internal dims of size 4 and 2.
SmallVector<SmallVector<int64_t>> expandShape;
// This permutation vector applies to the expanded dimensions (i.e. to the
// concatenation of the inner vectors of `expandShape`) and is used to
// generate a `linalg.transpose` changing the layout of the tile. For
// example, permutation[0] dictates which of the expanded dimensions becomes
// the leading dimension of the layout.
SmallVector<int64_t> permutation;
};

// The next 3 fields are used to create a `tensor.pack` or `tensor.unpack` op,
// changing the overall layout between row-major and tiled (where each tile is
// row-major).
SmallVector<int64_t> innerDimsPos;
SmallVector<int64_t> innerTileSizes;
SmallVector<int64_t> outerDimsPerm;
// NOTE(review): presumably the rank of the source tensor before packing;
// nothing visible here reads it, so confirm against its users.
unsigned srcRank = 0;

// The optional swizzle, see the above comment on Swizzle. Only used on GPU.
// Left unset when the tiles keep their row-major layout.
std::optional<Swizzle> swizzle;
};

using MaterializeEncodingFn = std::function<FailureOr<MaterializeEncodingInfo>(
Expand Down Expand Up @@ -83,9 +114,6 @@ class OpMaterializeEncodingPattern : public OpConversionPattern<OpTy> {
// Utility methods about Encoding.
//===---------------------------------------------------------------------===//

/// Returns the original type that is carried by the encoding.
RankedTensorType getOriginalTypeWithEncoding(RankedTensorType type);

/// Returns the RankedTensorType without encodings.
RankedTensorType dropEncoding(RankedTensorType type);

Expand All @@ -102,7 +130,32 @@ MaterializeEncodingInfo
getEncodingInfoForMatmul(IREE::Encoding::EncodingAttr encoding, int64_t rank,
TileMxNxK tileMxNxK);

/// Utility method to convert from `set_encoding` op to `pack` operation.
/// For now this takes a `paddingValue` as input. The source is also taken
/// as input so that these could be used with `OpConversionPatterns`.
FailureOr<tensor::PackOp> lowerSetEncodingOpToPackOp(
RewriterBase &rewriter, IREE::Encoding::SetEncodingOp encodingOp,
Value source, const MaterializeEncodingTypeConverter &typeConverter,
MaterializeEncodingValueFn materializeEncodingValueFn);

/// Utility method to convert from `unset_encoding` op to `unpack` operation.
/// The source is taken as input so that these could be used with
/// `OpConversionPatterns`.
FailureOr<tensor::UnPackOp> lowerUnsetEncodingToUnpackOp(
RewriterBase &rewriter, IREE::Encoding::UnsetEncodingOp encodingOp,
Value packedValue, const MaterializeEncodingTypeConverter &typeConverter,
MaterializeEncodingValueFn materializeEncodingValueFn);

/// Populates the set of patterns that lowers set_encoding, unset_encoding, and
/// upstream dialect ops with encoding types to pack/unpack ops.
void populateMaterializeEncodingIntoPackUnPackPatterns(
RewritePatternSet &patterns,
MaterializeEncodingTypeConverter &typeConverter,
MaterializeEncodingValueFn materializeEncodingValueFn);

/// Populates the set of patterns that lowers IREE dialect (e.g., Flow, Hal,
/// etc) ops with encoding types to pack/unpack ops.
void populateIREEMaterializeEncodingIntoPackUnPackPatterns(
RewritePatternSet &patterns, MaterializeEncodingConversionTarget &target,
MaterializeEncodingTypeConverter &typeConverter,
MaterializeEncodingValueFn materializeEncodingValueFn);
Expand All @@ -111,6 +164,10 @@ void populateMaterializeEncodingIntoPackUnPackPatterns(
// result of a matvec.
bool isNarrowNResult(IREE::Encoding::EncodingAttr encoding);

// Concatenates the vectors.
SmallVector<int64_t>
getExpandedTileShape(SmallVector<SmallVector<int64_t>> expandShape);

} // namespace mlir::iree_compiler

#endif // IREE_COMPILER_SRC_IREE_COMPILER_CODEGEN_COMMON_ENCODINGUTILS_H_
2 changes: 2 additions & 0 deletions compiler/src/iree/compiler/Codegen/Common/GPU/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ iree_compiler_cc_library(
"GPUGeneralizeNamedOps.cpp",
"GPUInferMemorySpace.cpp",
"GPULowerToUKernels.cpp",
"GPUMaterializeEncoding.cpp",
"GPUMultiBuffering.cpp",
"GPUNestedLayoutDistributionPatterns.cpp",
"GPUPatterns.cpp",
Expand Down Expand Up @@ -98,6 +99,7 @@ iree_compiler_cc_library(
"//compiler/src/iree/compiler/Codegen/Transforms",
"//compiler/src/iree/compiler/Codegen/Utils",
"//compiler/src/iree/compiler/Codegen/Utils:VectorOpUtils",
"//compiler/src/iree/compiler/Dialect/Encoding/IR",
"//compiler/src/iree/compiler/Dialect/HAL/IR",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:AMDGPUDialect",
Expand Down
2 changes: 2 additions & 0 deletions compiler/src/iree/compiler/Codegen/Common/GPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ iree_cc_library(
"GPUGeneralizeNamedOps.cpp"
"GPUInferMemorySpace.cpp"
"GPULowerToUKernels.cpp"
"GPUMaterializeEncoding.cpp"
"GPUMultiBuffering.cpp"
"GPUNestedLayoutDistributionPatterns.cpp"
"GPUPatterns.cpp"
Expand Down Expand Up @@ -129,6 +130,7 @@ iree_cc_library(
iree::compiler::Codegen::Transforms
iree::compiler::Codegen::Utils
iree::compiler::Codegen::Utils::VectorOpUtils
iree::compiler::Dialect::Encoding::IR
iree::compiler::Dialect::HAL::IR
PUBLIC
)
Expand Down
Loading

0 comments on commit 4395c11

Please sign in to comment.