diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h index 87aabdc015fea..7aaa4ecc7ee77 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h @@ -9,12 +9,7 @@ #ifndef MLIR_DIALECT_XEGPU_IR_XEGPU_H #define MLIR_DIALECT_XEGPU_IR_XEGPU_H -#include "mlir/Bytecode/BytecodeOpInterface.h" -#include "mlir/IR/BuiltinTypes.h" -#include "mlir/IR/Dialect.h" -#include "mlir/Interfaces/ShapedOpInterfaces.h" -#include "mlir/Interfaces/SideEffectInterfaces.h" -#include "mlir/Interfaces/ViewLikeInterface.h" +#include namespace mlir { namespace xegpu { diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td index cd38549f1ccf4..bb325c272e332 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td @@ -10,7 +10,6 @@ #define MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td" -include "mlir/IR/EnumAttr.td" class XeGPUAttr traits = [], string baseCppClass = "::mlir::Attribute"> @@ -18,64 +17,4 @@ class XeGPUAttr traits = [], let mnemonic = attrMnemonic; } -def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> { - let parameters = (ins - OptionalParameter<"MemoryScopeAttr">: $memory_scope, - OptionalParameter<"IntegerAttr", "1">: $array_length, - OptionalParameter<"BoolAttr", "true">: $boundary_check - ); - - let builders = [ - AttrBuilder<(ins - CArg<"xegpu::MemoryScope", "xegpu::MemoryScope::Global">:$memory_scope, - CArg<"int", "1">:$array_length, - CArg<"bool", "true">: $boundary_check - )> - ]; - - let assemblyFormat = "`<` struct(params) `>`"; -} - -//===----------------------------------------------------------------------===// -// XeGPU Memory Scope Enums. -//===----------------------------------------------------------------------===// -def XeGPU_MemoryScopeGlobal: I32EnumAttrCase<"Global", 0, "global">; -def XeGPU_MemoryScopeShared: I32EnumAttrCase<"SLM", 1, "slm">; -def XeGPU_MemoryScope: I32EnumAttr<"MemoryScope", - "The address space of the memory the tensor descritor is created for", - [XeGPU_MemoryScopeGlobal, XeGPU_MemoryScopeShared]> { - let genSpecializedAttr = 0; - let cppNamespace = "::mlir::xegpu"; -} - -def XeGPU_MemoryScopeAttr: - EnumAttr { - let assemblyFormat = "$value"; -} - -//===----------------------------------------------------------------------===// -// XeGPU Cache Enums. -//===----------------------------------------------------------------------===// -def XeGPU_CachePolicyCached: I32EnumAttrCase<"CACHED", 0, "cached">; // valid for read and write -def XeGPU_CachePolicyUncached: I32EnumAttrCase<"UNCACHED", 1, "uncached">; // valid for read and write -def XeGPU_CachePolicyStreaming: I32EnumAttrCase<"STREAMING", 2, "streaming">; // valid for read only -def XeGPU_CachePolicyInvalid: I32EnumAttrCase<"READ_INVALIDATE", 3, "read_invalidate">; // valid for read only -def XeGPU_CachePolicyWriteBack: I32EnumAttrCase<"WRITE_BACK", 4, "write_back">; // valid for write only -def XeGPU_CachePolicyWriteThrough: I32EnumAttrCase<"WRITE_THROUGH", 5, "write_through">; // valid for write only - -def XeGPU_CachePolicyEnums : I32EnumAttr<"CachePolicy", "Cache policy", - [XeGPU_CachePolicyCached, XeGPU_CachePolicyUncached, - XeGPU_CachePolicyStreaming, XeGPU_CachePolicyInvalid, - XeGPU_CachePolicyWriteBack, XeGPU_CachePolicyWriteThrough]> { - let genSpecializedAttr = 0; - let cppNamespace = "::mlir::xegpu"; -} - -def XeGPU_CacheHintAttr - : EnumAttr { - let assemblyFormat = "`<` $value `>`"; -} - - - #endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td index c2f09319c790e..3851275ad30a0 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td @@ -23,8 +23,8 @@ def XeGPU_Dialect : Dialect { the lower-level GPU compiler. }]; - let useDefaultTypePrinterParser = true; - let useDefaultAttributePrinterParser = true; + // let useDefaultTypePrinterParser = true; + // let useDefaultAttributePrinterParser = true; } #endif // MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index 1f90dcb4bf55a..5825ef9195b03 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -9,13 +9,10 @@ #ifndef MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD #define MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD -include "mlir/IR/AttrTypeBase.td" include "mlir/Dialect/XeGPU/IR/XeGPUAttrs.td" include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td" include "mlir/Dialect/XeGPU/IR/XeGPUTypes.td" -include "mlir/Interfaces/ShapedOpInterfaces.td" -include "mlir/Interfaces/SideEffectInterfaces.td" -include "mlir/Interfaces/ViewLikeInterface.td" + // Base class for dialect operations. This operation inherits from the base // `Op` class in OpBase.td, and provides: @@ -23,291 +20,7 @@ include "mlir/Interfaces/ViewLikeInterface.td" // * The mnemonic for the operation, or the name without the dialect prefix. // * A list of traits for the operation. class XeGPU_Op traits = []>: - Op { - - code extraBaseClassDeclaration = [{ - void printProperties(::mlir::MLIRContext *ctx, - ::mlir::OpAsmPrinter &p, const Properties &prop) { - Attribute propAttr = getPropertiesAsAttr(ctx, prop); - if (propAttr) - p << "<" << propAttr << ">"; - } - - static ::mlir::ParseResult parseProperties(::mlir::OpAsmParser &parser, - ::mlir::OperationState &result) { - if (mlir::succeeded(parser.parseLess())) { - if (parser.parseAttribute(result.propertiesAttr) || parser.parseGreater()) - return failure(); - } - return success(); - } - - }]; -} - - -def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface, - AttrSizedOperandSegments, OffsetSizeAndStrideOpInterface]> { - - let summary = "Create nd-tensor descriptor operation"; - let description = [{ - The "create_nd_tdesc" operation creates a TensorDescType which represents - a sub-view of a 2D memory region (It can be extended to support n-D memory - region if needed in future). Elements in the subview continuous in each - dimention. It encodes the following important information for supporting - Intel hardware features: - - * source: an object representing (starting address/pointer of) a 2D memory region. - It can be either a 2D memref object, or simply a pointer represented by uint64_t type. - for the later case, the shape and layout information of the 2D memory region should - be explicitly passed via `dynamic_shape` and `dynamic_strides` parameters. - * offsets: two index values represents offsets from the "source" at the each dimension - at which the subview of the target memory will be created. It is encoded via two - variables, including "dynamic_offsets" and "static_offsets", such that it can - accept various forms, such as, operands (e.g., [%c0, %c]) and attributes (e.g., [2, 4])). - * shape: the shape information of the memory region pointed by the "source". It is - typically encoded via the MemRefType of the source, e.g., memref<4096x4096xf16>. - But if "source" is simply a pointer represented as uint64_t type, or a memref - type without shape information e.g., memref, the shape information has - to be explicitly passed via the "dynamic_shape" argument. Currently "dynamic_shape" - only accepts operands(e.g., [%c4096, %c4096]), not attributes(e.g., [4096, 4096]). - * strides: the strides of the memory region pointed by the "source". Similar to shape, - it is typically encoded via the MemRefType of the source too. But if "source" is - simply a pointer represented as uint64_t type, or a memref type without shape - information e.g., memref, the strides information has to be explicitly - passed via the "dynamic_strides" argument. And it currently only accepts operands two. - - Example 1 (suppose the tensor shape inferred by the compiler is 8x16): - %0 = memref.alloc() : memref<1024x1024xf32> - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %1 = xegpu.create_nd_tdesc %0[%c0, %c0]: memref<1024x1024xf32> -> TensorDesc<8x16xf32> - - Example 2 (suppose the tensor shape inferred by the compiler is 8x16): - %0 = memref.alloc(%h, %w) : memref - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %1 = xegpu.create_nd_tdesc %0[%c0, %c0], [%h, %w], [%w, %c1]: memref -> TensorDesc<8x16xf32> - - Example 3 (suppose the tensor shape inferred by the compiler is 8x16): - %0 = ... : ui64 - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %1 = xegpu.create_nd_tdesc %0[%c0, %c0], [%h, %w], [%w, %c1]: ui64 -> TensorDesc<8x16xf32> - }]; - - let arguments = (ins - XeGPU_BaseAddrType: $source, - Variadic: $offsets, - Variadic: $shape, - Variadic: $strides, - DenseI64ArrayAttr: $static_offsets - ); - let results = (outs XeGPU_TensorDesc: $TensorDesc); - - let assemblyFormat = [{ - $source `` - custom($offsets, $static_offsets) - (`,` `[` $shape^ `]` `,` `[` $strides `]`)? - attr-dict `:` type($source) `->` qualified(type($TensorDesc)) - }]; - - let hasVerifier = 1; - - let builders = [ - OpBuilder<(ins "Type": $tdesc, "TypedValue": $source, - "llvm::ArrayRef": $offsets)>, - - OpBuilder<(ins "Type": $tdesc, "TypedValue ": $source, - "llvm::ArrayRef": $offsets, - "ValueRange": $shape, "ValueRange": $stride)> - ]; - - let extraClassDeclaration = extraBaseClassDeclaration # [{ - /// Returns the type of the source memref operand. - Type getSourceType() { - return getSource().getType(); - } - - /// Returns the type of the result TensorDesc. - xegpu::TensorDescType getType() { - return getTensorDesc().getType(); - } - - /// Return the element type of the TensorDesc - Type getElementType() { - return getType().getElementType(); - } - - /// Return the shape of the TensorDesc - llvm::ArrayRef getTensorDescShape() { - return getType().getShape(); - } - - /// wrapper for matching with OffsetSizeAndStrideOpInterface - OperandRange getSizes() { - return getShape(); - } - - /// wrapper for matching with OffsetSizeAndStrideOpInterface - /// If source is IntegerType and `shape` is filled, it will - /// return an array of ShapedType::kDynamic representing dynamic - /// shape encoded in the `shape` argument will be used. Presence - /// of `shape` overides static shape from source memref type. - SmallVector getStaticSizes() { - if (getSourceType().isa() || getShape().size()) { - auto dims = getMixedOffsets().size(); - return SmallVector(dims, ShapedType::kDynamic); - } - auto memrefType = getSourceType().dyn_cast(); - return SmallVector(memrefType.getShape()); - } - - /// wrapper for matching with OffsetSizeAndStrideOpInterface - /// If source is IntegerType or `strides` is filled, it will - /// return an array of ShapedType::kDynamic representing dynamic - /// strides encoded in the `strides` argument will be used. Presence - /// of `strides` overides static strides from source memref type. - SmallVector getStaticStrides() { - if (getSourceType().isa() || getStrides().size()) { - auto dims = getMixedOffsets().size(); - return SmallVector(dims, ShapedType::kDynamic); - } - auto memrefType = getSourceType().dyn_cast(); - auto [strides, offset] = getStridesAndOffset(memrefType); - return strides; - } - - /// Return the expected rank of each of the`static_offsets`, - /// `static_shape` and `static_strides` attributes. - std::array getArrayAttrMaxRanks() { - unsigned rank; - if (auto ty = getSourceType().dyn_cast()) { - rank = ty.getRank(); - } else { - rank = (unsigned)getMixedOffsets().size(); - } - return {rank, rank, rank}; - } - - /// Return the number of leading operands before the `offsets`, - /// `shape` and `strides` operands. - static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; } - - mlir::Value getViewSource() { return getSource(); } - }]; -} - -def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> { - let summary = "prefetches a nD block to cache"; - let description = [{ - It issues an instruction to prefetch the data from memory to each - level of the cache based on their cache policy. - - Example: - ``` - xegpu.prefetch_nd %tdesc {l1_hint = #xegpu.cache_hint, - l2_hint = #xegpu.cache_hint, - l3_hint = #xegpu.cache_hint} - : !xegpu.tensor_desc<8x16xf16> - ``` - - }]; - - let arguments = (ins XeGPU_TensorDesc: $TensorDesc, - OptionalAttr: $l1_hint, - OptionalAttr: $l2_hint, - OptionalAttr: $l3_hint); - - let extraClassDeclaration = extraBaseClassDeclaration; - - let assemblyFormat = "$TensorDesc prop-dict attr-dict `:` qualified(type($TensorDesc))"; -} - - -def XeGPU_LoadNdOp : XeGPU_Op<"load_nd"> { - let summary = "loads a n-D block from memory (represented by TensorDesc)" - "to registers (represented by vector)"; - let description = [{ - LoadNdOp essentially mimics the hardware block read instruction to read - a block of data from memory to register. It takes a set of optional cache - hints for each level of cache, L1, L2 and L3. If hardware does not have a - correspoding cache, Corresponding cache hint attribute will be masked. - vnni transform is an hardware feature for Intel GPU, which is used to - do data packing during the load for B operand of matrix operation, if - the bit width of the data type is less then 32 bits, e.g., fp16. And - transpose is another Intel hardware feature, which will do transpose - operation when loading the data if the bit width of the data type is - fp32 or fp64. It implies that vnni and transpose cannot exit at the - same time. - - Example: - ``` - xegpu.load_nd %1 {transpose = [1, 0], - l1_hint = #xegpu.cache_hint, - l2_hint = #xegpu.cache_hint, - l3_hint = #xegpu.cache_hint} - : !xegpu.tensor_desc<8x16xf32> -> vector<16x8xf32> - ``` - - - }]; - - let arguments = (ins XeGPU_TensorDesc: $TensorDesc, - OptionalAttr: $vnni_axis, - OptionalAttr: $transpose, - OptionalAttr: $l1_hint, - OptionalAttr: $l2_hint, - OptionalAttr: $l3_hint); - - let results = (outs XeGPU_ValueType: $value); - - let extraClassDeclaration = extraBaseClassDeclaration # [{ - VectorType getType() { - return llvm::dyn_cast(getValue().getType()); - } - - xegpu::TensorDescType getTensorDescType() { - return getTensorDesc().getType(); - } - }]; - - let assemblyFormat = "$TensorDesc prop-dict attr-dict `:` qualified(type($TensorDesc)) `->` type($value)"; - let hasVerifier = 1; -} - -def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", []> { - let summary = "stores a n-D block register region back to memory, currently only supports 2D"; - - let description = [{ - StoreNdOp essentially mimics the hardware block write instruction io - write a block of data from register into the memory region as described - by the TensorDesc. It takes a set of optional cache hints for each level - of cache, L1, L2 and L3. If hardware does not have a correspoding cache, - Corresponding cache hint attribute will be masked. - - Example: - ``` - xegpu.store_nd %3, %2 {l1_hint = #xegpu.cache_hint, - l2_hint = #xegpu.cache_hint, - l3_hint = #xegpu.cache_hint} - : vector<8x16xf16>, !xegpu.tensor_desc<8x16xf16> - ``` - - - }]; - - let arguments = (ins XeGPU_ValueType: $value, - XeGPU_TensorDesc: $TensorDesc, - OptionalAttr: $l1_hint, - OptionalAttr: $l2_hint, - OptionalAttr: $l3_hint); - - let extraClassDeclaration = extraBaseClassDeclaration; + Op; - let assemblyFormat = [{$value `,` $TensorDesc prop-dict attr-dict - `:` type($value) `,` qualified(type($TensorDesc))}]; - let hasVerifier = 1; -} #endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td index 19ac1693712dd..1d75bb4e2906f 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td @@ -9,9 +9,9 @@ #ifndef MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD #define MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD +include "mlir/IR/BuiltinTypes.td" include "mlir/Dialect/XeGPU/IR/XeGPUAttrs.td" include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td" -include "mlir/IR/BuiltinTypes.td" def XeGPU_IntType: AnyTypeOf<[I1, I8, I16, I32, I64, SI1, SI8, SI16, SI32, SI64, UI1, UI8, UI16, UI32, UI64]>; def XeGPU_FloatType: AnyTypeOf<[F16, F32, F64, BF16, TF32]>; @@ -30,106 +30,4 @@ class XeGPUTypeDef traits = [], let mnemonic = typeMnemonic; } -def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", - [ShapedTypeInterface], "::mlir::TensorType"> { - let summary = "TensorDesc describing regions of interested data."; - let description = [{ - TensorDesc is a type designed to describe regions of the interested data as well as some - features that are unique to Intel hardware. Different with the builtin tensor type in MLIR, - it essentially only contains the meta data, and doesn't hold the data by itself. It is designed - to mainly support 2D block load/store and DPAS (matrix multiplication instruction) on Intel GPU. - It encodes the following information: - - * shape: the sizes/shape of the intereted data block, e.g., 8x16 means 8 rows - and each row contains 16 contiguous data element. The rows could be - either contiguous or not, depends on whether the encoding attribute - is set or not. - * element_type: the data type of the data element, e.g., f16, f32. - - Similar to the builtin tensor, it also provides an optinal attribute to encoding - the following information via the TensorDescAttr object: - * memory_scope (xegpu::MemoryScope): [optional] where the data is located, - global memory or shared memory. It is default to Global. - * array_length (int): [optional] The number of contiguous blocks with size as `shape`, - that will be loaded by block load at a time. It is default to 1. - * boundary_check (bool): [optional] indicates whether the operation detects the boundary - and pads with zero for out-of-boundary access. It is default to do boundary check. - - - Syntax: - - ``` - TensorDesc-type ::= `tensor_desc` `<` dim-list element-type (attr-list)? `>` - element-type ::= float-type | integer-type | index-type - dim-list := (static-dim-list `x`)? - static-dim-list ::= decimal-literal `x` decimal-literal - attr-list = (, memory_scope = value)? (, arr_len = value)? (, boundary_check = value)? - ``` - - Examples: - - ```mlir - // A block TensorDesc with 8x16 i32 elements - xegpu.tensor_desc<8x16xi32> - - // A block TensorDesc with 8x16 f32 elements - xegpu.tensor_desc<8x16xf32> - - // A TensorDesc with 8x16 f32 elements for a memory region in shared memory space. - xegpu.tensor_desc<8x16xf32, #xegpu.tdesc_attr> - ``` - }]; - - let parameters = (ins ArrayRefParameter<"int64_t">: $shape, - "mlir::Type": $elementType, - OptionalParameter<"mlir::Attribute">: $encoding); - - let extraClassDeclaration = [{ - using TensorType::clone; - using mlir::ShapedType::Trait::getElementTypeBitWidth; - using mlir::ShapedType::Trait::getRank; - using mlir::ShapedType::Trait::getNumElements; - using mlir::ShapedType::Trait::isDynamicDim; - using mlir::ShapedType::Trait::hasStaticShape; - using mlir::ShapedType::Trait::getNumDynamicDims; - using mlir::ShapedType::Trait::getDimSize; - using mlir::ShapedType::Trait::getDynamicDimIndex; - - TensorDescType clone(::mlir::Type elementType) { - return llvm::cast(cloneWith(getShape(), elementType)); - } - - TensorDescAttr getEncodingAsTensorDescAttr() const { - return llvm::dyn_cast_if_present(getEncoding()); - } - - xegpu::MemoryScope getMemoryScope() const { - auto attr = getEncodingAsTensorDescAttr(); - if (attr && attr.getMemoryScope()) - return attr.getMemoryScope().getValue(); - // return default value - return MemoryScope::Global; - } - - int getArrayLength() { - auto attr = getEncodingAsTensorDescAttr(); - if (attr && attr.getArrayLength()) - return attr.getArrayLength().getInt(); - // return default value - return 1; - } - - bool getBoundaryCheck() { - auto attr = getEncodingAsTensorDescAttr(); - if (attr && attr.getBoundaryCheck()) - return attr.getBoundaryCheck().getValue(); - // return default value - return true; - } - }]; - - let hasCustomAssemblyFormat = true; - -} - #endif // MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index 0b3f4b9c9dbea..4f839ee773476 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -6,10 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Dialect/XeGPU/IR/XeGPU.h" -#include "mlir/IR/Builders.h" -#include "mlir/IR/DialectImplementation.h" -#include "llvm/ADT/TypeSwitch.h" +#include namespace mlir { namespace xegpu { @@ -29,72 +26,8 @@ void XeGPUDialect::initialize() { >(); } -//===----------------------------------------------------------------------===// -// XeGPU_TensorDescAttr -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// XeGPU_TensorDescType -//===----------------------------------------------------------------------===// -mlir::Type TensorDescType::parse(::mlir::AsmParser &parser) { - llvm::SmallVector shape; - mlir::Type elementType; - mlir::FailureOr encoding; - - // Parse literal '<' - if (parser.parseLess()) - return {}; - - auto shapeLoc = parser.getCurrentLocation(); - if (mlir::failed(parser.parseDimensionList(shape))) { - parser.emitError(shapeLoc, "failed to parse parameter 'shape'"); - return {}; - } - - auto elemTypeLoc = parser.getCurrentLocation(); - if (mlir::failed(parser.parseType(elementType))) { - parser.emitError(elemTypeLoc, "failed to parse parameter 'elementType'"); - return {}; - } - - // parse optional attributes - if (mlir::succeeded(parser.parseOptionalComma())) { - encoding = mlir::FieldParser::parse(parser); - if (mlir::failed(encoding)) { - parser.emitError( - parser.getCurrentLocation(), - "Failed to parse the attribute field for TensorDescType.\n"); - return {}; - } - } - - // Parse literal '>' - if (parser.parseGreater()) - return {}; - - return TensorDescType::get(parser.getContext(), shape, elementType, - encoding.value_or(mlir::Attribute())); -} - -void TensorDescType::print(::mlir::AsmPrinter &printer) const { - printer << "<"; - - auto shape = getShape(); - for (int64_t dim : shape) { - if (mlir::ShapedType::isDynamic(dim)) - printer << '?'; - else - printer << dim; - printer << 'x'; - } - - printer << getElementType(); - - if (auto encoding = getEncoding()) - printer << ", " << encoding; - - printer << ">"; -} +// this file is for position occupation, +// we will add functions in following PRs. } // namespace xegpu } // namespace mlir diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp index e6ecf26c22481..b356c397fb836 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -6,186 +6,15 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Dialect/Utils/StaticValueUtils.h" -#include "mlir/Dialect/XeGPU/IR/XeGPU.h" -#include "mlir/IR/Builders.h" +#include #define DEBUG_TYPE "xegpu" namespace mlir { namespace xegpu { -static void transpose(llvm::ArrayRef trans, - std::vector &shape) { - std::vector old = shape; - for (size_t i = 0; i < trans.size(); i++) - shape[i] = old[trans[i]]; -} - -template -static std::string makeString(T array, bool breakline = false) { - std::string buf; - buf.clear(); - llvm::raw_string_ostream os(buf); - os << "["; - for (size_t i = 1; i < array.size(); i++) { - os << array[i - 1] << ", "; - if (breakline) - os << "\n\t\t"; - } - os << array.back() << "]"; - os.flush(); - return buf; -} - -//===----------------------------------------------------------------------===// -// XeGPU_CreateNdDescOp -//===----------------------------------------------------------------------===// -void CreateNdDescOp::build(OpBuilder &builder, OperationState &state, - Type tdesc, TypedValue source, - llvm::ArrayRef offsets) { - auto ty = source.getType(); - (void)ty; - assert(ty && ty.hasStaticShape() && offsets.size() == (size_t)ty.getRank()); - - llvm::SmallVector staticOffsets; - llvm::SmallVector dynamicOffsets; - dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets); - - build(builder, state, tdesc, source, dynamicOffsets /* dynamic offsets */, - ValueRange({}) /* empty dynamic shape */, - ValueRange({}) /* empty dynamic strides */, - staticOffsets /* static offsets */); -} - -void CreateNdDescOp::build(OpBuilder &builder, OperationState &state, - Type tdesc, TypedValue source, - llvm::ArrayRef offsets, - ValueRange shape, ValueRange stride) { - assert(shape.size() && offsets.size() && stride.size() && - shape.size() == stride.size() && shape.size() == offsets.size()); - - llvm::SmallVector staticOffsets; - llvm::SmallVector dynamicOffsets; - - dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets); - - build(builder, state, tdesc, source, /* dynamic_offsets = */ dynamicOffsets, - /* dynamic shape = */ shape, /* dynamic strides = */ stride, - /* static offsets = */ staticOffsets); -} - -LogicalResult CreateNdDescOp::verify() { - auto rank = (int64_t)getMixedOffsets().size(); - bool invalidRank = (rank != 2); - bool invalidElemTy = false; - - // check source type matches the rank if it is a memref. - // It also should have the same ElementType as TensorDesc. - auto memrefTy = getSourceType().dyn_cast(); - if (memrefTy) { - invalidRank |= (memrefTy.getRank() != rank); - invalidElemTy |= memrefTy.getElementType() != getElementType(); - } - - // check result type matches the rank - invalidRank = (getType().getRank() != rank); - - // mismatches among shape, strides, and offsets are - // already handeled by OffsetSizeAndStrideOpInterface. - // So they are not check here. - if (invalidRank) - return emitOpError( - "Expecting the rank of shape, strides, offsets, " - "source memref type (if source is a memref) and TensorDesc " - "should match with each other. They currenlty are 2D."); - - if (invalidElemTy) - return emitOpError("TensorDesc should have the same element " - "type with the source if it is a memref.\n"); - - return success(); -} - -//===----------------------------------------------------------------------===// -// XeGPU_LoadNdOp -//===----------------------------------------------------------------------===// -LogicalResult LoadNdOp::verify() { - auto tdescTy = getTensorDescType(); - auto valueTy = getType(); - - if (tdescTy.getRank() != 2) - return emitOpError( - "The TensorDesc for LoadNdOp should be a 2D TensorDesc."); - - if (!valueTy) - return emitOpError("Invalid result, it should be a VectorType.\n"); - - auto tdescElemTy = tdescTy.getElementType(); - auto valueElemTy = valueTy.getElementType(); - - if (tdescElemTy != valueElemTy) - return emitOpError( - "Value should have the same element type as TensorDesc."); - - auto array_len = tdescTy.getArrayLength(); - auto tdescShape = tdescTy.getShape().vec(); - auto valueShape = valueTy.getShape().vec(); - - if (getTranspose()) { - auto trans = getTranspose().value(); - if (tdescShape.size() >= trans.size()) - transpose(trans, tdescShape); - else - emitWarning("Invalid transpose attr. It is ignored."); - } - - if (getVnniAxis()) { - auto axis = getVnniAxis().value(); - auto vnni_factor = valueShape.back(); - tdescShape[axis] /= vnni_factor; - tdescShape.push_back(vnni_factor); - } - - if (array_len > 1) { - auto it = tdescShape.begin(); - tdescShape.insert(it, array_len); - } - - if (tdescShape != valueShape) - return emitOpError() << "Result shape doesn't match TensorDesc shape." - << "The expected shape is " << makeString(tdescShape) - << ". But the given shape is " - << makeString(valueShape) << ".\n"; - return success(); -} - -//===----------------------------------------------------------------------===// -// XeGPU_StoreNdOp -//===----------------------------------------------------------------------===// -LogicalResult StoreNdOp::verify() { - auto dstTy = getTensorDesc().getType(); // Tile - auto valTy = getValue().getType().cast(); // Vector - - if (dstTy.getRank() != 2) - return emitOpError("Expecting a 2D TensorDesc shape.\n"); - - if (!valTy) - return emitOpError("Exepcting a VectorType result.\n"); - - auto dstElemTy = dstTy.getElementType(); - auto valElemTy = valTy.getElementType(); - - if (dstElemTy != valElemTy) { - return emitOpError() << "The element type of the value should " - "match the elementtype of the TensorDesc.\n"; - } - - if (dstTy.getShape() != valTy.getShape()) - return emitOpError() - << "The result shape should match the TensorDesc shape.\n"; - return success(); -} +// this file is for position occupation, +// we will add functions in following PRs. } // namespace xegpu } // namespace mlir diff --git a/mlir/test/Dialect/XeGPU/XeGPUOps.mlir b/mlir/test/Dialect/XeGPU/XeGPUOps.mlir deleted file mode 100644 index 039346adbb851..0000000000000 --- a/mlir/test/Dialect/XeGPU/XeGPUOps.mlir +++ /dev/null @@ -1,62 +0,0 @@ -// RUN: mlir-opt %s | FileCheck %s -// Verify the printed output can be parsed. -// RUN: mlir-opt %s | mlir-opt | FileCheck %s -// Verify the generic form can be parsed. -// RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s - -// CHECK-LABEL: gpu.module @test { -gpu.module @test { -// CHECK: gpu.func @test_create_nd_tdesc_vc_1(%[[arg0:.*]]: memref<24x32xf32>) { -gpu.func @test_create_nd_tdesc_vc_1(%src: memref<24x32xf32>) { - // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32> - %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32> - gpu.return -} - -// CHECK: gpu.func @test_create_nd_tdesc_vc_2(%[[arg0:.*]]: ui64, %[[arg1:.*]]: index, %[[arg2:.*]]: index, %[[arg3:.*]]: index, %[[arg4:.*]]: index) { -gpu.func @test_create_nd_tdesc_vc_2(%src: ui64, %w : index, %h : index, %x : index, %y : index) { - //CHECK: %[[C:.*]] = arith.constant 1 : index - %c1 = arith.constant 1 : index - // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][%[[arg3]], %[[arg4]]], [%[[arg2]], %[[arg1]]], [%[[arg1]], %[[C]]] : ui64 -> !xegpu.tensor_desc<8x16xf32> - %1 = xegpu.create_nd_tdesc %src[%x, %y], [%h, %w], [%w, %c1] : ui64 -> !xegpu.tensor_desc<8x16xf32> - gpu.return -} - -// CHECK: gpu.func @test_create_nd_tdesc_vc_3(%[[arg0:.*]]: memref<24x32xf32>) { -gpu.func @test_create_nd_tdesc_vc_3(%src: memref<24x32xf32>) { - // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x16xf32, #xegpu.tdesc_attr - %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x16xf32, #xegpu.tdesc_attr> - gpu.return -} - -// CHECK: gpu.func @test_prefetch_nd_vc(%[[arg0:.*]]: memref<24x32xf16>) { -gpu.func @test_prefetch_nd_vc(%src: memref<24x32xf16>) { - // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16> - %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16> - // CHECK: xegpu.prefetch_nd %[[R0]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}> : !xegpu.tensor_desc<8x16xf16> - xegpu.prefetch_nd %1 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}>: !xegpu.tensor_desc<8x16xf16> - gpu.return -} - -// CHECK: func @test_load_nd_vc(%[[arg0:.*]]: memref<8x16xf16>) { -gpu.func @test_load_nd_vc(%src: memref<8x16xf16>) { - // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16> - %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16> - // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, vnni_axis = 0 : i64}> : !xegpu.tensor_desc<8x16xf16> -> vector<4x16x2xf16> - %2 = xegpu.load_nd %1 <{vnni_axis = 0, l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}> - : !xegpu.tensor_desc<8x16xf16> -> vector<4x16x2xf16> - gpu.return -} - -// CHECK: func @test_store_nd_vc(%[[arg0:.*]]: memref<24x32xf16>) { -gpu.func @test_store_nd_vc(%dst: memref<24x32xf16>) { - // CHECK: %[[C:.*]] = arith.constant dense<1.000000e+00> : vector<24x32xf16> - %1 = arith.constant dense<1.0>: vector<24x32xf16> - // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<24x32xf16> - %2 = xegpu.create_nd_tdesc %dst[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<24x32xf16> - // CHECK: xegpu.store_nd %[[C]], %[[R0]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}> : vector<24x32xf16>, !xegpu.tensor_desc<24x32xf16> - xegpu.store_nd %1, %2 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}>: vector<24x32xf16>, !xegpu.tensor_desc<24x32xf16> - gpu.return -} - -} \ No newline at end of file