From 6c0ed04603e49ba5d54f2b71b01d2a26d1b6be36 Mon Sep 17 00:00:00 2001 From: Petr Kurapov Date: Fri, 25 Oct 2024 06:31:40 -0700 Subject: [PATCH] [GPU] Add XeVM block load/store ops definitions --- include/gc/Dialect/LLVMIR/XeVMDialect.h | 2 + include/gc/Dialect/LLVMIR/XeVMOps.td | 137 +++++++++++++++++++++++ lib/gc/Dialect/LLVMIR/IR/XeVMDialect.cpp | 5 + 3 files changed, 144 insertions(+) diff --git a/include/gc/Dialect/LLVMIR/XeVMDialect.h b/include/gc/Dialect/LLVMIR/XeVMDialect.h index 43d5ca74..80f68248 100644 --- a/include/gc/Dialect/LLVMIR/XeVMDialect.h +++ b/include/gc/Dialect/LLVMIR/XeVMDialect.h @@ -14,6 +14,8 @@ #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" +#include "gc/Dialect/LLVMIR/XeVMOpsEnums.h.inc" + #define GET_ATTRDEF_CLASSES #include "gc/Dialect/LLVMIR/XeVMOpsAttributes.h.inc" diff --git a/include/gc/Dialect/LLVMIR/XeVMOps.td b/include/gc/Dialect/LLVMIR/XeVMOps.td index e8210bc4..b460a5d8 100644 --- a/include/gc/Dialect/LLVMIR/XeVMOps.td +++ b/include/gc/Dialect/LLVMIR/XeVMOps.td @@ -12,6 +12,9 @@ include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td" include "mlir/Dialect/LLVMIR/LLVMOpBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" +include "mlir/IR/OpBase.td" +include "mlir/IR/EnumAttr.td" + def XeVM_Dialect : Dialect { let name = "xevm"; let cppNamespace = "::mlir::xevm"; @@ -26,4 +29,138 @@ class XeVM_Attr traits = []> def XeVM_TargettAttr : XeVM_Attr<"XeVMTarget", "target"> {} +class XeVM_Op traits = []> : + Op; + +def XeVM_ElemType : AnyTypeOf<[AnyI8, AnyI16, AnyI32, F32, F16, BF16]>; + +class XeVM_LoadCacheControl : I32EnumAttr, + I32EnumAttrCase<"UC", 1, !strconcat(cacheMnemonic, "UC")>, // uncached + I32EnumAttrCase<"C", 2, !strconcat(cacheMnemonic, "C")>, // cached + I32EnumAttrCase<"S", 3, !strconcat(cacheMnemonic, "S")>, // streaming + I32EnumAttrCase<"IAR", 4, !strconcat(cacheMnemonic, "IAR")>, // invalidate-after-read + ]> { + let cppNamespace = "::mlir::xevm"; +} + +def XeVM_L1LoadCacheControl : 
XeVM_LoadCacheControl<"L1">; +def XeVM_L3LoadCacheControl : XeVM_LoadCacheControl<"L3">; + +class XeVM_StoreCacheControl : I32EnumAttr, + I32EnumAttrCase<"UC", 1, !strconcat(cacheMnemonic, "UC")>, // uncached + I32EnumAttrCase<"WT", 2, !strconcat(cacheMnemonic, "WT")>, // write-through + I32EnumAttrCase<"S", 3, !strconcat(cacheMnemonic, "S")>, // streaming + I32EnumAttrCase<"WB", 4, !strconcat(cacheMnemonic, "WB")>, // write back + ]> { + let cppNamespace = "::mlir::xevm"; +} + +def XeVM_L1StoreCacheControl : XeVM_StoreCacheControl<"L1">; +def XeVM_L3StoreCacheControl : XeVM_StoreCacheControl<"L3">; + +def XeVM_BlockLoad2dOp : XeVM_Op<"blockload2d">, + Results<(outs FixedVectorOf<[XeVM_ElemType]>:$res)>, + Arguments<(ins + Arg:$ptr, + I32:$base_width, + I32:$base_height, + I32:$base_pitch, + I32:$x, + I32:$y, + I32Attr:$elem_size_in_bits, + I32Attr:$tile_width, + I32Attr:$tile_height, + I32Attr:$v_blocks, + I1Attr:$transpose, + I1Attr:$vnni_transform, + DefaultValuedAttr:$l1_cache_control, + DefaultValuedAttr:$l3_cache_control + )> { + + let summary = "2D block load"; + + let description = [{ + The `xevm.blockload2d` operation loads a two dimensional matrix tile + from a larger matrix residing in memory. 
The parameters are: + $ptr - the base address of the matrix containing the tile to load + $base_width, $base_height, $base_pitch - the shape of the matrix + $x, $y, $tile_width, $tile_height - the starting offsets and shape of the tile to load + $elem_size_in_bits - the size in bits of the matrix element + - 32 for f32, bf32 + - 16 for f16, int16, bf16 + - 8 for int8, int4, int2 + $v_blocks - number of tiles to load + $transpose - transpose the tile in registers (useful for 32-bit element types) + $vnni_transform - transpose and pack the submatrix in registers (useful for < 32-bit element types) + $l1_cache_control, $l3_cache_control - enumerators that set the L1 and L3 cache behaviour + + Notes: + - the $transpose and $vnni_transform parameters are mutually exclusive + - transposing the loaded tile is typically used for the B matrix operand + (D = C + A * B), where A has row-major layout in registers and B should have column-major layout. + - if the loaded tile contains out-of-bounds elements of the matrix, they are filled with 0. + - coordinates are provided in elements, while width and pitch are provided in bytes. 
+ }]; + + let assemblyFormat = [{ + operands ` ` `{` `elem_size_in_bits` `=` $elem_size_in_bits `,` `tile_width` `=` $tile_width `,` + `tile_height` `=` $tile_height `,` `v_blocks` `=` $v_blocks `,` `transpose` `=` $transpose `,` + `vnni_transform` `=` $vnni_transform `,` `l1_cache_control` `=` $l1_cache_control `,` + `l3_cache_control` `=` $l3_cache_control `}` attr-dict `:` functional-type(operands, results) + }]; + + let hasVerifier = 1; +} + +def XeVM_BlockStore2dOp : XeVM_Op<"blockstore2d">, + Arguments<(ins + Arg:$ptr, + I32:$base_width, + I32:$base_height, + I32:$base_pitch, + I32:$x, + I32:$y, + I32Attr:$elem_size_in_bits, + I32Attr:$tile_width, + I32Attr:$tile_height, + I32Attr:$v_blocks, + FixedVectorOf<[XeVM_ElemType]>:$stored_val, + DefaultValuedAttr:$l1_cache_control, + DefaultValuedAttr:$l3_cache_control + )> { + + let summary = "2D block store"; + + let description = [{ + The `xevm.blockstore2d` operation stores a two dimensional tile into a + larger matrix residing in memory. The parameters are: + $ptr - the base address of the matrix where to store the tile + $base_width, $base_height, $base_pitch - the shape of the matrix + $x, $y, $tile_width, $tile_height - the starting offsets and shape of the tile to store + $elem_size_in_bits - the size in bits of the matrix element + - 32 for f32, bf32 + - 16 for f16, int16, bf16 + - 8 for int8, int4, int2 + $v_blocks - number of tiles to store + $cache_control - an enumerator that sets the L1 and L3 cache behaviour + $stored_val - the tile to store + + Notes: + - coordinate is provided in elements, while width and pitch are provided in bytes. 
+ }]; + + let assemblyFormat = [{ + operands ` ` `{` `elem_size_in_bits` `=` $elem_size_in_bits `,` `tile_width` `=` $tile_width `,` + `tile_height` `=` $tile_height `,` `v_blocks` `=` $v_blocks `,` `l1_cache_control` `=` $l1_cache_control `,` + `l3_cache_control` `=` $l3_cache_control `}` + attr-dict `:` `(` type(operands) `)` + }]; + + let hasVerifier = 1; +} + #endif // XEVMIR_OPS diff --git a/lib/gc/Dialect/LLVMIR/IR/XeVMDialect.cpp b/lib/gc/Dialect/LLVMIR/IR/XeVMDialect.cpp index c330dae9..9e0c2506 100644 --- a/lib/gc/Dialect/LLVMIR/IR/XeVMDialect.cpp +++ b/lib/gc/Dialect/LLVMIR/IR/XeVMDialect.cpp @@ -16,6 +16,11 @@ using namespace mlir; using namespace xevm; #include "gc/Dialect/LLVMIR/XeVMOpsDialect.cpp.inc" +#include "gc/Dialect/LLVMIR/XeVMOpsEnums.cpp.inc" + +// TODO +LogicalResult BlockLoad2dOp::verify() { return success(); } +LogicalResult BlockStore2dOp::verify() { return success(); } void XeVMDialect::initialize() { // NOLINTBEGIN