From 492acaa8b42520d5b3c839adf4d37ff0398f0be6 Mon Sep 17 00:00:00 2001 From: dchigarev Date: Mon, 28 Oct 2024 14:54:16 +0000 Subject: [PATCH 1/3] Update IMEX/LLVM versions Signed-off-by: dchigarev --- cmake/imex-version.txt | 2 +- cmake/imex.cmake | 2 +- cmake/llvm-version-imex.txt | 2 +- cmake/llvm-version.txt | 2 +- lib/gc/Transforms/GPU/LinalgToXeGPU.cpp | 9 +++++---- lib/gc/Transforms/Pipeline.cpp | 4 ++-- .../GPU/linalg-to-xegpu-dpas-transpose-sep-alloc.mlir | 2 +- .../GPU/linalg-to-xegpu-dpas-transpose-sep.mlir | 2 +- .../Transforms/GPU/linalg-to-xegpu-dpas-transpose.mlir | 2 +- .../test/gc/Transforms/GPU/linalg-to-xegpu-dpas.mlir | 2 +- test/mlir/test/gc/Transforms/bf16Legalization.mlir | 2 +- 11 files changed, 16 insertions(+), 15 deletions(-) diff --git a/cmake/imex-version.txt b/cmake/imex-version.txt index d5adf32e0..c2f453c02 100644 --- a/cmake/imex-version.txt +++ b/cmake/imex-version.txt @@ -1 +1 @@ -25123cc3692fdbcf837510f39de47ff353d482fc \ No newline at end of file +0a6d2901990183ecdbab9240dbd8be92036d9c20 \ No newline at end of file diff --git a/cmake/imex.cmake b/cmake/imex.cmake index 41d1ded68..638feb24f 100644 --- a/cmake/imex.cmake +++ b/cmake/imex.cmake @@ -14,7 +14,7 @@ if (NOT DEFINED IMEX_INCLUDES) # TODO: Change to main https://github.com/intel/mlir-extensions when all the # required functionality is merged. - set(IMEX_URL https://github.com/intel/mlir-extensions) + set(IMEX_URL https://github.com/dchigarev/mlir-extensions) gc_fetch_content(imex "${IMEX_HASH}" "${IMEX_URL}" SET IMEX_CHECK_LLVM_VERSION=ON IMEX_ENABLE_L0_RUNTIME=0 ) diff --git a/cmake/llvm-version-imex.txt b/cmake/llvm-version-imex.txt index f12cc87a1..33000613b 100644 --- a/cmake/llvm-version-imex.txt +++ b/cmake/llvm-version-imex.txt @@ -1 +1 @@ -3191587666aa3d1e53966bc8876614c7197fac4f +add6b2f35f2bcf1f59a2ab2d5b3dab124fe0895a diff --git a/cmake/llvm-version.txt b/cmake/llvm-version.txt index 196da69d4..33000613b 100644 --- a/cmake/llvm-version.txt +++ b/cmake/llvm-version.txt @@ -1 +1 @@ -f6a756f35a4d0719a96b4e214905369d565d87da +add6b2f35f2bcf1f59a2ab2d5b3dab124fe0895a diff --git a/lib/gc/Transforms/GPU/LinalgToXeGPU.cpp b/lib/gc/Transforms/GPU/LinalgToXeGPU.cpp index bdbccd2da..2ff65325d 100644 --- a/lib/gc/Transforms/GPU/LinalgToXeGPU.cpp +++ b/lib/gc/Transforms/GPU/LinalgToXeGPU.cpp @@ -764,7 +764,9 @@ loadNdDescTiles(PatternRewriter &rewriter, Location loc, ValueRange loadTiles, if (vnniConf) { vecLoadType = getVnniVector(tileType.getShape(), tileType.getElementType(), *vnniConf); - packedAttr = mlir::UnitAttr::get(rewriter.getContext()); + if (!transpose_bit) { + packedAttr = mlir::UnitAttr::get(rewriter.getContext()); + } } SmallVector loadVec; for (auto tile : loadTiles) { @@ -1165,7 +1167,6 @@ static LogicalResult createDPASKernel(linalg::LinalgOp linalgOp, if (vnniFactor == -1) return failure(); - VnniConfig vnniConfA{.vnniFactor = vnniFactor, .vnniAxis = 1}; VnniConfig vnniConfB{.vnniFactor = vnniFactor, .vnniAxis = 0}; // Load A sub-tiles. @@ -1214,7 +1215,7 @@ static LogicalResult createDPASKernel(linalg::LinalgOp linalgOp, // Extract DPAS tiles from loaded sub-tiles. TilesArray dpasVecA = extractVecSubTiles(rewriter, loc, loadVecA, {dimM, kTile}, tileTypeA.getShape(), - {dpasTileM, dpasTileK}, vnniConfA); + {dpasTileM, dpasTileK}); TilesArray dpasVecB = extractVecSubTiles(rewriter, loc, loadVecB, {kTile, dimN}, tileTypeB.getShape(), {dpasTileK, dpasTileN}, vnniConfB); @@ -1629,7 +1630,7 @@ struct LinalgToXeGPU : public gc::impl::LinalgToXeGPUBase { using LinalgToXeGPUBase::LinalgToXeGPUBase; void runOnOperation() override { - LinalgToXeGPUOptions options{kTile, stages, dpasTile}; + LinalgToXeGPUOptions options{kTile, stages, SmallVector{dpasTile.begin(), dpasTile.end()}}; // Run GEMM pattern first to allow fusion with its consumers. RewritePatternSet gemmPatterns(&getContext()); diff --git a/lib/gc/Transforms/Pipeline.cpp b/lib/gc/Transforms/Pipeline.cpp index 6fdc445cf..5fb830e31 100644 --- a/lib/gc/Transforms/Pipeline.cpp +++ b/lib/gc/Transforms/Pipeline.cpp @@ -78,10 +78,10 @@ void populateTensorPasses(mlir::OpPassManager &pm) { // scf + arith + math + vector + tensor + linalg.brgemm void populateVectorPasses(mlir::OpPassManager &pm) { // Do promotion for math / arith ops - pm.addNestedPass(math::createMathLegalizeToF32()); + pm.addNestedPass(math::createMathExtendToSupportedTypes()); // sourceTypeStrs can be extended arith::ArithEmulateUnsupportedFloatsOptions options; - std::array typeStr = {"bf16"}; + SmallVector typeStr = {"bf16"}; options.sourceTypeStrs = typeStr; options.targetTypeStr = "f32"; pm.addNestedPass( diff --git a/test/mlir/test/gc/Transforms/GPU/linalg-to-xegpu-dpas-transpose-sep-alloc.mlir b/test/mlir/test/gc/Transforms/GPU/linalg-to-xegpu-dpas-transpose-sep-alloc.mlir index 81726450f..6e08efe46 100644 --- a/test/mlir/test/gc/Transforms/GPU/linalg-to-xegpu-dpas-transpose-sep-alloc.mlir +++ b/test/mlir/test/gc/Transforms/GPU/linalg-to-xegpu-dpas-transpose-sep-alloc.mlir @@ -76,7 +76,7 @@ module { // Tile B is already in the correct shape. // CHECK: %[[vA_flat:.+]] = vector.shape_cast %[[vA]] : vector<32x16xf16> to vector<512xf16> // CHECK: %[[vA_dpas_flat:.+]] = vector.extract_strided_slice{{.*}}: vector<512xf16> to vector<128xf16> -// CHECK: %[[vA_dpas:.+]] = vector.shape_cast %[[vA_dpas_flat]] : vector<128xf16> to vector<8x8x2xf16> +// CHECK: %[[vA_dpas:.+]] = vector.shape_cast %[[vA_dpas_flat]] : vector<128xf16> to vector<8x16xf16> // CHECK-COUNT-3: vector.extract_strided_slice // Perform DPAS computation. diff --git a/test/mlir/test/gc/Transforms/GPU/linalg-to-xegpu-dpas-transpose-sep.mlir b/test/mlir/test/gc/Transforms/GPU/linalg-to-xegpu-dpas-transpose-sep.mlir index 1687f0ba8..f7ec70ac9 100644 --- a/test/mlir/test/gc/Transforms/GPU/linalg-to-xegpu-dpas-transpose-sep.mlir +++ b/test/mlir/test/gc/Transforms/GPU/linalg-to-xegpu-dpas-transpose-sep.mlir @@ -75,7 +75,7 @@ module { // Tile B is already in the correct shape. // CHECK: %[[vA_flat:.+]] = vector.shape_cast %[[vA]] : vector<32x16xf16> to vector<512xf16> // CHECK: %[[vA_dpas_flat:.+]] = vector.extract_strided_slice{{.*}}: vector<512xf16> to vector<128xf16> -// CHECK: %[[vA_dpas:.+]] = vector.shape_cast %[[vA_dpas_flat]] : vector<128xf16> to vector<8x8x2xf16> +// CHECK: %[[vA_dpas:.+]] = vector.shape_cast %[[vA_dpas_flat]] : vector<128xf16> to vector<8x16xf16> // CHECK-COUNT-3: vector.extract_strided_slice // Perform DPAS computation. diff --git a/test/mlir/test/gc/Transforms/GPU/linalg-to-xegpu-dpas-transpose.mlir b/test/mlir/test/gc/Transforms/GPU/linalg-to-xegpu-dpas-transpose.mlir index d9dc180bf..be38afae0 100644 --- a/test/mlir/test/gc/Transforms/GPU/linalg-to-xegpu-dpas-transpose.mlir +++ b/test/mlir/test/gc/Transforms/GPU/linalg-to-xegpu-dpas-transpose.mlir @@ -68,7 +68,7 @@ module { // Tile B is already in the correct shape. // CHECK: %[[vA_flat:.+]] = vector.shape_cast %[[vA]] : vector<32x16xf16> to vector<512xf16> // CHECK: %[[vA_dpas_flat:.+]] = vector.extract_strided_slice{{.*}}: vector<512xf16> to vector<128xf16> -// CHECK: %[[vA_dpas:.+]] = vector.shape_cast %[[vA_dpas_flat]] : vector<128xf16> to vector<8x8x2xf16> +// CHECK: %[[vA_dpas:.+]] = vector.shape_cast %[[vA_dpas_flat]] : vector<128xf16> to vector<8x16xf16> // CHECK-COUNT-3: vector.extract_strided_slice // Perform DPAS computation. diff --git a/test/mlir/test/gc/Transforms/GPU/linalg-to-xegpu-dpas.mlir b/test/mlir/test/gc/Transforms/GPU/linalg-to-xegpu-dpas.mlir index a62ae1ce5..0564fb27d 100644 --- a/test/mlir/test/gc/Transforms/GPU/linalg-to-xegpu-dpas.mlir +++ b/test/mlir/test/gc/Transforms/GPU/linalg-to-xegpu-dpas.mlir @@ -65,7 +65,7 @@ func.func @matmul(%arg0: memref<32x32xf16>, %arg1: memref<32x32xf16>, %arg2: mem // Tile B is already in the correct shape. // CHECK: %[[vA_flat:.+]] = vector.shape_cast %[[vA]] : vector<32x16xf16> to vector<512xf16> // CHECK: %[[vA_dpas_flat:.+]] = vector.extract_strided_slice{{.*}}: vector<512xf16> to vector<128xf16> -// CHECK: %[[vA_dpas:.+]] = vector.shape_cast %[[vA_dpas_flat]] : vector<128xf16> to vector<8x8x2xf16> +// CHECK: %[[vA_dpas:.+]] = vector.shape_cast %[[vA_dpas_flat]] : vector<128xf16> to vector<8x16xf16> // CHECK-COUNT-3: vector.extract_strided_slice // Perform DPAS computation. diff --git a/test/mlir/test/gc/Transforms/bf16Legalization.mlir b/test/mlir/test/gc/Transforms/bf16Legalization.mlir index 0e2e2d45a..abd20c35a 100644 --- a/test/mlir/test/gc/Transforms/bf16Legalization.mlir +++ b/test/mlir/test/gc/Transforms/bf16Legalization.mlir @@ -1,4 +1,4 @@ -// RUN: gc-opt %s --math-legalize-to-f32 --arith-emulate-unsupported-floats="source-types=bf16 target-type=f32" --canonicalize | FileCheck %s +// RUN: gc-opt %s --math-extend-to-supported-types --arith-emulate-unsupported-floats="source-types=bf16 target-type=f32" --canonicalize | FileCheck %s // CHECK-LABEL: @sin // CHECK-SAME: ([[ARG0:%.+]]: bf16) From 3f5c79bcf788287ebb88ece9fdd9d52ccd512e06 Mon Sep 17 00:00:00 2001 From: dchigarev Date: Mon, 28 Oct 2024 14:58:34 +0000 Subject: [PATCH 2/3] fix code-style Signed-off-by: dchigarev --- lib/gc/Transforms/GPU/LinalgToXeGPU.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/gc/Transforms/GPU/LinalgToXeGPU.cpp b/lib/gc/Transforms/GPU/LinalgToXeGPU.cpp index 2ff65325d..0135d8fe3 100644 --- a/lib/gc/Transforms/GPU/LinalgToXeGPU.cpp +++ b/lib/gc/Transforms/GPU/LinalgToXeGPU.cpp @@ -1213,9 +1213,9 @@ static LogicalResult createDPASKernel(linalg::LinalgOp linalgOp, } // Extract DPAS tiles from loaded sub-tiles. - TilesArray dpasVecA = extractVecSubTiles(rewriter, loc, loadVecA, - {dimM, kTile}, tileTypeA.getShape(), - {dpasTileM, dpasTileK}); + TilesArray dpasVecA = + extractVecSubTiles(rewriter, loc, loadVecA, {dimM, kTile}, + tileTypeA.getShape(), {dpasTileM, dpasTileK}); TilesArray dpasVecB = extractVecSubTiles(rewriter, loc, loadVecB, {kTile, dimN}, tileTypeB.getShape(), {dpasTileK, dpasTileN}, vnniConfB); @@ -1630,7 +1630,8 @@ struct LinalgToXeGPU : public gc::impl::LinalgToXeGPUBase { using LinalgToXeGPUBase::LinalgToXeGPUBase; void runOnOperation() override { - LinalgToXeGPUOptions options{kTile, stages, SmallVector{dpasTile.begin(), dpasTile.end()}}; + LinalgToXeGPUOptions options{ + kTile, stages, SmallVector(dpasTile.begin(), dpasTile.end())}; // Run GEMM pattern first to allow fusion with its consumers. RewritePatternSet gemmPatterns(&getContext()); From 79e1634fd521d2f585d048c0f5a8b0245d2d5695 Mon Sep 17 00:00:00 2001 From: dchigarev Date: Tue, 29 Oct 2024 15:27:44 +0000 Subject: [PATCH 3/3] change repo Signed-off-by: dchigarev --- cmake/imex.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/imex.cmake b/cmake/imex.cmake index 638feb24f..41d1ded68 100644 --- a/cmake/imex.cmake +++ b/cmake/imex.cmake @@ -14,7 +14,7 @@ if (NOT DEFINED IMEX_INCLUDES) # TODO: Change to main https://github.com/intel/mlir-extensions when all the # required functionality is merged. - set(IMEX_URL https://github.com/dchigarev/mlir-extensions) + set(IMEX_URL https://github.com/intel/mlir-extensions) gc_fetch_content(imex "${IMEX_HASH}" "${IMEX_URL}" SET IMEX_CHECK_LLVM_VERSION=ON IMEX_ENABLE_L0_RUNTIME=0 )