From b81e56bd08bad7a33daf8b2952fc02112cd6aba5 Mon Sep 17 00:00:00 2001 From: dchigarev Date: Mon, 28 Oct 2024 11:51:56 +0000 Subject: [PATCH 1/4] Add shared mem-space for SLM allocs Signed-off-by: dchigarev --- include/gc/Transforms/Passes.td | 8 ++ lib/gc/Transforms/GPU/AllocsToSLM.cpp | 107 ++++++++++++++++++ lib/gc/Transforms/GPU/CMakeLists.txt | 1 + .../test/gc/Transforms/GPU/allocs-to-slm.mlir | 27 +++++ 4 files changed, 143 insertions(+) create mode 100644 lib/gc/Transforms/GPU/AllocsToSLM.cpp create mode 100644 test/mlir/test/gc/Transforms/GPU/allocs-to-slm.mlir diff --git a/include/gc/Transforms/Passes.td b/include/gc/Transforms/Passes.td index fb5581bb..43dbb31d 100644 --- a/include/gc/Transforms/Passes.td +++ b/include/gc/Transforms/Passes.td @@ -101,6 +101,14 @@ def AddContextArg : Pass<"add-ctx-arg", "func::FuncOp"> { }]; } +def AllocsToSLM : Pass<"allocs-to-slm", "func::FuncOp"> { + let summary = "Add 'shared' memory space to memrefs allocated inside a gpu.block."; + let description = [{Add 'shared' memory space to memrefs allocated inside a gpu.block.}]; + let dependentDialects = [ + "gpu::GPUDialect", "memref::MemRefDialect" + ]; +} + def GpuToGpuOcl : Pass<"gpu-to-gpuocl", "ModuleOp"> { let summary = "Convert the GPU operations to GpuOclRuntime calls."; let description = [{ diff --git a/lib/gc/Transforms/GPU/AllocsToSLM.cpp b/lib/gc/Transforms/GPU/AllocsToSLM.cpp new file mode 100644 index 00000000..eefdb7c9 --- /dev/null +++ b/lib/gc/Transforms/GPU/AllocsToSLM.cpp @@ -0,0 +1,107 @@ +//===- LinalgToXeGPU.cpp - Linalg To XeGPU Lowering -------------*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "gc/Transforms/Passes.h" + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/GPU/TransformOps/Utils.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/Dialect.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +#include +#include + +using namespace mlir; +using namespace mlir::gc; + +namespace mlir { +namespace gc { +#define GEN_PASS_DEF_ALLOCSTOSLM +#include "gc/Transforms/Passes.h.inc" +} // namespace gc +} // namespace mlir + +namespace { + +bool isInGpuLaunch(mlir::Operation *op) { + // Traverse up through parent operations + mlir::Operation *parentOp = op; + while (parentOp) { + // Check if the current parent is a gpu.launch operation + if (llvm::isa(parentOp)) { + return true; + } + // Move to the parent operation + parentOp = parentOp->getParentOp(); + } + // If we reached the top without finding a gpu.launch, return false + return false; +} + +bool hasAssignedMemSpace(mlir::Value value) { + if (auto memrefType = value.getType().dyn_cast()) { + if (memrefType.getMemorySpace()) { + return true; + } + } + return false; +} + +struct ConvertAlloc : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + ConvertAlloc(MLIRContext *ctx) : OpRewritePattern(ctx) {} + + LogicalResult matchAndRewrite(memref::AllocOp allocOp, + PatternRewriter &rewriter) const override { + if (hasAssignedMemSpace(allocOp->getResult(0))) { + return rewriter.notifyMatchFailure( + allocOp, "Memref already has some memory space attribute"); + } + + if (!isInGpuLaunch(allocOp)) { + return rewriter.notifyMatchFailure(allocOp, + "Only support allocs in GPU regions"); + } + + mlir::Value memref = allocOp->getResult(0); + mlir::MemRefType originalMemRefType = + memref.getType().cast(); + + IntegerAttr sharedAddressSpace = + IntegerAttr::get(rewriter.getIntegerType(64), + static_cast(gpu::AddressSpace::Private)); + + // Create a new MemRefType with the desired address space + mlir::MemRefType newMemRefType = mlir::MemRefType::get( + originalMemRefType.getShape(), originalMemRefType.getElementType(), + originalMemRefType.getLayout(), sharedAddressSpace); + + mlir::Value newMemRef = rewriter.create( + allocOp.getLoc(), newMemRefType, allocOp.getOperands()); + + memref.replaceAllUsesWith(newMemRef); + + return success(); + } +}; + +struct AllocsToSLM : public gc::impl::AllocsToSLMBase { + void runOnOperation() override { + const auto ctx = &getContext(); + + RewritePatternSet patterns(ctx); + patterns.add(patterns.getContext()); + (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); + } +}; + +} // namespace diff --git a/lib/gc/Transforms/GPU/CMakeLists.txt b/lib/gc/Transforms/GPU/CMakeLists.txt index 47d3899b..28981f20 100644 --- a/lib/gc/Transforms/GPU/CMakeLists.txt +++ b/lib/gc/Transforms/GPU/CMakeLists.txt @@ -12,6 +12,7 @@ set_property(GLOBAL APPEND PROPERTY IMEX_LIBS ${IMEX_LIBS}) gc_add_mlir_library(GcGpuPasses AddContextArg.cpp + AllocsToSLM.cpp GpuToGpuOcl.cpp LinalgToXeGPU.cpp Pipeline.cpp diff --git a/test/mlir/test/gc/Transforms/GPU/allocs-to-slm.mlir b/test/mlir/test/gc/Transforms/GPU/allocs-to-slm.mlir new file mode 100644 index 00000000..5aaca698 --- /dev/null +++ b/test/mlir/test/gc/Transforms/GPU/allocs-to-slm.mlir @@ -0,0 +1,27 @@ +// RUN: gc-opt %s --allocs-to-slm | FileCheck %s + +func.func @entry() { + %c1 = arith.constant 1 : index + + // Memory space wasn't assigned as it's allocated outside of gpu.launch block + // CHECK: %[[NEW_MEMREF_0:.*]] = memref.alloc() : memref<16x16xf16> + %0 = memref.alloc() : memref<16x16xf16> + gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %c1, %sz_by = %c1, %sz_bz = %c1) + threads(%tx, %ty, %tz) in (%sz_tx = %c1, %sz_ty = %c1, %sz_tz = %c1) { + // Memory space was changed as it's explicitly specifided + // CHECK: %[[NEW_MEMREF_1:.*]] = memref.alloc() : memref<16x16xf16, 1> + %1 = memref.alloc() : memref<16x16xf16, 1> + // Added 'shared' memory space + // CHECK: %[[NEW_MEMREF_2:.*]] = memref.alloc() : memref<16x16xf16, 3> + %2 = memref.alloc() : memref<16x16xf16> + + // CHECK: linalg.add ins(%[[NEW_MEMREF_1]], %[[NEW_MEMREF_2]] : memref<16x16xf16, 1>, memref<16x16xf16, 3>) outs(%[[NEW_MEMREF_0]] : memref<16x16xf16>) + linalg.add ins(%1, %2 :memref<16x16xf16, 1>, memref<16x16xf16>) outs(%0 : memref<16x16xf16>) + // CHECK: memref.dealloc %[[NEW_MEMREF_1]] : memref<16x16xf16, 1> + // CHECK: memref.dealloc %[[NEW_MEMREF_2]] : memref<16x16xf16, 3> + memref.dealloc %1 : memref<16x16xf16, 1> + memref.dealloc %2 : memref<16x16xf16> + gpu.terminator + } + return +} From 6332edc58e8d885efb3bc0133d26dde40c95c376 Mon Sep 17 00:00:00 2001 From: dchigarev Date: Mon, 28 Oct 2024 12:29:59 +0000 Subject: [PATCH 2/4] fix license Signed-off-by: dchigarev --- lib/gc/Transforms/GPU/AllocsToSLM.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gc/Transforms/GPU/AllocsToSLM.cpp b/lib/gc/Transforms/GPU/AllocsToSLM.cpp index eefdb7c9..a92c6a36 100644 --- a/lib/gc/Transforms/GPU/AllocsToSLM.cpp +++ b/lib/gc/Transforms/GPU/AllocsToSLM.cpp @@ -1,4 +1,4 @@ -//===- LinalgToXeGPU.cpp - Linalg To XeGPU Lowering -------------*- C++ -*-===// +//===- AllocsToSLM.cpp - A pass adding shared mem-space attr ----*- C++ -*-===// // // This file is licensed under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. From 2a649cd2474b2925203ae985badab225ccfa7323 Mon Sep 17 00:00:00 2001 From: dchigarev Date: Mon, 28 Oct 2024 13:29:47 +0000 Subject: [PATCH 3/4] use 'getParentOfType' Signed-off-by: dchigarev --- lib/gc/Transforms/GPU/AllocsToSLM.cpp | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/lib/gc/Transforms/GPU/AllocsToSLM.cpp b/lib/gc/Transforms/GPU/AllocsToSLM.cpp index a92c6a36..58f6fcaf 100644 --- a/lib/gc/Transforms/GPU/AllocsToSLM.cpp +++ b/lib/gc/Transforms/GPU/AllocsToSLM.cpp @@ -32,18 +32,8 @@ namespace gc { namespace { bool isInGpuLaunch(mlir::Operation *op) { - // Traverse up through parent operations - mlir::Operation *parentOp = op; - while (parentOp) { - // Check if the current parent is a gpu.launch operation - if (llvm::isa(parentOp)) { - return true; - } - // Move to the parent operation - parentOp = parentOp->getParentOp(); - } - // If we reached the top without finding a gpu.launch, return false - return false; + auto launchOp = op->getParentOfType(); + return launchOp != nullptr; } bool hasAssignedMemSpace(mlir::Value value) { From 873bf4c9bac394d96f882a3ec9c854c991b421a4 Mon Sep 17 00:00:00 2001 From: dchigarev Date: Mon, 28 Oct 2024 14:27:16 +0000 Subject: [PATCH 4/4] do not use deprecated api Signed-off-by: dchigarev --- lib/gc/Transforms/GPU/AllocsToSLM.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/lib/gc/Transforms/GPU/AllocsToSLM.cpp b/lib/gc/Transforms/GPU/AllocsToSLM.cpp index 58f6fcaf..1d531829 100644 --- a/lib/gc/Transforms/GPU/AllocsToSLM.cpp +++ b/lib/gc/Transforms/GPU/AllocsToSLM.cpp @@ -31,13 +31,13 @@ namespace gc { namespace { -bool isInGpuLaunch(mlir::Operation *op) { +bool isInGpuLaunch(Operation *op) { auto launchOp = op->getParentOfType(); return launchOp != nullptr; } -bool hasAssignedMemSpace(mlir::Value value) { - if (auto memrefType = value.getType().dyn_cast()) { +bool hasAssignedMemSpace(Value value) { + if (auto memrefType = dyn_cast(value.getType())) { if (memrefType.getMemorySpace()) { return true; } @@ -62,20 +62,19 @@ struct ConvertAlloc : public OpRewritePattern { "Only support allocs in GPU regions"); } - mlir::Value memref = allocOp->getResult(0); - mlir::MemRefType originalMemRefType = - memref.getType().cast(); + Value memref = allocOp->getResult(0); + MemRefType originalMemRefType = cast(memref.getType()); IntegerAttr sharedAddressSpace = IntegerAttr::get(rewriter.getIntegerType(64), static_cast(gpu::AddressSpace::Private)); // Create a new MemRefType with the desired address space - mlir::MemRefType newMemRefType = mlir::MemRefType::get( + MemRefType newMemRefType = MemRefType::get( originalMemRefType.getShape(), originalMemRefType.getElementType(), originalMemRefType.getLayout(), sharedAddressSpace); - mlir::Value newMemRef = rewriter.create( + Value newMemRef = rewriter.create( allocOp.getLoc(), newMemRefType, allocOp.getOperands()); memref.replaceAllUsesWith(newMemRef);