intel · dchigarev · Oct 29, 2024 · Oct 28, 2024 · Oct 28, 2024 · Oct 28, 2024
diff --git a/include/gc/Transforms/Passes.td b/include/gc/Transforms/Passes.td
@@ -101,6 +101,14 @@ def AddContextArg : Pass<"add-ctx-arg", "func::FuncOp"> {
   }];
 }
 
+// Marks device-side allocations as living in 'shared' memory: only
+// `memref.alloc` ops nested in a `gpu.launch` that do not yet carry a memory
+// space are affected.
+def AllocsToSLM : Pass<"allocs-to-slm", "func::FuncOp"> {
+  let summary = "Add 'shared' memory space to memrefs allocated inside a gpu.launch.";
+  let description = [{
+    Rewrites every `memref.alloc` nested inside a `gpu.launch` region whose
+    result type has no memory space so that the allocated memref carries the
+    'shared' memory space. Allocations outside of `gpu.launch` and allocations
+    that already specify a memory space are left untouched.
+  }];
+  let dependentDialects = [
+    "gpu::GPUDialect", "memref::MemRefDialect"
+  ];
+}
+
 def GpuToGpuOcl : Pass<"gpu-to-gpuocl", "ModuleOp"> {
   let summary = "Convert the GPU operations to GpuOclRuntime calls.";
   let description = [{

diff --git a/lib/gc/Transforms/GPU/AllocsToSLM.cpp b/lib/gc/Transforms/GPU/AllocsToSLM.cpp
@@ -0,0 +1,96 @@
+//===- AllocsToSLM.cpp - A pass adding shared mem-space attr ----*- C++ -*-===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gc/Transforms/Passes.h"
+
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/GPU/TransformOps/Utils.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/IR/Dialect.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+#include <numeric>
+#include <optional>
+
+using namespace mlir;
+using namespace mlir::gc;
+
+namespace mlir {
+namespace gc {
+#define GEN_PASS_DEF_ALLOCSTOSLM
+#include "gc/Transforms/Passes.h.inc"
+} // namespace gc
+} // namespace mlir
+
+namespace {
+
+/// Returns true when `op` has an enclosing `gpu.launch` operation among its
+/// ancestors, false otherwise.
+bool isInGpuLaunch(Operation *op) {
+  return op->getParentOfType<gpu::LaunchOp>() != nullptr;
+}
+
+/// Returns true when `value` is of memref type and that type already carries
+/// a (non-null) memory space attribute. Non-memref values yield false.
+bool hasAssignedMemSpace(Value value) {
+  auto asMemRef = dyn_cast<MemRefType>(value.getType());
+  if (!asMemRef)
+    return false;
+  return static_cast<bool>(asMemRef.getMemorySpace());
+}
+
+/// Rewrite pattern that re-creates a `memref.alloc` nested inside a
+/// `gpu.launch` with a result type that additionally carries a memory space
+/// attribute. Allocations that already have a memory space, or that are not
+/// nested in a `gpu.launch`, are rejected with a match failure.
+struct ConvertAlloc : public OpRewritePattern<memref::AllocOp> {
+  // The inherited constructors already accept a bare `MLIRContext *`.
+  using OpRewritePattern<memref::AllocOp>::OpRewritePattern;
+
+  /// Replaces `allocOp` with an equivalent allocation in the new memory
+  /// space. Returns success() when the op was replaced, and a match failure
+  /// (leaving the IR untouched) otherwise.
+  LogicalResult matchAndRewrite(memref::AllocOp allocOp,
+                                PatternRewriter &rewriter) const override {
+    if (hasAssignedMemSpace(allocOp->getResult(0))) {
+      return rewriter.notifyMatchFailure(
+          allocOp, "Memref already has some memory space attribute");
+    }
+
+    if (!isInGpuLaunch(allocOp)) {
+      return rewriter.notifyMatchFailure(allocOp,
+                                         "Only support allocs in GPU regions");
+    }
+
+    MemRefType originalMemRefType =
+        cast<MemRefType>(allocOp->getResult(0).getType());
+
+    // NOTE(review): the attribute is built from gpu::AddressSpace::Private
+    // although the variable name and the pass talk about 'shared' memory; the
+    // accompanying lit test expects the numeric memory space 3 - confirm this
+    // enum is the intended one.
+    IntegerAttr sharedAddressSpace =
+        IntegerAttr::get(rewriter.getIntegerType(64),
+                         static_cast<int64_t>(gpu::AddressSpace::Private));
+
+    // Same shape, element type and layout; only the memory space differs.
+    MemRefType newMemRefType = MemRefType::get(
+        originalMemRefType.getShape(), originalMemRefType.getElementType(),
+        originalMemRefType.getLayout(), sharedAddressSpace);
+
+    // Go through the rewriter so that the driver is notified about the
+    // replacement and the original op is erased. Dynamic sizes, symbol
+    // operands and the alignment attribute are forwarded separately so that
+    // none of them is dropped or merged into another operand group.
+    rewriter.replaceOpWithNewOp<memref::AllocOp>(
+        allocOp, newMemRefType, allocOp.getDynamicSizes(),
+        allocOp.getSymbolOperands(), allocOp.getAlignmentAttr());
+
+    return success();
+  }
+};
+
+/// Pass entry point: greedily applies the ConvertAlloc pattern to the
+/// operation this pass is run on.
+struct AllocsToSLM : public gc::impl::AllocsToSLMBase<AllocsToSLM> {
+  void runOnOperation() override {
+    MLIRContext *context = &getContext();
+
+    RewritePatternSet allocPatterns(context);
+    allocPatterns.add<ConvertAlloc>(context);
+
+    // The driver's result is intentionally discarded.
+    (void)applyPatternsAndFoldGreedily(getOperation(),
+                                       std::move(allocPatterns));
+  }
+};
+
+} // namespace
diff --git a/lib/gc/Transforms/GPU/CMakeLists.txt b/lib/gc/Transforms/GPU/CMakeLists.txt
@@ -12,6 +12,7 @@ set_property(GLOBAL APPEND PROPERTY IMEX_LIBS ${IMEX_LIBS})
 
 gc_add_mlir_library(GcGpuPasses
   AddContextArg.cpp
+  AllocsToSLM.cpp
   GpuToGpuOcl.cpp
   LinalgToXeGPU.cpp
   Pipeline.cpp

diff --git a/test/mlir/test/gc/Transforms/GPU/allocs-to-slm.mlir b/test/mlir/test/gc/Transforms/GPU/allocs-to-slm.mlir
@@ -0,0 +1,27 @@
+// RUN: gc-opt %s --allocs-to-slm | FileCheck %s
+
+func.func @entry() {
+  %c1 = arith.constant 1 : index
+
+  // Memory space wasn't assigned as it's allocated outside of gpu.launch block
+  // CHECK: %[[NEW_MEMREF_0:.*]] = memref.alloc() : memref<16x16xf16>
+  %0 = memref.alloc() : memref<16x16xf16>
+  gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %c1, %sz_by = %c1, %sz_bz = %c1)
+             threads(%tx, %ty, %tz) in (%sz_tx = %c1, %sz_ty = %c1, %sz_tz = %c1) {
+    // Memory space wasn't changed as it's explicitly specified
+    // CHECK: %[[NEW_MEMREF_1:.*]] = memref.alloc() : memref<16x16xf16, 1>
+    %1 = memref.alloc() : memref<16x16xf16, 1>
+    // Added 'shared' memory space
+    // CHECK: %[[NEW_MEMREF_2:.*]] = memref.alloc() : memref<16x16xf16, 3>
+    %2 = memref.alloc() : memref<16x16xf16>
+
+    // CHECK: linalg.add ins(%[[NEW_MEMREF_1]], %[[NEW_MEMREF_2]] : memref<16x16xf16, 1>, memref<16x16xf16, 3>) outs(%[[NEW_MEMREF_0]] : memref<16x16xf16>)
+    linalg.add ins(%1, %2 :memref<16x16xf16, 1>, memref<16x16xf16>) outs(%0 : memref<16x16xf16>)
+    // CHECK: memref.dealloc %[[NEW_MEMREF_1]] : memref<16x16xf16, 1>
+    // CHECK: memref.dealloc %[[NEW_MEMREF_2]] : memref<16x16xf16, 3>
+    memref.dealloc %1 : memref<16x16xf16, 1>
+    memref.dealloc %2 : memref<16x16xf16>
+    gpu.terminator
+  }
+  return
+}