Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions include/gc/Transforms/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,14 @@ def AddContextArg : Pass<"add-ctx-arg", "func::FuncOp"> {
}];
}

// Pass definition for `allocs-to-slm`. The implementation only rewrites
// `memref.alloc` ops nested in a `gpu.launch`, so the user-visible text below
// names that op explicitly.
def AllocsToSLM : Pass<"allocs-to-slm", "func::FuncOp"> {
  let summary = "Add 'shared' memory space to memrefs allocated inside a gpu.launch.";
  let description = [{
    Add 'shared' memory space to memrefs allocated by `memref.alloc` inside a
    `gpu.launch` region. Allocations that already carry a memory space, or
    that are located outside of a `gpu.launch`, are left unchanged.
  }];
  let dependentDialects = [
    "gpu::GPUDialect", "memref::MemRefDialect"
  ];
}

def GpuToGpuOcl : Pass<"gpu-to-gpuocl", "ModuleOp"> {
let summary = "Convert the GPU operations to GpuOclRuntime calls.";
let description = [{
Expand Down
96 changes: 96 additions & 0 deletions lib/gc/Transforms/GPU/AllocsToSLM.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
//===- AllocsToSLM.cpp - A pass adding shared mem-space attr ----*- C++ -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "gc/Transforms/Passes.h"

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/TransformOps/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/Dialect.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

#include <numeric>
#include <optional>

using namespace mlir;
using namespace mlir::gc;

namespace mlir {
namespace gc {
#define GEN_PASS_DEF_ALLOCSTOSLM
#include "gc/Transforms/Passes.h.inc"
} // namespace gc
} // namespace mlir

namespace {

/// Returns true when `op` has an enclosing `gpu.launch` ancestor.
bool isInGpuLaunch(Operation *op) {
  // A null op handle means no ancestor of that type was found.
  return static_cast<bool>(op->getParentOfType<gpu::LaunchOp>());
}

/// Returns true when `value` is of memref type and that type carries a
/// non-null memory-space attribute; returns false for every other value.
bool hasAssignedMemSpace(Value value) {
  auto asMemRef = dyn_cast<MemRefType>(value.getType());
  if (!asMemRef)
    return false;
  return static_cast<bool>(asMemRef.getMemorySpace());
}

/// Rewrites a `memref.alloc` nested inside a `gpu.launch` so that its result
/// type carries an explicit memory-space attribute.
///
/// The pattern does not apply (and reports a match failure) when:
///   * the allocated memref already has a memory space, or
///   * the allocation is not nested inside a `gpu.launch`.
///
/// On success the original allocation is replaced by a new `memref.alloc` with
/// the same shape, element type, layout, dynamic sizes, symbol operands and
/// alignment, differing only in the memory space of the result type.
struct ConvertAlloc : public OpRewritePattern<memref::AllocOp> {
  using OpRewritePattern<memref::AllocOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(memref::AllocOp allocOp,
                                PatternRewriter &rewriter) const override {
    if (hasAssignedMemSpace(allocOp->getResult(0))) {
      return rewriter.notifyMatchFailure(
          allocOp, "Memref already has some memory space attribute");
    }

    if (!isInGpuLaunch(allocOp)) {
      return rewriter.notifyMatchFailure(allocOp,
                                         "Only support allocs in GPU regions");
    }

    MemRefType originalMemRefType =
        cast<MemRefType>(allocOp->getResult(0).getType());

    // NOTE(review): the pass is described as adding the 'shared' memory space,
    // yet the enumerator used here is `Private`. The accompanying lit test
    // expects memory space `3`, so presumably the numeric value is what the
    // downstream lowering relies on - confirm before changing the enumerator.
    IntegerAttr sharedAddressSpace =
        IntegerAttr::get(rewriter.getIntegerType(64),
                         static_cast<int64_t>(gpu::AddressSpace::Private));

    // Same shape / element type / layout, only the memory space differs.
    MemRefType newMemRefType = MemRefType::get(
        originalMemRefType.getShape(), originalMemRefType.getElementType(),
        originalMemRefType.getLayout(), sharedAddressSpace);

    // Go through the rewriter so the driver is notified about the change and
    // the matched op is removed. Forward dynamic sizes, symbol operands and
    // the alignment separately so that none of them is lost or misclassified.
    rewriter.replaceOpWithNewOp<memref::AllocOp>(
        allocOp, newMemRefType, allocOp.getDynamicSizes(),
        allocOp.getSymbolOperands(), allocOp.getAlignmentAttr());

    return success();
  }
};

/// Pass driver: greedily applies `ConvertAlloc` to the operation the pass is
/// anchored on and fails the pass if the rewrite driver reports failure.
struct AllocsToSLM : public gc::impl::AllocsToSLMBase<AllocsToSLM> {
  void runOnOperation() override {
    MLIRContext *ctx = &getContext();

    RewritePatternSet patterns(ctx);
    patterns.add<ConvertAlloc>(ctx);

    // Do not silently discard a failure reported by the greedy driver.
    if (failed(applyPatternsAndFoldGreedily(getOperation(),
                                            std::move(patterns))))
      signalPassFailure();
  }
};

} // namespace
1 change: 1 addition & 0 deletions lib/gc/Transforms/GPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ set_property(GLOBAL APPEND PROPERTY IMEX_LIBS ${IMEX_LIBS})

gc_add_mlir_library(GcGpuPasses
AddContextArg.cpp
AllocsToSLM.cpp
GpuToGpuOcl.cpp
LinalgToXeGPU.cpp
Pipeline.cpp
Expand Down
27 changes: 27 additions & 0 deletions test/mlir/test/gc/Transforms/GPU/allocs-to-slm.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// RUN: gc-opt %s --allocs-to-slm | FileCheck %s

func.func @entry() {
%c1 = arith.constant 1 : index

// Memory space wasn't assigned as it's allocated outside of gpu.launch block
// CHECK: %[[NEW_MEMREF_0:.*]] = memref.alloc() : memref<16x16xf16>
%0 = memref.alloc() : memref<16x16xf16>
gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %c1, %sz_by = %c1, %sz_bz = %c1)
threads(%tx, %ty, %tz) in (%sz_tx = %c1, %sz_ty = %c1, %sz_tz = %c1) {
// Memory space wasn't changed as it's explicitly specified
// CHECK: %[[NEW_MEMREF_1:.*]] = memref.alloc() : memref<16x16xf16, 1>
%1 = memref.alloc() : memref<16x16xf16, 1>
// Added 'shared' memory space
// CHECK: %[[NEW_MEMREF_2:.*]] = memref.alloc() : memref<16x16xf16, 3>
%2 = memref.alloc() : memref<16x16xf16>

// CHECK: linalg.add ins(%[[NEW_MEMREF_1]], %[[NEW_MEMREF_2]] : memref<16x16xf16, 1>, memref<16x16xf16, 3>) outs(%[[NEW_MEMREF_0]] : memref<16x16xf16>)
linalg.add ins(%1, %2 :memref<16x16xf16, 1>, memref<16x16xf16>) outs(%0 : memref<16x16xf16>)
// CHECK: memref.dealloc %[[NEW_MEMREF_1]] : memref<16x16xf16, 1>
// CHECK: memref.dealloc %[[NEW_MEMREF_2]] : memref<16x16xf16, 3>
memref.dealloc %1 : memref<16x16xf16, 1>
memref.dealloc %2 : memref<16x16xf16>
gpu.terminator
}
return
}