Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1125,7 +1125,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
// configure the pipeline.
OptimizationLevel Level = mapToLevel(CodeGenOpts);

if (LangOpts.SYCLIsDevice)
if (LangOpts.SYCLIsDevice) {
PB.registerPipelineStartEPCallback([&](ModulePassManager &MPM,
OptimizationLevel Level) {
MPM.addPass(SYCLVirtualFunctionsAnalysisPass());
Expand All @@ -1139,17 +1139,23 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
/*FP64ConvEmu=*/CodeGenOpts.FP64ConvEmu,
/*ExcludeAspects=*/{"fp64"}));
MPM.addPass(SYCLPropagateJointMatrixUsagePass());
// Lowers static/dynamic local memory builtin calls.
MPM.addPass(SYCLLowerWGLocalMemoryPass());
// Compile-time properties pass must create standard metadata as early
// as possible to make them available for other passes.
MPM.addPass(CompileTimePropertiesPass());
});
else if (LangOpts.SYCLIsHost && !LangOpts.SYCLESIMDBuildHostCode)
PB.registerOptimizerEarlyEPCallback(
[](ModulePassManager &MPM, OptimizationLevel, ThinOrFullLTOPhase) {
// Allocate static local memory in SYCL kernel scope for each
// allocation call. This pass must run after AlwaysInline pass due
// to current implementation restriction.
MPM.addPass(SYCLLowerWGLocalMemoryPass());
});
} else if (LangOpts.SYCLIsHost && !LangOpts.SYCLESIMDBuildHostCode) {
PB.registerPipelineStartEPCallback(
[&](ModulePassManager &MPM, OptimizationLevel Level) {
MPM.addPass(ESIMDRemoveHostCodePass());
});
}

// Add the InferAddressSpaces and SYCLOptimizeBarriers passes for all
// the SPIR[V] targets
Expand Down
29 changes: 29 additions & 0 deletions clang/test/CodeGenSYCL/group-local-memory.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Check that SYCLLowerWGLocalMemory pass is added to the SYCL device
// compilation pipeline with the inliner pass (new Pass Manager).

// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -O2 \
// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \
// RUN: | FileCheck %s -check-prefixes=CHECK-INL,CHECK

// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -O0 \
// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \
// RUN: | FileCheck %s --check-prefixes=CHECK-ALWINL,CHECK

// Check that AlwaysInliner pass is always run for compilation of SYCL device
// target code, even if all optimizations are disabled.

// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -fno-sycl-early-optimizations \
// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \
// RUN: | FileCheck %s --check-prefixes=CHECK-ALWINL,CHECK

// CHECK-INL: Running pass: ModuleInlinerWrapperPass on [module]
// CHECK-ALWINL: Running pass: AlwaysInlinerPass on [module]
// CHECK: Running pass: SYCLLowerWGLocalMemoryPass on [module]

// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -disable-llvm-passes \
// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \
// RUN: | FileCheck %s --check-prefixes=CHECK-NO-PASSES-ALWINL,CHECK-NO-PASSES,CHECK-NO-PASSES-INL

// CHECK-NO-PASSES-INL-NOT: Running pass: ModuleInlinerWrapperPass on [module]
// CHECK-NO-PASSES-ALWINL-NOT: Running pass: AlwaysInlinerPass on [module]
// CHECK-NO-PASSES-NOT: Running pass: SYCLLowerWGLocalMemoryPass on [module]
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
// CHECK: SYCLConditionalCallOnDevicePass
// CHECK: SYCLPropagateAspectsUsagePass
// CHECK: SYCLPropagateJointMatrixUsagePass
// CHECK: SYCLLowerWGLocalMemoryPass
// CHECK: CompileTimePropertiesPass
// CHECK: InferFunctionAttrsPass
// CHECK: AlwaysInlinerPass
// CHECK: ModuleInlinerWrapperPass
// CHECK: SYCLLowerWGLocalMemoryPass
// CHECK: SYCLOptimizeBarriersPass
// CHECK: ConstantMergePass
// CHECK: SYCLMutatePrintfAddrspacePass
Expand Down
44 changes: 1 addition & 43 deletions llvm/lib/SYCLLowerIR/LowerWGLocalMemory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,11 @@
//===----------------------------------------------------------------------===//

#include "llvm/SYCLLowerIR/LowerWGLocalMemory.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Pass.h"
#include "llvm/SYCLLowerIR/SYCLUtils.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/Cloning.h"

using namespace llvm;

Expand Down Expand Up @@ -91,44 +88,6 @@ ModulePass *llvm::createSYCLLowerWGLocalMemoryLegacyPass() {
return new SYCLLowerWGLocalMemoryLegacy();
}

// In sycl header __sycl_allocateLocalMemory builtin call is wrapped in
// group_local_memory/group_local_memory_for_overwrite functions, which must be
// inlined first before each __sycl_allocateLocalMemory call can be lowered to a
// distinct global variable. Inlining them here so that this pass doesn't have
// implicit dependency on AlwaysInlinerPass.
//
// syclcompat::local_mem, which represents a distinct allocation, calls
// group_local_memory_for_overwrite. So local_mem should be inlined as well.
static bool inlineGroupLocalMemoryFunc(Module &M) {
Function *ALMFunc = M.getFunction(SYCL_ALLOCLOCALMEM_CALL);
if (!ALMFunc || ALMFunc->use_empty())
return false;

SmallVector<Function *, 4> WorkList{ALMFunc};
DenseSet<Function *> Visited;
while (!WorkList.empty()) {
auto *F = WorkList.pop_back_val();
for (auto *U : make_early_inc_range(F->users())) {
auto *CI = cast<CallInst>(U);
auto *Caller = CI->getFunction();
// Frontend propagates sycl-forceinline attribute to SYCL_EXTERNAL
// function which directly calls group_local_memory_for_overwrite.
// Don't inline the SYCL_EXTERNAL function.
if (Caller->hasFnAttribute("sycl-forceinline") &&
!sycl::utils::isSYCLExternalFunction(Caller) &&
Visited.insert(Caller).second)
WorkList.push_back(Caller);
if (F != ALMFunc) {
InlineFunctionInfo IFI;
[[maybe_unused]] auto Result = InlineFunction(*CI, IFI);
assert(Result.isSuccess() && "inlining failed");
}
}
}

return !Visited.empty();
}

// TODO: It should be checked that __sycl_allocateLocalMemory (or its source
// form - group_local_memory) does not occur:
// - in a function (other than user lambda/functor)
Expand Down Expand Up @@ -392,8 +351,7 @@ static bool dynamicWGLocalMemory(Module &M) {

PreservedAnalyses SYCLLowerWGLocalMemoryPass::run(Module &M,
ModuleAnalysisManager &) {
bool Changed = inlineGroupLocalMemoryFunc(M);
Changed |= allocaWGLocalMemory(M);
bool Changed = allocaWGLocalMemory(M);
Changed |= dynamicWGLocalMemory(M);
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
66 changes: 0 additions & 66 deletions llvm/test/SYCLLowerIR/group_local_memory_inline.ll

This file was deleted.

49 changes: 0 additions & 49 deletions llvm/test/SYCLLowerIR/group_local_memory_sycl_device_user.ll

This file was deleted.

6 changes: 0 additions & 6 deletions sycl/include/sycl/ext/oneapi/group_local_memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ namespace sycl {
inline namespace _V1 {
namespace ext::oneapi {
template <typename T, typename Group>
#ifdef __SYCL_DEVICE_ONLY__
[[__sycl_detail__::add_ir_attributes_function("sycl-forceinline", true)]]
#endif
std::enable_if_t<
std::is_trivially_destructible_v<T> && sycl::detail::is_group<Group>::value,
multi_ptr<T, access::address_space::local_space, access::decorated::legacy>>
Expand All @@ -47,9 +44,6 @@ std::enable_if_t<
}

template <typename T, typename Group, typename... Args>
#ifdef __SYCL_DEVICE_ONLY__
[[__sycl_detail__::add_ir_attributes_function("sycl-forceinline", true)]]
#endif
std::enable_if_t<
std::is_trivially_destructible_v<T> && sycl::detail::is_group<Group>::value,
multi_ptr<T, access::address_space::local_space, access::decorated::legacy>>
Expand Down