Skip to content

[SYCL][ESIMD] Run all passes with O0 opt level #19554

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 20 additions & 14 deletions llvm/lib/SYCLPostLink/ESIMDPostSplitProcessing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,25 +30,30 @@ using namespace llvm::module_split;

namespace {

ModulePassManager buildESIMDLoweringPipeline(bool OptLevelO0, bool SplitESIMD) {
ModulePassManager buildESIMDLoweringPipeline(bool ForceDisableESIMDOpt,
bool SplitESIMD) {
ModulePassManager MPM;
MPM.addPass(SYCLLowerESIMDPass(!SplitESIMD));

FunctionPassManager FPM;
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
if (!ForceDisableESIMDOpt) {
FunctionPassManager FPM;
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
}
MPM.addPass(ESIMDOptimizeVecArgCallConvPass{});
FunctionPassManager MainFPM;
MainFPM.addPass(ESIMDLowerLoadStorePass{});

MainFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
MainFPM.addPass(EarlyCSEPass(true));
MainFPM.addPass(InstCombinePass{});
MainFPM.addPass(DCEPass{});
MainFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
MainFPM.addPass(EarlyCSEPass(true));
MainFPM.addPass(InstCombinePass{});
MainFPM.addPass(DCEPass{});
if (!ForceDisableESIMDOpt) {
MainFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
MainFPM.addPass(EarlyCSEPass(true));
MainFPM.addPass(InstCombinePass{});
MainFPM.addPass(DCEPass{});
MainFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
MainFPM.addPass(EarlyCSEPass(true));
MainFPM.addPass(InstCombinePass{});
MainFPM.addPass(DCEPass{});
}
MPM.addPass(ESIMDLowerSLMReservationCalls{});
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM)));
MPM.addPass(GenXSPIRVWriterAdaptor(/*RewriteTypes=*/true,
Expand All @@ -60,7 +65,7 @@ ModulePassManager buildESIMDLoweringPipeline(bool OptLevelO0, bool SplitESIMD) {

// When ESIMD code was separated from the regular SYCL code,
// we can safely process ESIMD part.
bool sycl::lowerESIMDConstructs(ModuleDesc &MD, bool OptLevelO0,
bool sycl::lowerESIMDConstructs(ModuleDesc &MD, bool ForceDisableESIMDOpt,
bool SplitESIMD) {
// TODO: support options like -debug-pass, -print-[before|after], and others
LoopAnalysisManager LAM;
Expand All @@ -77,7 +82,8 @@ bool sycl::lowerESIMDConstructs(ModuleDesc &MD, bool OptLevelO0,

std::vector<std::string> Names;
MD.saveEntryPointNames(Names);
ModulePassManager MPM = buildESIMDLoweringPipeline(OptLevelO0, SplitESIMD);
ModulePassManager MPM =
buildESIMDLoweringPipeline(ForceDisableESIMDOpt, SplitESIMD);
PreservedAnalyses Res = MPM.run(MD.getModule(), MAM);

// GenXSPIRVWriterAdaptor pass replaced some functions with "rewritten"
Expand Down
13 changes: 13 additions & 0 deletions llvm/test/tools/sycl-post-link/sycl-esimd/basic-esimd-lower.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
; RUN: sycl-post-link -properties -split-esimd -lower-esimd -O2 -S < %s -o %t.table
; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-O2

; -O0 lowering, requires `-force-disable-esimd-opt` to disable all optimizations.
; RUN: sycl-post-link -properties -split-esimd -lower-esimd -O0 -force-disable-esimd-opt -S < %s -o %t.table
; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-O0

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir64-unknown-linux"

Expand Down Expand Up @@ -50,6 +54,15 @@ attributes #0 = { "sycl-module-id"="a.cpp" }
; CHECK-NO-LOWERING: ret void
; CHECK-NO-LOWERING: }

; With -O0, we only lower ESIMD code, but no other optimizations
; CHECK-O0: define dso_local spir_kernel void @ESIMD_kernel() #{{[0-9]}} !sycl_explicit_simd !{{[0-9]}} !intel_reqd_sub_group_size !{{[0-9]}} {
; CHECK-O0: entry:
; CHECK-O0: %0 = load <3 x i64>, {{.*}} addrspacecast {{.*}} @__spirv_BuiltInGlobalInvocationId
; CHECK-O0: %1 = extractelement <3 x i64> %0, i64 0
; CHECK-O0: call void @llvm.genx.barrier()
; CHECK-O0: ret void
; CHECK-O0: }

; With -O2, unused call was optimized away
; CHECK-O2: define dso_local spir_kernel void @ESIMD_kernel()
; CHECK-O2: entry:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
; RUN: sycl-post-link -properties -split-esimd -lower-esimd -O0 -S < %s -o %t.table
; -O0 lowering, requires `-force-disable-esimd-opt` to disable all optimizations.
; RUN: sycl-post-link -properties -split-esimd -lower-esimd -O0 -force-disable-esimd-opt -S < %s -o %t.table
; RUN: FileCheck %s -input-file=%t_esimd_0.ll
; This test checks that IR code below can be successfully processed by
; sycl-post-link. In this IR no extractelement instruction and no casting are used
Expand All @@ -20,8 +21,10 @@ entry:
store i32 %add.i, ptr addrspace(1) %_arg_DoNotOptimize32, align 4
ret void
}
; CHECK: store i64 0, ptr addrspace(1) %_arg_DoNotOptimize, align 8
; CHECK: store i32 3, ptr addrspace(1) %_arg_DoNotOptimize32, align 4
; CHECK: %conv.i = zext i32 0 to i64
; CHECK: store i64 %conv.i, ptr addrspace(1) %_arg_DoNotOptimize, align 8
; CHECK: %add.i = add i32 0, 3
; CHECK: store i32 %add.i, ptr addrspace(1) %_arg_DoNotOptimize32, align 4

; Function Attrs: convergent norecurse
define dso_local spir_kernel void @kernel_SubgroupSize(ptr addrspace(1) noundef align 8 %_arg_DoNotOptimize, ptr addrspace(1) noundef align 4 %_arg_DoNotOptimize32)#0 !sycl_explicit_simd !3{
Expand All @@ -33,8 +36,10 @@ entry:
store i32 %add.i, ptr addrspace(1) %_arg_DoNotOptimize32, align 4
ret void
}
; CHECK: store i64 1, ptr addrspace(1) %_arg_DoNotOptimize, align 8
; CHECK: store i32 8, ptr addrspace(1) %_arg_DoNotOptimize32, align 4
; CHECK: %conv.i = zext i32 1 to i64
; CHECK: store i64 %conv.i, ptr addrspace(1) %_arg_DoNotOptimize, align 8
; CHECK: %add.i = add i32 1, 7
; CHECK: store i32 %add.i, ptr addrspace(1) %_arg_DoNotOptimize32, align 4

; Function Attrs: convergent norecurse
define dso_local spir_kernel void @kernel_SubgroupMaxSize(ptr addrspace(1) noundef align 8 %_arg_DoNotOptimize, ptr addrspace(1) noundef align 4 %_arg_DoNotOptimize32) #0 !sycl_explicit_simd !3 {
Expand All @@ -46,8 +51,10 @@ entry:
store i32 %add.i, ptr addrspace(1) %_arg_DoNotOptimize32, align 4
ret void
}
; CHECK: store i64 1, ptr addrspace(1) %_arg_DoNotOptimize, align 8
; CHECK: store i32 10, ptr addrspace(1) %_arg_DoNotOptimize32, align 4
; CHECK: %conv.i = zext i32 1 to i64
; CHECK: store i64 %conv.i, ptr addrspace(1) %_arg_DoNotOptimize, align 8
; CHECK: %add.i = add i32 1, 9
; CHECK: store i32 %add.i, ptr addrspace(1) %_arg_DoNotOptimize32, align 4

attributes #0 = { "sycl-module-id"="a.cpp" }

Expand Down
7 changes: 6 additions & 1 deletion llvm/tools/sycl-post-link/sycl-post-link.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,10 @@ cl::opt<bool> OptLevelO3("O3",
cl::desc("Optimization level 3. Similar to clang -O3"),
cl::cat(PostLinkCat));

cl::opt<bool> ForceDisableESIMDOpt("force-disable-esimd-opt", cl::Hidden,
cl::desc("Force no optimizations."),
cl::cat(PostLinkCat));

cl::opt<module_split::IRSplitMode> SplitMode(
"split", cl::desc("split input module"), cl::Optional,
cl::init(module_split::SPLIT_NONE),
Expand Down Expand Up @@ -523,7 +527,8 @@ handleESIMD(module_split::ModuleDesc &&MDesc, bool &Modified,
for (auto &MD : Result) {
DUMP_ENTRY_POINTS(MD.entries(), MD.Name.c_str(), 3);
if (LowerEsimd && MD.isESIMD())
Modified |= sycl::lowerESIMDConstructs(MD, OptLevelO0, SplitEsimd);
Modified |=
sycl::lowerESIMDConstructs(MD, ForceDisableESIMDOpt, SplitEsimd);
}

if (!SplitEsimd && Result.size() > 1) {
Expand Down
6 changes: 5 additions & 1 deletion sycl/test/check_device_code/esimd/fp16_converts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@

// Checks that lowerESIMD pass builds proper vc-intrinsics
// RUN: %clangxx -O2 -fsycl -c -fsycl-device-only -Xclang -emit-llvm %s -o %t
// RUN: sycl-post-link -properties -split-esimd -lower-esimd -O0 -S %t -o %t.table
// -O0 lowering, requires `-force-disable-esimd-opt` to disable all
// optimizations.
// RUN: sycl-post-link -properties -split-esimd -lower-esimd -O0 -force-disable-esimd-opt -S %t -o %t.table
// RUN: FileCheck %s -input-file=%t_esimd_0.ll

#include <sycl/ext/intel/esimd.hpp>
Expand Down Expand Up @@ -34,7 +36,9 @@ __attribute__((sycl_kernel)) void kernel(Func kernelFunc) {
SYCL_ESIMD_FUNCTION SYCL_EXTERNAL void bf16_vector() {
simd<float, 8> F32 = 0;
simd<bfloat16, 8> BF16 = F32;
// CHECK: call <8 x half> @llvm.genx.bf.cvt.v8f16.v8f32(<8 x float> {{[^)]+}})
simd<float, 8> F32_conv = BF16;
// CHECK: call <8 x float> @llvm.genx.bf.cvt.v8f32.v8f16(<8 x half> {{[^)]+}})
}

SYCL_ESIMD_FUNCTION SYCL_EXTERNAL void bf16_scalar() {
Expand Down
Loading