From 43526397fa2b4c559666fdfd6f302bfba7409255 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Tue, 26 Nov 2024 08:21:24 +0800 Subject: [PATCH] [SYCL] Move SYCLLowerWGLocalMemoryPass to PipelineStart The pass transforms __sycl_allocateLocalMemory call to access of global variable @WGLocalMem. Move the transform to the beginning of the pipeline since the access could enable more optimization than the function call. In addition, the Intel GPU compiler has a pass to transform global variable in addrspace(3) to alloca that runs after pipeline basic simplification. Therefore, we shall run SYCLLowerWGLocalMemoryPass earlier. --- clang/lib/CodeGen/BackendUtil.cpp | 7 ++--- clang/test/CodeGenSYCL/group-local-memory.cpp | 29 ----------------- .../kernel-early-optimization-pipeline.cpp | 31 ++++++++++++------- 3 files changed, 23 insertions(+), 44 deletions(-) delete mode 100644 clang/test/CodeGenSYCL/group-local-memory.cpp diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index f617923670204..d849a08404820 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1041,6 +1041,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline( /*FP64ConvEmu=*/CodeGenOpts.FP64ConvEmu, /*ExcludeAspects=*/{"fp64"})); MPM.addPass(SYCLPropagateJointMatrixUsagePass()); + // Allocate static local memory in SYCL kernel scope for each allocation + // call. + MPM.addPass(SYCLLowerWGLocalMemoryPass()); }); else if (LangOpts.SYCLIsHost && !LangOpts.SYCLESIMDBuildHostCode) PB.registerPipelineStartEPCallback( @@ -1184,10 +1187,6 @@ void EmitAssemblyHelper::RunOptimizationPipeline( MPM.addPass(SPIRITTAnnotationsPass()); } - // Allocate static local memory in SYCL kernel scope for each allocation - // call. 
- MPM.addPass(SYCLLowerWGLocalMemoryPass()); - // Process properties and annotations MPM.addPass(CompileTimePropertiesPass()); diff --git a/clang/test/CodeGenSYCL/group-local-memory.cpp b/clang/test/CodeGenSYCL/group-local-memory.cpp deleted file mode 100644 index 02610e33760ab..0000000000000 --- a/clang/test/CodeGenSYCL/group-local-memory.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// Check that SYCLLowerWGLocalMemory pass is added to the SYCL device -// compilation pipeline with the inliner pass (new Pass Manager). - -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -O2 \ -// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \ -// RUN: | FileCheck %s -check-prefixes=CHECK-INL,CHECK - -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -O0 \ -// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \ -// RUN: | FileCheck %s --check-prefixes=CHECK-ALWINL,CHECK - -// Check that AlwaysInliner pass is always run for compilation of SYCL device -// target code, even if all optimizations are disabled. 
- -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -fno-sycl-early-optimizations \ -// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \ -// RUN: | FileCheck %s --check-prefixes=CHECK-ALWINL,CHECK - -// CHECK-INL: Running pass: ModuleInlinerWrapperPass on [module] -// CHECK-ALWINL: Running pass: AlwaysInlinerPass on [module] -// CHECK: Running pass: SYCLLowerWGLocalMemoryPass on [module] - -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -disable-llvm-passes \ -// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \ -// RUN: | FileCheck %s --check-prefixes=CHECK-NO-PASSES-ALWINL,CHECK-NO-PASSES,CHECK-NO-PASSES-INL - -// CHECK-NO-PASSES-INL-NOT: Running pass: ModuleInlinerWrapperPass on [module] -// CHECK-NO-PASSES-ALWINL-NOT: Running pass: AlwaysInlinerPass on [module] -// CHECK-NO-PASSES-NOT: Running pass: SYCLLowerWGLocalMemoryPass on [module] diff --git a/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp b/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp index 17527b58e5a8e..c75e48b9727a9 100644 --- a/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp +++ b/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp @@ -2,15 +2,24 @@ // SYCL device target, and can be disabled with -fno-sycl-early-optimizations. // New pass manager doesn't print all passes tree, only module level. 
// -// RUN: %clang_cc1 -O2 -fsycl-is-device -triple spir64-unknown-unknown %s -mdebug-pass Structure -emit-llvm -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-NEWPM-EARLYOPT -// CHECK-NEWPM-EARLYOPT: ConstantMergePass -// CHECK-NEWPM-EARLYOPT: SYCLMutatePrintfAddrspacePass +// RUN: %clang_cc1 -O2 -fsycl-is-device -triple spir64-unknown-unknown %s -mdebug-pass Structure -emit-llvm -o /dev/null 2>&1 | FileCheck %s +// CHECK: SYCLVirtualFunctionsAnalysisPass +// CHECK: ESIMDVerifierPass +// CHECK: SYCLConditionalCallOnDevicePass +// CHECK: SYCLPropagateAspectsUsagePass +// CHECK: SYCLPropagateJointMatrixUsagePass +// CHECK: SYCLLowerWGLocalMemoryPass +// CHECK: InferFunctionAttrsPass +// CHECK: AlwaysInlinerPass +// CHECK: ModuleInlinerWrapperPass +// CHECK: ConstantMergePass +// CHECK: SYCLMutatePrintfAddrspacePass +// CHECK: SYCLPropagateAspectsUsagePass +// CHECK: SYCLAddOptLevelAttributePass +// CHECK: CompileTimePropertiesPass +// CHECK: RecordSYCLAspectNamesPass +// CHECK: CleanupSYCLMetadataPass // -// RUN: %clang_cc1 -O2 -fsycl-is-device -triple spir64-unknown-unknown %s -mdebug-pass Structure -emit-llvm -fno-sycl-early-optimizations -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-NEWPM-NOEARLYOPT -// CHECK-NEWPM-NOEARLYOPT-NOT: ConstantMergePass -// CHECK-NEWPM-NOEARLYOPT: SYCLMutatePrintfAddrspacePass - -// Checks that the compile time properties pass is added into the compilation pipeline -// -// RUN: %clang_cc1 -O2 -fsycl-is-device -triple spir64-unknown-unknown %s -mdebug-pass Structure -emit-llvm -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-COMPTIMEPROPS -// CHECK-COMPTIMEPROPS: Running pass: CompileTimePropertiesPass on [module] +// RUN: %clang_cc1 -O2 -fsycl-is-device -triple spir64-unknown-unknown %s -mdebug-pass Structure -emit-llvm -fno-sycl-early-optimizations -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-NOEARLYOPT +// CHECK-NOEARLYOPT-NOT: ConstantMergePass +// CHECK-NOEARLYOPT: SYCLMutatePrintfAddrspacePass