From b58c27d3b3ab203f260fbabbb7e8c011a8c9c451 Mon Sep 17 00:00:00 2001 From: PietroGhg Date: Thu, 17 Oct 2024 16:43:21 +0100 Subject: [PATCH 1/5] [SYCL][NATIVECPU] Materialize floating point atomic builtins --- clang/lib/Frontend/InitPreprocessor.cpp | 3 +- .../llvm/SYCLLowerIR/FAtomicsNativeCPU.h | 29 ++++++++++++ llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt | 1 + .../SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp | 47 +++++++++++++++++++ .../PipelineSYCLNativeCPU.cpp | 2 + 5 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 llvm/include/llvm/SYCLLowerIR/FAtomicsNativeCPU.h create mode 100644 llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index b99b42838a4cb..f1ecd982830a9 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -1513,7 +1513,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI, const llvm::Triple::SubArchType DeviceSubArch = DeviceTriple.getSubArch(); if (DeviceTriple.isNVPTX() || DeviceTriple.isAMDGPU() || (DeviceTriple.isSPIR() && - DeviceSubArch != llvm::Triple::SPIRSubArch_fpga)) + DeviceSubArch != llvm::Triple::SPIRSubArch_fpga) || + LangOpts.SYCLIsNativeCPU) Builder.defineMacro("SYCL_USE_NATIVE_FP_ATOMICS"); // Enable generation of USM address spaces for FPGA. if (DeviceSubArch == llvm::Triple::SPIRSubArch_fpga) { diff --git a/llvm/include/llvm/SYCLLowerIR/FAtomicsNativeCPU.h b/llvm/include/llvm/SYCLLowerIR/FAtomicsNativeCPU.h new file mode 100644 index 0000000000000..db8f39112238a --- /dev/null +++ b/llvm/include/llvm/SYCLLowerIR/FAtomicsNativeCPU.h @@ -0,0 +1,29 @@ +//===------- FAtomicsNativeCPU.h - Materializes FP Atomics ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A transformation pass that materializes floating points atomics by emitting +// corresponding atomicrmw instruction. +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class ModulePass; + +class FAtomicsNativeCPU + : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +}; + +} // namespace llvm diff --git a/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt b/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt index b1f71ff191544..bbfb74f7a3529 100644 --- a/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt +++ b/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt @@ -4,6 +4,7 @@ add_llvm_component_library(LLVMSYCLNativeCPUUtils RenameKernelSYCLNativeCPU.cpp ConvertToMuxBuiltinsSYCLNativeCPU.cpp FixABIMuxBuiltinsSYCLNativeCPU.cpp + FAtomicsNativeCPU.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR diff --git a/llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp new file mode 100644 index 0000000000000..76acceaf0ef04 --- /dev/null +++ b/llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp @@ -0,0 +1,47 @@ +//===------- FAtomicsNativeCPU.cpp - Materializes FP Atomics --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A transformation pass that materializes floating points atomics by emitting +// corresponding atomicrmw instruction. +// +//===----------------------------------------------------------------------===// + +#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/AtomicOrdering.h" + +using namespace llvm; + +PreservedAnalyses FAtomicsNativeCPU::run(Module &M, + ModuleAnalysisManager &MAM) { + bool ModuleChanged = false; + auto &Ctx = M.getContext(); + for (auto &F : M) { + AtomicRMWInst::BinOp OpCode; + if (F.getName().starts_with("_Z21__spirv_AtomicFAddEXT")) { + OpCode = AtomicRMWInst::BinOp::FAdd; + } else if (F.getName().starts_with("_Z21__spirv_AtomicFMinEXT")) { + OpCode = AtomicRMWInst::BinOp::FMin; + } else if (F.getName().starts_with("_Z21__spirv_AtomicFMaxEXT")) { + OpCode = AtomicRMWInst::BinOp::FMax; + } else { + continue; + } + + BasicBlock *BB = BasicBlock::Create(Ctx, "entry", &F); + IRBuilder<> Builder(BB); + auto A = + Builder.CreateAtomicRMW(OpCode, F.getArg(0), F.getArg(3), MaybeAlign(), + AtomicOrdering::Monotonic, SyncScope::System); + Builder.CreateRet(A); + } + return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} diff --git a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp index 28ca1eb7103d1..b0b720a940305 100644 --- a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp @@ -11,6 +11,7 @@ // When NATIVECPU_USE_OCK is set, adds passes from the oneAPI Construction Kit. // //===----------------------------------------------------------------------===// +#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h" #include "llvm/SYCLLowerIR/ConvertToMuxBuiltinsSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h" @@ -70,6 +71,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses( OptimizationLevel OptLevel) { MPM.addPass(SpecConstantsPass(SpecConstantsPass::HandlingMode::emulation)); MPM.addPass(ConvertToMuxBuiltinsSYCLNativeCPUPass()); + MPM.addPass(FAtomicsNativeCPU()); #ifdef NATIVECPU_USE_OCK MPM.addPass(compiler::utils::PrepareBarriersPass()); MPM.addPass(compiler::utils::TransferKernelMetadataPass()); From 5167331d0119d093c3a83a276c88b67471aa62e9 Mon Sep 17 00:00:00 2001 From: PietroGhg Date: Fri, 18 Oct 2024 17:06:08 +0100 Subject: [PATCH 2/5] Add lit test --- .../native_cpu/fp_atomic.cpp | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 sycl/test/check_device_code/native_cpu/fp_atomic.cpp diff --git a/sycl/test/check_device_code/native_cpu/fp_atomic.cpp b/sycl/test/check_device_code/native_cpu/fp_atomic.cpp new file mode 100644 index 0000000000000..31bd18f78e102 --- /dev/null +++ b/sycl/test/check_device_code/native_cpu/fp_atomic.cpp @@ -0,0 +1,40 @@ +// RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu -S -emit-llvm -o %t_temp.ll %s +// RUN: %clangxx -mllvm -sycl-native-cpu-backend -S -emit-llvm -o - %t_temp.ll | FileCheck %s +#include + +constexpr sycl::memory_order order = sycl::memory_order::relaxed; +constexpr sycl::memory_scope scope = sycl::memory_scope::work_group; +constexpr sycl::access::address_space space = + sycl::access::address_space::global_space; + +class Test; +using namespace sycl; +int main() { + queue q; + const size_t N = 32; + float sum = 0; + std::vector output(N); + std::fill(output.begin(), output.end(), 0.f); + { + buffer sum_buf(&sum, 1); + q.submit([&](handler &cgh) { + auto sum = sum_buf.template get_access(cgh); + cgh.parallel_for(range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto atm = atomic_ref(sum[0]); + atm.fetch_add(1.f, order); + //CHECK-DAG: float @_Z21__spirv_AtomicFAddEXT{{.*}}(ptr {{.*}} %[[ARG0:.*]], i32 {{.*}}, i32 {{.*}}, float {{.*}} %[[ARG3:.*]]) + //CHECK: %[[RES:.*]] = atomicrmw fadd ptr addrspace(1) %[[ARG0]], float %[[ARG3]] monotonic, align 4 + //CHECK: ret float %[[RES]] + atm.fetch_max(1.f, order); + //CHECK-DAG: float @_Z21__spirv_AtomicFMaxEXT{{.*}}(ptr {{.*}} %[[ARG0:.*]], i32 {{.*}}, i32 {{.*}}, float {{.*}} %[[ARG3:.*]]) + //CHECK: %[[RES:.*]] = atomicrmw fmax ptr addrspace(1) %[[ARG0]], float %[[ARG3]] monotonic, align 4 + //CHECK: ret float %[[RES]] + atm.fetch_min(1.f, order); + //CHECK-DAG: float @_Z21__spirv_AtomicFMinEXT{{.*}}(ptr {{.*}} %[[ARG0:.*]], i32 {{.*}}, i32 {{.*}}, float {{.*}} %[[ARG3:.*]]) + //CHECK: %[[RES:.*]] = atomicrmw fmin ptr addrspace(1) %[[ARG0]], float %[[ARG3]] monotonic, align 4 + //CHECK: ret float %[[RES]] + }); + }).wait_and_throw(); + } +} From 5331f3f8671ce55b7b986e8e90fe4a1c9495b834 Mon Sep 17 00:00:00 2001 From: PietroGhg Date: Fri, 25 Oct 2024 08:49:17 +0100 Subject: [PATCH 3/5] Add comment --- llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp index 76acceaf0ef04..a1bc82155c68d 100644 --- a/llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp @@ -38,6 +38,8 @@ PreservedAnalyses FAtomicsNativeCPU::run(Module &M, BasicBlock *BB = BasicBlock::Create(Ctx, "entry", &F); IRBuilder<> Builder(BB); + // Currently we drop arguments 1 and 2 (scope and memory ordering), + // defaulting to Monotonic ordering and System scope. auto A = Builder.CreateAtomicRMW(OpCode, F.getArg(0), F.getArg(3), MaybeAlign(), AtomicOrdering::Monotonic, SyncScope::System); From 20736d3cddad1b0fca86267efd33160da81e1d2e Mon Sep 17 00:00:00 2001 From: PietroGhg Date: Mon, 28 Oct 2024 14:52:55 +0000 Subject: [PATCH 4/5] formatting --- llvm/include/llvm/SYCLLowerIR/FAtomicsNativeCPU.h | 3 +-- llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/SYCLLowerIR/FAtomicsNativeCPU.h b/llvm/include/llvm/SYCLLowerIR/FAtomicsNativeCPU.h index db8f39112238a..dbe22c62b56f3 100644 --- a/llvm/include/llvm/SYCLLowerIR/FAtomicsNativeCPU.h +++ b/llvm/include/llvm/SYCLLowerIR/FAtomicsNativeCPU.h @@ -20,8 +20,7 @@ namespace llvm { class ModulePass; -class FAtomicsNativeCPU - : public PassInfoMixin { +class FAtomicsNativeCPU : public PassInfoMixin { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); }; diff --git a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp index b0b720a940305..b30b6c41c2b99 100644 --- a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp @@ -11,8 +11,8 @@ // When NATIVECPU_USE_OCK is set, adds passes from the oneAPI Construction Kit. // //===----------------------------------------------------------------------===// -#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h" #include "llvm/SYCLLowerIR/ConvertToMuxBuiltinsSYCLNativeCPU.h" +#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h" #include "llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/RenameKernelSYCLNativeCPU.h" From 08380228b79bd119e3c6adeaa7e3b1b4befa6e7d Mon Sep 17 00:00:00 2001 From: PietroGhg Date: Tue, 29 Oct 2024 10:22:13 +0000 Subject: [PATCH 5/5] Update test --- clang/test/Preprocessor/sycl-macro-target-specific.cpp | 2 ++ llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp | 1 + sycl/test/check_device_code/native_cpu/fp_atomic.cpp | 1 + 3 files changed, 4 insertions(+) diff --git a/clang/test/Preprocessor/sycl-macro-target-specific.cpp b/clang/test/Preprocessor/sycl-macro-target-specific.cpp index 001df46104560..1d586b366469e 100644 --- a/clang/test/Preprocessor/sycl-macro-target-specific.cpp +++ b/clang/test/Preprocessor/sycl-macro-target-specific.cpp @@ -42,6 +42,8 @@ // RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s // RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amdhsa-amdhsa -E -dM \ // RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s +// RUN: %clang_cc1 %s -fsycl-is-device -triple x86_64-unknown-linux-gnu -fsycl-is-native-cpu \ +// RUN: -E -dM | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s // CHECK-SYCL-FP-ATOMICS: #define SYCL_USE_NATIVE_FP_ATOMICS // CHECK-SYCL-FP-ATOMICS-NEG-NOT: #define SYCL_USE_NATIVE_FP_ATOMICS diff --git a/llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp index a1bc82155c68d..e9043e21ec338 100644 --- a/llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp @@ -24,6 +24,7 @@ PreservedAnalyses FAtomicsNativeCPU::run(Module &M, ModuleAnalysisManager &MAM) { bool ModuleChanged = false; auto &Ctx = M.getContext(); + // TODO: add checks for windows mangling for (auto &F : M) { AtomicRMWInst::BinOp OpCode; if (F.getName().starts_with("_Z21__spirv_AtomicFAddEXT")) { diff --git a/sycl/test/check_device_code/native_cpu/fp_atomic.cpp b/sycl/test/check_device_code/native_cpu/fp_atomic.cpp index 31bd18f78e102..d1abd7ec13f11 100644 --- a/sycl/test/check_device_code/native_cpu/fp_atomic.cpp +++ b/sycl/test/check_device_code/native_cpu/fp_atomic.cpp @@ -1,3 +1,4 @@ +// REQUIRES: linux // RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu -S -emit-llvm -o %t_temp.ll %s // RUN: %clangxx -mllvm -sycl-native-cpu-backend -S -emit-llvm -o - %t_temp.ll | FileCheck %s #include