From 7c3eeeb0427e48cb3aa9c6eee995a4f51807e92a Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 26 Jul 2024 01:41:46 -0700 Subject: [PATCH 1/3] Add new regression tests --- sycl/test/regression/esimd/abs.cpp | 11 +++++++++++ sycl/test/regression/esimd/clz.cpp | 11 +++++++++++ sycl/test/regression/esimd/ctz.cpp | 11 +++++++++++ sycl/test/regression/esimd/fabs.cpp | 11 +++++++++++ sycl/test/regression/esimd/fma.cpp | 15 +++++++++++++++ sycl/test/regression/esimd/fmax.cpp | 14 ++++++++++++++ sycl/test/regression/esimd/fmin.cpp | 14 ++++++++++++++ sycl/test/regression/esimd/popcount.cpp | 13 +++++++++++++ 8 files changed, 100 insertions(+) create mode 100644 sycl/test/regression/esimd/abs.cpp create mode 100644 sycl/test/regression/esimd/clz.cpp create mode 100644 sycl/test/regression/esimd/ctz.cpp create mode 100644 sycl/test/regression/esimd/fabs.cpp create mode 100644 sycl/test/regression/esimd/fma.cpp create mode 100644 sycl/test/regression/esimd/fmax.cpp create mode 100644 sycl/test/regression/esimd/fmin.cpp create mode 100644 sycl/test/regression/esimd/popcount.cpp diff --git a/sycl/test/regression/esimd/abs.cpp b/sycl/test/regression/esimd/abs.cpp new file mode 100644 index 000000000000..6e267586b664 --- /dev/null +++ b/sycl/test/regression/esimd/abs.cpp @@ -0,0 +1,11 @@ +// RUN: %clangxx -fsycl -fsyntax-only %s + +#include + +#include + +SYCL_EXTERNAL sycl::vec call_abs_vec(sycl::vec input) { + return sycl::abs(input); +} + +SYCL_EXTERNAL int call_abs_scalar(int input) { return sycl::abs(input); } diff --git a/sycl/test/regression/esimd/clz.cpp b/sycl/test/regression/esimd/clz.cpp new file mode 100644 index 000000000000..c3a4fcb4fa98 --- /dev/null +++ b/sycl/test/regression/esimd/clz.cpp @@ -0,0 +1,11 @@ +// RUN: %clangxx -fsycl -fsyntax-only %s + +#include + +#include + +SYCL_EXTERNAL sycl::vec call_clz_vec(sycl::vec input) { + return sycl::clz(input); +} + +SYCL_EXTERNAL int call_clz_scalar(int input) { return sycl::clz(input); } diff --git a/sycl/test/regression/esimd/ctz.cpp b/sycl/test/regression/esimd/ctz.cpp new file mode 100644 index 000000000000..0acb565220e7 --- /dev/null +++ b/sycl/test/regression/esimd/ctz.cpp @@ -0,0 +1,11 @@ +// RUN: %clangxx -fsycl -fsyntax-only %s + +#include + +#include + +SYCL_EXTERNAL sycl::vec call_ctz_vec(sycl::vec input) { + return sycl::ctz(input); +} + +SYCL_EXTERNAL int call_ctz_scalar(int input) { return sycl::ctz(input); } diff --git a/sycl/test/regression/esimd/fabs.cpp b/sycl/test/regression/esimd/fabs.cpp new file mode 100644 index 000000000000..d1e3a0133f0d --- /dev/null +++ b/sycl/test/regression/esimd/fabs.cpp @@ -0,0 +1,11 @@ +// RUN: %clangxx -fsycl -fsyntax-only %s + +#include + +#include + +SYCL_EXTERNAL sycl::vec call_fabs_vec(sycl::vec input) { + return sycl::fabs(input); +} + +SYCL_EXTERNAL float call_fabs_scalar(float input) { return sycl::fabs(input); } diff --git a/sycl/test/regression/esimd/fma.cpp b/sycl/test/regression/esimd/fma.cpp new file mode 100644 index 000000000000..937d473fd4d8 --- /dev/null +++ b/sycl/test/regression/esimd/fma.cpp @@ -0,0 +1,15 @@ +// RUN: %clangxx -fsycl -fsyntax-only %s + +#include + +#include + +SYCL_EXTERNAL sycl::vec call_fma_vec(sycl::vec a, + sycl::vec b, + sycl::vec c) { + return sycl::fma(a, b, c); +} + +SYCL_EXTERNAL float call_fma_scalar(float a, float b, float c) { + return sycl::fma(a, b, c); +} diff --git a/sycl/test/regression/esimd/fmax.cpp b/sycl/test/regression/esimd/fmax.cpp new file mode 100644 index 000000000000..874970957e45 --- /dev/null +++ b/sycl/test/regression/esimd/fmax.cpp @@ -0,0 +1,14 @@ +// RUN: %clangxx -fsycl -fsyntax-only %s + +#include + +#include + +SYCL_EXTERNAL sycl::vec call_fmax_vec(sycl::vec a, + sycl::vec b) { + return sycl::fmax(a, b); +} + +SYCL_EXTERNAL float call_fmax_scalar(float a, float b) { + return sycl::fmax(a, b); +} diff --git a/sycl/test/regression/esimd/fmin.cpp b/sycl/test/regression/esimd/fmin.cpp new file mode 100644 index 000000000000..6c5556d23b91 --- /dev/null +++ b/sycl/test/regression/esimd/fmin.cpp @@ -0,0 +1,14 @@ +// RUN: %clangxx -fsycl -fsyntax-only %s + +#include + +#include + +SYCL_EXTERNAL sycl::vec call_fmin_vec(sycl::vec a, + sycl::vec b) { + return sycl::fmin(a, b); +} + +SYCL_EXTERNAL float call_fmin_scalar(float a, float b) { + return sycl::fmin(a, b); +} diff --git a/sycl/test/regression/esimd/popcount.cpp b/sycl/test/regression/esimd/popcount.cpp new file mode 100644 index 000000000000..b2c61217883f --- /dev/null +++ b/sycl/test/regression/esimd/popcount.cpp @@ -0,0 +1,13 @@ +// RUN: %clangxx -fsycl -fsyntax-only %s + +#include + +#include + +SYCL_EXTERNAL sycl::vec call_popcount_vec(sycl::vec input) { + return sycl::popcount(input); +} + +SYCL_EXTERNAL int call_popcount_scalar(int input) { + return sycl::popcount(input); +} From 4c34b744a5f9e59272aa34e7dee94a9a96460565 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 26 Jul 2024 02:06:37 -0700 Subject: [PATCH 2/3] Revert "[SYCL][ESIMD] Move fmax to SPIR-V intrinsic (#14020)" This reverts commit 1f1be9c642889b7c0fd045b073d411e544dc6007. --- .../sycl/ext/intel/esimd/detail/math_intrin.hpp | 10 +++++----- sycl/include/sycl/ext/intel/esimd/math.hpp | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sycl/include/sycl/ext/intel/esimd/detail/math_intrin.hpp b/sycl/include/sycl/ext/intel/esimd/detail/math_intrin.hpp index b77bf6030db4..d8022f48a9a1 100644 --- a/sycl/include/sycl/ext/intel/esimd/detail/math_intrin.hpp +++ b/sycl/include/sycl/ext/intel/esimd/detail/math_intrin.hpp @@ -85,10 +85,6 @@ __ESIMD_INTRIN __ESIMD_raw_vec_t(T, N) __spirv_ocl_fmin(__ESIMD_raw_vec_t(T, N), __ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END; -template -__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N) - __spirv_ocl_fmax(__ESIMD_raw_vec_t(T, N), - __ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END; // saturation intrinsics template __ESIMD_INTRIN __ESIMD_raw_vec_t(T0, SZ) @@ -118,7 +114,11 @@ template __ESIMD_INTRIN __ESIMD_raw_vec_t(T0, SZ) __esimd_sstrunc_sat(__ESIMD_raw_vec_t(T1, SZ) src) __ESIMD_INTRIN_END; -/// 3 kinds of max, the missing fmax uses spir-v intrinsics above +/// 3 kinds of max +template +__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ) + __esimd_fmax(__ESIMD_raw_vec_t(T, SZ) src0, + __ESIMD_raw_vec_t(T, SZ) src1) __ESIMD_INTRIN_END; template __ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ) __esimd_umax(__ESIMD_raw_vec_t(T, SZ) src0, diff --git a/sycl/include/sycl/ext/intel/esimd/math.hpp b/sycl/include/sycl/ext/intel/esimd/math.hpp index 5a88a113bcca..67bcaace8067 100644 --- a/sycl/include/sycl/ext/intel/esimd/math.hpp +++ b/sycl/include/sycl/ext/intel/esimd/math.hpp @@ -185,7 +185,7 @@ __ESIMD_API simd(max)(simd src0, simd src1, Sat sat = {}) { constexpr bool is_sat = std::is_same_v; if constexpr (std::is_floating_point::value) { - auto Result = __spirv_ocl_fmax(src0.data(), src1.data()); + auto Result = __esimd_fmax(src0.data(), src1.data()); if constexpr (is_sat) Result = __esimd_sat(Result); return simd(Result); @@ -1466,7 +1466,7 @@ template struct esimd_apply_reduced_max { template simd operator()(simd v1, simd v2) { if constexpr (std::is_floating_point::value) { - return __spirv_ocl_fmax(v1.data(), v2.data()); + return __esimd_fmax(v1.data(), v2.data()); } else if constexpr (std::is_unsigned::value) { return __esimd_umax(v1.data(), v2.data()); } else { From 97abb43b0eecc543bb417ba7c5a0e2fe3a377e14 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 26 Jul 2024 02:40:38 -0700 Subject: [PATCH 3/3] Revert "[SYCL][ESIMD] Move a few math operations to SPIR-V intrinsics and support new functions (#13383)" This is a partial revert of bcca7a80adf50b04c0991ef48745353ac7829016. Notable changes: - new tests for `popcount`, `clz` and `ctz` built-ins were preserved - public definitions of those ESIMD APIs were preserved - the implementation of the latter was changed, though: - drop template args around `__spirv_ocl_*` intrinsics to use ones that are auto-declared by the compiler - added `#ifdef __SYCL_DEVICE_ONLY__`, because the compiler only declares `__spirv_ocl_*` intrinsics for device compilation --- llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp | 18 +++++++++++++++ .../SYCLLowerIR/ESIMD/lower_llvm_intrin.ll | 14 ++++++----- .../ext/intel/esimd/detail/math_intrin.hpp | 23 ++++++++----------- sycl/include/sycl/ext/intel/esimd/math.hpp | 10 +++----- .../experimental/esimd/detail/math_intrin.hpp | 15 ++---------- .../ext/intel/experimental/esimd/math.hpp | 20 ++++++++++++---- 6 files changed, 56 insertions(+), 44 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp index 06efee46c296..9506dcda2bcc 100644 --- a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp +++ b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp @@ -1234,6 +1234,21 @@ static Instruction *addCastInstIfNeeded(Instruction *OldI, Instruction *NewI, return NewI; } +// Translates the following intrinsics: +// %res = call float @llvm.fmuladd.f32(float %a, float %b, float %c) +// %res = call double @llvm.fmuladd.f64(double %a, double %b, double %c) +// To +// %mul = fmul %a, %b +// %res = fadd %mul, %c +// TODO: Remove when newer GPU driver is used in CI. +void translateFmuladd(CallInst *CI) { + assert(CI->getIntrinsicID() == Intrinsic::fmuladd); + IRBuilder<> Bld(CI); + auto *Mul = Bld.CreateFMul(CI->getOperand(0), CI->getOperand(1)); + auto *Res = Bld.CreateFAdd(Mul, CI->getOperand(2)); + CI->replaceAllUsesWith(Res); +} + // Translates an LLVM intrinsic to a form, digestable by the BE. bool translateLLVMIntrinsic(CallInst *CI) { Function *F = CI->getCalledFunction(); @@ -1245,6 +1260,9 @@ bool translateLLVMIntrinsic(CallInst *CI) { // no translation - it will be simply removed. // TODO: make use of 'assume' info in the BE break; + case Intrinsic::fmuladd: + translateFmuladd(CI); + break; default: return false; // "intrinsic wasn't translated, keep the original call" } diff --git a/llvm/test/SYCLLowerIR/ESIMD/lower_llvm_intrin.ll b/llvm/test/SYCLLowerIR/ESIMD/lower_llvm_intrin.ll index 5abbfc9f12c8..6f797c3b53bc 100644 --- a/llvm/test/SYCLLowerIR/ESIMD/lower_llvm_intrin.ll +++ b/llvm/test/SYCLLowerIR/ESIMD/lower_llvm_intrin.ll @@ -1,7 +1,7 @@ ; RUN: opt -passes=LowerESIMD -S < %s | FileCheck %s -; This test checks that LowerESIMD pass does not lower some llvm intrinsics -; which can now be handled by the VC BE. +; This test checks that LowerESIMD pass correctly lowers some llvm intrinsics +; which can't be handled by the VC BE. target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown" @@ -10,15 +10,17 @@ declare double @llvm.fmuladd.f64(double %x, double %y, double %z) define spir_func float @test_fmuladd_f32(float %x, float %y, float %z) { %1 = call float @llvm.fmuladd.f32(float %x, float %y, float %z) -; CHECK: %[[A:[0-9a-zA-Z\._]+]] = call float @llvm.fmuladd.f32(float %x, float %y, float %z) +; CHECK: %[[A:[0-9a-zA-Z\._]+]] = fmul float %x, %y +; CHECK: %[[B:[0-9a-zA-Z\._]+]] = fadd float %[[A]], %z ret float %1 -; CHECK: ret float %[[A]] +; CHECK: ret float %[[B]] } define spir_func double @test_fmuladd_f64(double %x, double %y, double %z) { %1 = call double @llvm.fmuladd.f64(double %x, double %y, double %z) -; CHECK: %[[A:[0-9a-zA-Z\._]+]] = call double @llvm.fmuladd.f64(double %x, double %y, double %z) +; CHECK: %[[A:[0-9a-zA-Z\._]+]] = fmul double %x, %y +; CHECK: %[[B:[0-9a-zA-Z\._]+]] = fadd double %[[A]], %z ret double %1 -; CHECK: ret double %[[A]] +; CHECK: ret double %[[B]] } diff --git a/sycl/include/sycl/ext/intel/esimd/detail/math_intrin.hpp b/sycl/include/sycl/ext/intel/esimd/detail/math_intrin.hpp index d8022f48a9a1..a283f5572e03 100644 --- a/sycl/include/sycl/ext/intel/esimd/detail/math_intrin.hpp +++ b/sycl/include/sycl/ext/intel/esimd/detail/math_intrin.hpp @@ -72,19 +72,6 @@ template __ESIMD_INTRIN __ESIMD_raw_vec_t(T, N) __spirv_ocl_native_powr(__ESIMD_raw_vec_t(T, N), __ESIMD_raw_vec_t(T, N)); -template -__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N) - __spirv_ocl_fabs(__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END; - -template -__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N) - __spirv_ocl_s_abs(__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END; - -template -__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N) - __spirv_ocl_fmin(__ESIMD_raw_vec_t(T, N), - __ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END; - // saturation intrinsics template __ESIMD_INTRIN __ESIMD_raw_vec_t(T0, SZ) @@ -114,6 +101,10 @@ template __ESIMD_INTRIN __ESIMD_raw_vec_t(T0, SZ) __esimd_sstrunc_sat(__ESIMD_raw_vec_t(T1, SZ) src) __ESIMD_INTRIN_END; +template +__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ) + __esimd_abs(__ESIMD_raw_vec_t(T, SZ) src0) __ESIMD_INTRIN_END; + /// 3 kinds of max template __ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ) @@ -128,7 +119,11 @@ __ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ) __esimd_smax(__ESIMD_raw_vec_t(T, SZ) src0, __ESIMD_raw_vec_t(T, SZ) src1) __ESIMD_INTRIN_END; -/// 3 kinds of min, the missing fmin uses spir-v instrinsics above +/// 3 kinds of min +template +__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ) + __esimd_fmin(__ESIMD_raw_vec_t(T, SZ) src0, + __ESIMD_raw_vec_t(T, SZ) src1) __ESIMD_INTRIN_END; template __ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ) __esimd_umin(__ESIMD_raw_vec_t(T, SZ) src0, diff --git a/sycl/include/sycl/ext/intel/esimd/math.hpp b/sycl/include/sycl/ext/intel/esimd/math.hpp index 67bcaace8067..f6cc64eb2aae 100644 --- a/sycl/include/sycl/ext/intel/esimd/math.hpp +++ b/sycl/include/sycl/ext/intel/esimd/math.hpp @@ -97,11 +97,7 @@ namespace detail { template ESIMD_NODEBUG ESIMD_INLINE simd __esimd_abs_common_internal(simd src0) { - simd Result; - if constexpr (detail::is_generic_floating_point_v) - Result = simd(__spirv_ocl_fabs(src0.data())); - else - Result = simd(__spirv_ocl_s_abs(src0.data())); + simd Result = simd(__esimd_abs(src0.data())); return convert(Result); } @@ -270,7 +266,7 @@ __ESIMD_API simd(min)(simd src0, simd src1, Sat sat = {}) { constexpr bool is_sat = std::is_same_v; if constexpr (std::is_floating_point::value) { - auto Result = __spirv_ocl_fmin(src0.data(), src1.data()); + auto Result = __esimd_fmin(src0.data(), src1.data()); if constexpr (is_sat) Result = __esimd_sat(Result); return simd(Result); @@ -1479,7 +1475,7 @@ template struct esimd_apply_reduced_min { template simd operator()(simd v1, simd v2) { if constexpr (std::is_floating_point::value) { - return __spirv_ocl_fmin(v1.data(), v2.data()); + return __esimd_fmin(v1.data(), v2.data()); } else if constexpr (std::is_unsigned::value) { return __esimd_umin(v1.data(), v2.data()); } else { diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp index e76cca317e8d..d42afb54c2ef 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp @@ -112,19 +112,8 @@ __ESIMD_INTRIN __ESIMD_raw_vec_t(sycl::half, N) template __ESIMD_INTRIN __ESIMD_raw_vec_t(T, N) - __spirv_ocl_fma(__ESIMD_raw_vec_t(T, N) a, __ESIMD_raw_vec_t(T, N) b, - __ESIMD_raw_vec_t(T, N) c) __ESIMD_INTRIN_END; -template -__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N) - __spirv_ocl_popcount(__ESIMD_raw_vec_t(T, N) src0) __ESIMD_INTRIN_END; - -template -__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N) - __spirv_ocl_ctz(__ESIMD_raw_vec_t(T, N) src0) __ESIMD_INTRIN_END; - -template -__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N) - __spirv_ocl_clz(__ESIMD_raw_vec_t(T, N) src0) __ESIMD_INTRIN_END; + __esimd_fmadd(__ESIMD_raw_vec_t(T, N) a, __ESIMD_raw_vec_t(T, N) b, + __ESIMD_raw_vec_t(T, N) c) __ESIMD_INTRIN_END; #undef __ESIMD_raw_vec_t #undef __ESIMD_cpp_vec_t diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp index 99674732680a..f3759c269856 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp @@ -32,7 +32,11 @@ template __ESIMD_API std::enable_if_t && sizeof(T) < 8, __ESIMD_NS::simd> popcount(__ESIMD_NS::simd vec) { - return __spirv_ocl_popcount(vec.data()); +#ifdef __SYCL_DEVICE_ONLY__ + return __spirv_ocl_popcount(vec.data()); +#else + return vec; +#endif } /// Count the number of leading zeros. @@ -44,7 +48,11 @@ template __ESIMD_API std::enable_if_t && sizeof(T) < 8, __ESIMD_NS::simd> clz(__ESIMD_NS::simd vec) { - return __spirv_ocl_clz(vec.data()); +#ifdef __SYCL_DEVICE_ONLY__ + return __spirv_ocl_clz(vec.data()); +#else + return vec; +#endif } /// Count the number of trailing zeros. @@ -55,7 +63,11 @@ template __ESIMD_API std::enable_if_t && sizeof(T) < 8, __ESIMD_NS::simd> ctz(__ESIMD_NS::simd vec) { - return __spirv_ocl_ctz(vec.data()); +#ifdef __SYCL_DEVICE_ONLY__ + return __spirv_ocl_ctz(vec.data()); +#else + return vec; +#endif } /// @} sycl_esimd_bitmanip @@ -740,7 +752,7 @@ ESIMD_INLINE __ESIMD_NS::simd fma(__ESIMD_NS::simd a, static_assert(__ESIMD_DNS::is_generic_floating_point_v, "fma only supports floating point types"); using CppT = __ESIMD_DNS::element_type_traits::EnclosingCppT; - auto Ret = __spirv_ocl_fma<__ESIMD_DNS::__raw_t, N>( + auto Ret = __esimd_fmadd<__ESIMD_DNS::__raw_t, N>( __ESIMD_DNS::convert_vector(a.data()), __ESIMD_DNS::convert_vector(b.data()), __ESIMD_DNS::convert_vector(c.data()));