intel · sarnex · Jul 29, 2024 · Jul 26, 2024 · Jul 26, 2024 · Jul 26, 2024
@@ -1234,6 +1234,21 @@ static Instruction *addCastInstIfNeeded(Instruction *OldI, Instruction *NewI,
   return NewI;
 }
 
+// Translates the following intrinsics:
+//   %res = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
+//   %res = call double @llvm.fmuladd.f64(double %a, double %b, double %c)
+// To
+//   %mul = fmul <type> %a, <type> %b
+//   %res = fadd <type> %mul, <type> %c
+// TODO: Remove when newer GPU driver is used in CI.
+void translateFmuladd(CallInst *CI) {
+  assert(CI->getIntrinsicID() == Intrinsic::fmuladd);
+  IRBuilder<> Bld(CI);
+  auto *Mul = Bld.CreateFMul(CI->getOperand(0), CI->getOperand(1));
+  auto *Res = Bld.CreateFAdd(Mul, CI->getOperand(2));
+  CI->replaceAllUsesWith(Res);
+}
+
 // Translates an LLVM intrinsic to a form, digestable by the BE.
 bool translateLLVMIntrinsic(CallInst *CI) {
   Function *F = CI->getCalledFunction();
@@ -1245,6 +1260,9 @@ bool translateLLVMIntrinsic(CallInst *CI) {
     // no translation - it will be simply removed.
     // TODO: make use of 'assume' info in the BE
     break;
+  case Intrinsic::fmuladd:
+    translateFmuladd(CI);
+    break;
   default:
     return false; // "intrinsic wasn't translated, keep the original call"
   }

@@ -1,7 +1,7 @@
 ; RUN: opt -passes=LowerESIMD -S < %s | FileCheck %s
 
-; This test checks that LowerESIMD pass does not lower some llvm intrinsics
-; which can now be handled by the VC BE.
+; This test checks that LowerESIMD pass correctly lowers some llvm intrinsics
+; which can't be handled by the VC BE.
 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
 target triple = "spir64-unknown-unknown"
 
@@ -10,15 +10,17 @@ declare double @llvm.fmuladd.f64(double %x, double %y, double %z)
 
 define spir_func float @test_fmuladd_f32(float %x, float %y, float %z) {
   %1 = call float @llvm.fmuladd.f32(float %x, float %y, float %z)
-; CHECK: %[[A:[0-9a-zA-Z\._]+]] = call float @llvm.fmuladd.f32(float %x, float %y, float %z)
+; CHECK: %[[A:[0-9a-zA-Z\._]+]] = fmul float %x, %y
+; CHECK: %[[B:[0-9a-zA-Z\._]+]] = fadd float %[[A]], %z
   ret float %1
-; CHECK: ret float %[[A]]
+; CHECK: ret float %[[B]]
 }
 
 define spir_func double @test_fmuladd_f64(double %x, double %y, double %z) {
   %1 = call double @llvm.fmuladd.f64(double %x, double %y, double %z)
-; CHECK: %[[A:[0-9a-zA-Z\._]+]] = call double @llvm.fmuladd.f64(double %x, double %y, double %z)
+; CHECK: %[[A:[0-9a-zA-Z\._]+]] = fmul double %x, %y
+; CHECK: %[[B:[0-9a-zA-Z\._]+]] = fadd double %[[A]], %z
   ret double %1
-; CHECK: ret double %[[A]]
+; CHECK: ret double %[[B]]
 }
 
@@ -72,23 +72,6 @@ template <typename T, int N>
 __ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
     __spirv_ocl_native_powr(__ESIMD_raw_vec_t(T, N), __ESIMD_raw_vec_t(T, N));
 
-template <typename T, int N>
-__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
-    __spirv_ocl_fabs(__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;
-
-template <typename T, int N>
-__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
-    __spirv_ocl_s_abs(__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;
-
-template <typename T, int N>
-__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
-    __spirv_ocl_fmin(__ESIMD_raw_vec_t(T, N),
-                     __ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;
-
-template <typename T, int N>
-__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
-    __spirv_ocl_fmax(__ESIMD_raw_vec_t(T, N),
-                     __ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;
 // saturation intrinsics
 template <typename T0, typename T1, int SZ>
 __ESIMD_INTRIN __ESIMD_raw_vec_t(T0, SZ)
@@ -118,7 +101,15 @@ template <typename T0, typename T1, int SZ>
 __ESIMD_INTRIN __ESIMD_raw_vec_t(T0, SZ)
     __esimd_sstrunc_sat(__ESIMD_raw_vec_t(T1, SZ) src) __ESIMD_INTRIN_END;
 
-/// 3 kinds of max, the missing fmax uses spir-v intrinsics above
+template <typename T, int SZ>
+__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
+    __esimd_abs(__ESIMD_raw_vec_t(T, SZ) src0) __ESIMD_INTRIN_END;
+
+/// 3 kinds of max
+template <typename T, int SZ>
+__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
+    __esimd_fmax(__ESIMD_raw_vec_t(T, SZ) src0,
+                 __ESIMD_raw_vec_t(T, SZ) src1) __ESIMD_INTRIN_END;
 template <typename T, int SZ>
 __ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
     __esimd_umax(__ESIMD_raw_vec_t(T, SZ) src0,
@@ -128,7 +119,11 @@ __ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
     __esimd_smax(__ESIMD_raw_vec_t(T, SZ) src0,
                  __ESIMD_raw_vec_t(T, SZ) src1) __ESIMD_INTRIN_END;
 
-/// 3 kinds of min, the missing fmin uses spir-v instrinsics above
+/// 3 kinds of min
+template <typename T, int SZ>
+__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
+    __esimd_fmin(__ESIMD_raw_vec_t(T, SZ) src0,
+                 __ESIMD_raw_vec_t(T, SZ) src1) __ESIMD_INTRIN_END;
 template <typename T, int SZ>
 __ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
     __esimd_umin(__ESIMD_raw_vec_t(T, SZ) src0,

@@ -97,11 +97,7 @@ namespace detail {
 template <typename TRes, typename TArg, int SZ>
 ESIMD_NODEBUG ESIMD_INLINE simd<TRes, SZ>
 __esimd_abs_common_internal(simd<TArg, SZ> src0) {
-  simd<TArg, SZ> Result;
-  if constexpr (detail::is_generic_floating_point_v<TArg>)
-    Result = simd<TArg, SZ>(__spirv_ocl_fabs<TArg, SZ>(src0.data()));
-  else
-    Result = simd<TArg, SZ>(__spirv_ocl_s_abs<TArg, SZ>(src0.data()));
+  simd<TArg, SZ> Result = simd<TArg, SZ>(__esimd_abs<TArg, SZ>(src0.data()));
   return convert<TRes>(Result);
 }
 
@@ -185,7 +181,7 @@ __ESIMD_API simd<T, SZ>(max)(simd<T, SZ> src0, simd<T, SZ> src1, Sat sat = {}) {
   constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
 
   if constexpr (std::is_floating_point<T>::value) {
-    auto Result = __spirv_ocl_fmax<T, SZ>(src0.data(), src1.data());
+    auto Result = __esimd_fmax<T, SZ>(src0.data(), src1.data());
     if constexpr (is_sat)
       Result = __esimd_sat<T, T, SZ>(Result);
     return simd<T, SZ>(Result);
@@ -270,7 +266,7 @@ __ESIMD_API simd<T, SZ>(min)(simd<T, SZ> src0, simd<T, SZ> src1, Sat sat = {}) {
   constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
 
   if constexpr (std::is_floating_point<T>::value) {
-    auto Result = __spirv_ocl_fmin<T, SZ>(src0.data(), src1.data());
+    auto Result = __esimd_fmin<T, SZ>(src0.data(), src1.data());
     if constexpr (is_sat)
       Result = __esimd_sat<T, T, SZ>(Result);
     return simd<T, SZ>(Result);
@@ -1466,7 +1462,7 @@ template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_max {
   template <typename... T>
   simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
     if constexpr (std::is_floating_point<T1>::value) {
-      return __spirv_ocl_fmax<T1, SZ>(v1.data(), v2.data());
+      return __esimd_fmax<T1, SZ>(v1.data(), v2.data());
     } else if constexpr (std::is_unsigned<T1>::value) {
       return __esimd_umax<T1, SZ>(v1.data(), v2.data());
     } else {
@@ -1479,7 +1475,7 @@ template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_min {
   template <typename... T>
   simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
     if constexpr (std::is_floating_point<T1>::value) {
-      return __spirv_ocl_fmin<T1, SZ>(v1.data(), v2.data());
+      return __esimd_fmin<T1, SZ>(v1.data(), v2.data());
     } else if constexpr (std::is_unsigned<T1>::value) {
       return __esimd_umin<T1, SZ>(v1.data(), v2.data());
     } else {

@@ -112,19 +112,8 @@ __ESIMD_INTRIN __ESIMD_raw_vec_t(sycl::half, N)
 
 template <typename T, int N>
 __ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
-    __spirv_ocl_fma(__ESIMD_raw_vec_t(T, N) a, __ESIMD_raw_vec_t(T, N) b,
-                    __ESIMD_raw_vec_t(T, N) c) __ESIMD_INTRIN_END;
-template <typename T, int N>
-__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
-    __spirv_ocl_popcount(__ESIMD_raw_vec_t(T, N) src0) __ESIMD_INTRIN_END;
-
-template <typename T, int N>
-__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
-    __spirv_ocl_ctz(__ESIMD_raw_vec_t(T, N) src0) __ESIMD_INTRIN_END;
-
-template <typename T, int N>
-__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
-    __spirv_ocl_clz(__ESIMD_raw_vec_t(T, N) src0) __ESIMD_INTRIN_END;
+    __esimd_fmadd(__ESIMD_raw_vec_t(T, N) a, __ESIMD_raw_vec_t(T, N) b,
+                  __ESIMD_raw_vec_t(T, N) c) __ESIMD_INTRIN_END;
 
 #undef __ESIMD_raw_vec_t
 #undef __ESIMD_cpp_vec_t

@@ -32,7 +32,11 @@ template <typename T, int N>
 __ESIMD_API std::enable_if_t<std::is_integral_v<T> && sizeof(T) < 8,
                              __ESIMD_NS::simd<T, N>>
 popcount(__ESIMD_NS::simd<T, N> vec) {
-  return __spirv_ocl_popcount<T, N>(vec.data());
+#ifdef __SYCL_DEVICE_ONLY__
+  return __spirv_ocl_popcount(vec.data());
+#else
+  return vec;
+#endif
 }
 
 /// Count the number of leading zeros.
@@ -44,7 +48,11 @@ template <typename T, int N>
 __ESIMD_API std::enable_if_t<std::is_integral_v<T> && sizeof(T) < 8,
                              __ESIMD_NS::simd<T, N>>
 clz(__ESIMD_NS::simd<T, N> vec) {
-  return __spirv_ocl_clz<T, N>(vec.data());
+#ifdef __SYCL_DEVICE_ONLY__
+  return __spirv_ocl_clz(vec.data());
+#else
+  return vec;
+#endif
 }
 
 /// Count the number of trailing zeros.
@@ -55,7 +63,11 @@ template <typename T, int N>
 __ESIMD_API std::enable_if_t<std::is_integral_v<T> && sizeof(T) < 8,
                              __ESIMD_NS::simd<T, N>>
 ctz(__ESIMD_NS::simd<T, N> vec) {
-  return __spirv_ocl_ctz<T, N>(vec.data());
+#ifdef __SYCL_DEVICE_ONLY__
+  return __spirv_ocl_ctz(vec.data());
+#else
+  return vec;
+#endif
 }
 
 /// @} sycl_esimd_bitmanip
@@ -740,7 +752,7 @@ ESIMD_INLINE __ESIMD_NS::simd<T, N> fma(__ESIMD_NS::simd<T, N> a,
   static_assert(__ESIMD_DNS::is_generic_floating_point_v<T>,
                 "fma only supports floating point types");
   using CppT = __ESIMD_DNS::element_type_traits<T>::EnclosingCppT;
-  auto Ret = __spirv_ocl_fma<__ESIMD_DNS::__raw_t<CppT>, N>(
+  auto Ret = __esimd_fmadd<__ESIMD_DNS::__raw_t<CppT>, N>(
       __ESIMD_DNS::convert_vector<CppT, T, N>(a.data()),
       __ESIMD_DNS::convert_vector<CppT, T, N>(b.data()),
       __ESIMD_DNS::convert_vector<CppT, T, N>(c.data()));

@@ -0,0 +1,11 @@
+// RUN: %clangxx -fsycl -fsyntax-only %s
+
+#include <sycl/sycl.hpp>
+
+#include <sycl/ext/intel/esimd.hpp>
+
+SYCL_EXTERNAL sycl::vec<int, 8> call_abs_vec(sycl::vec<int, 8> input) {
+  return sycl::abs(input);
+}
+
+SYCL_EXTERNAL int call_abs_scalar(int input) { return sycl::abs(input); }
@@ -0,0 +1,11 @@
+// RUN: %clangxx -fsycl -fsyntax-only %s
+
+#include <sycl/sycl.hpp>
+
+#include <sycl/ext/intel/esimd.hpp>
+
+SYCL_EXTERNAL sycl::vec<int, 8> call_clz_vec(sycl::vec<int, 8> input) {
+  return sycl::clz(input);
+}
+
+SYCL_EXTERNAL int call_clz_scalar(int input) { return sycl::clz(input); }
@@ -0,0 +1,11 @@
+// RUN: %clangxx -fsycl -fsyntax-only %s
+
+#include <sycl/sycl.hpp>
+
+#include <sycl/ext/intel/esimd.hpp>
+
+SYCL_EXTERNAL sycl::vec<int, 8> call_ctz_vec(sycl::vec<int, 8> input) {
+  return sycl::ctz(input);
+}
+
+SYCL_EXTERNAL int call_ctz_scalar(int input) { return sycl::ctz(input); }
@@ -0,0 +1,11 @@
+// RUN: %clangxx -fsycl -fsyntax-only %s
+
+#include <sycl/sycl.hpp>
+
+#include <sycl/ext/intel/esimd.hpp>
+
+SYCL_EXTERNAL sycl::vec<float, 8> call_fabs_vec(sycl::vec<float, 8> input) {
+  return sycl::fabs(input);
+}
+
+SYCL_EXTERNAL float call_fabs_scalar(float input) { return sycl::fabs(input); }
@@ -0,0 +1,15 @@
+// RUN: %clangxx -fsycl -fsyntax-only %s
+
+#include <sycl/sycl.hpp>
+
+#include <sycl/ext/intel/esimd.hpp>
+
+SYCL_EXTERNAL sycl::vec<float, 8> call_fma_vec(sycl::vec<float, 8> a,
+                                               sycl::vec<float, 8> b,
+                                               sycl::vec<float, 8> c) {
+  return sycl::fma(a, b, c);
+}
+
+SYCL_EXTERNAL float call_fma_scalar(float a, float b, float c) {
+  return sycl::fma(a, b, c);
+}
@@ -0,0 +1,14 @@
+// RUN: %clangxx -fsycl -fsyntax-only %s
+
+#include <sycl/sycl.hpp>
+
+#include <sycl/ext/intel/esimd.hpp>
+
+SYCL_EXTERNAL sycl::vec<float, 8> call_fmax_vec(sycl::vec<float, 8> a,
+                                                sycl::vec<float, 8> b) {
+  return sycl::fmax(a, b);
+}
+
+SYCL_EXTERNAL float call_fmax_scalar(float a, float b) {
+  return sycl::fmax(a, b);
+}
@@ -0,0 +1,14 @@
+// RUN: %clangxx -fsycl -fsyntax-only %s
+
+#include <sycl/sycl.hpp>
+
+#include <sycl/ext/intel/esimd.hpp>
+
+SYCL_EXTERNAL sycl::vec<float, 8> call_fmin_vec(sycl::vec<float, 8> a,
+                                                sycl::vec<float, 8> b) {
+  return sycl::fmin(a, b);
+}
+
+SYCL_EXTERNAL float call_fmin_scalar(float a, float b) {
+  return sycl::fmin(a, b);
+}
@@ -0,0 +1,13 @@
+// RUN: %clangxx -fsycl -fsyntax-only %s
+
+#include <sycl/sycl.hpp>
+
+#include <sycl/ext/intel/esimd.hpp>
+
+SYCL_EXTERNAL sycl::vec<int, 8> call_popcount_vec(sycl::vec<int, 8> input) {
+  return sycl::popcount(input);
+}
+
+SYCL_EXTERNAL int call_popcount_scalar(int input) {
+  return sycl::popcount(input);
+}