Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1234,6 +1234,21 @@ static Instruction *addCastInstIfNeeded(Instruction *OldI, Instruction *NewI,
return NewI;
}

// Translates the following intrinsics:
// %res = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
// %res = call double @llvm.fmuladd.f64(double %a, double %b, double %c)
// To
// %mul = fmul <type> %a, <type> %b
// %res = fadd <type> %mul, <type> %c
// TODO: Remove when newer GPU driver is used in CI.
void translateFmuladd(CallInst *CI) {
assert(CI->getIntrinsicID() == Intrinsic::fmuladd);
IRBuilder<> Bld(CI);
auto *Mul = Bld.CreateFMul(CI->getOperand(0), CI->getOperand(1));
auto *Res = Bld.CreateFAdd(Mul, CI->getOperand(2));
CI->replaceAllUsesWith(Res);
}

// Translates an LLVM intrinsic to a form, digestable by the BE.
bool translateLLVMIntrinsic(CallInst *CI) {
Function *F = CI->getCalledFunction();
Expand All @@ -1245,6 +1260,9 @@ bool translateLLVMIntrinsic(CallInst *CI) {
// no translation - it will be simply removed.
// TODO: make use of 'assume' info in the BE
break;
case Intrinsic::fmuladd:
translateFmuladd(CI);
break;
default:
return false; // "intrinsic wasn't translated, keep the original call"
}
Expand Down
14 changes: 8 additions & 6 deletions llvm/test/SYCLLowerIR/ESIMD/lower_llvm_intrin.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; RUN: opt -passes=LowerESIMD -S < %s | FileCheck %s

; This test checks that LowerESIMD pass does not lower some llvm intrinsics
; which can now be handled by the VC BE.
; This test checks that LowerESIMD pass correctly lowers some llvm intrinsics
; which can't be handled by the VC BE.
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"

Expand All @@ -10,15 +10,17 @@ declare double @llvm.fmuladd.f64(double %x, double %y, double %z)

define spir_func float @test_fmuladd_f32(float %x, float %y, float %z) {
%1 = call float @llvm.fmuladd.f32(float %x, float %y, float %z)
; CHECK: %[[A:[0-9a-zA-Z\._]+]] = call float @llvm.fmuladd.f32(float %x, float %y, float %z)
; CHECK: %[[A:[0-9a-zA-Z\._]+]] = fmul float %x, %y
; CHECK: %[[B:[0-9a-zA-Z\._]+]] = fadd float %[[A]], %z
ret float %1
; CHECK: ret float %[[A]]
; CHECK: ret float %[[B]]
}

define spir_func double @test_fmuladd_f64(double %x, double %y, double %z) {
%1 = call double @llvm.fmuladd.f64(double %x, double %y, double %z)
; CHECK: %[[A:[0-9a-zA-Z\._]+]] = call double @llvm.fmuladd.f64(double %x, double %y, double %z)
; CHECK: %[[A:[0-9a-zA-Z\._]+]] = fmul double %x, %y
; CHECK: %[[B:[0-9a-zA-Z\._]+]] = fadd double %[[A]], %z
ret double %1
; CHECK: ret double %[[A]]
; CHECK: ret double %[[B]]
}

33 changes: 14 additions & 19 deletions sycl/include/sycl/ext/intel/esimd/detail/math_intrin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,23 +72,6 @@ template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_native_powr(__ESIMD_raw_vec_t(T, N), __ESIMD_raw_vec_t(T, N));

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead what if we SFINAE these to only work when N==1, so we use the compiler generated ones by default?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whatever, I'll deal with this later. Will approve/merge for now.

__spirv_ocl_fabs(__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_s_abs(__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_fmin(__ESIMD_raw_vec_t(T, N),
__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_fmax(__ESIMD_raw_vec_t(T, N),
__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;
// saturation intrinsics
template <typename T0, typename T1, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T0, SZ)
Expand Down Expand Up @@ -118,7 +101,15 @@ template <typename T0, typename T1, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T0, SZ)
__esimd_sstrunc_sat(__ESIMD_raw_vec_t(T1, SZ) src) __ESIMD_INTRIN_END;

/// 3 kinds of max, the missing fmax uses spir-v intrinsics above
template <typename T, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
__esimd_abs(__ESIMD_raw_vec_t(T, SZ) src0) __ESIMD_INTRIN_END;

/// 3 kinds of max
template <typename T, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
__esimd_fmax(__ESIMD_raw_vec_t(T, SZ) src0,
__ESIMD_raw_vec_t(T, SZ) src1) __ESIMD_INTRIN_END;
template <typename T, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
__esimd_umax(__ESIMD_raw_vec_t(T, SZ) src0,
Expand All @@ -128,7 +119,11 @@ __ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
__esimd_smax(__ESIMD_raw_vec_t(T, SZ) src0,
__ESIMD_raw_vec_t(T, SZ) src1) __ESIMD_INTRIN_END;

/// 3 kinds of min, the missing fmin uses spir-v instrinsics above
/// 3 kinds of min
template <typename T, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
__esimd_fmin(__ESIMD_raw_vec_t(T, SZ) src0,
__ESIMD_raw_vec_t(T, SZ) src1) __ESIMD_INTRIN_END;
template <typename T, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
__esimd_umin(__ESIMD_raw_vec_t(T, SZ) src0,
Expand Down
14 changes: 5 additions & 9 deletions sycl/include/sycl/ext/intel/esimd/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,7 @@ namespace detail {
template <typename TRes, typename TArg, int SZ>
ESIMD_NODEBUG ESIMD_INLINE simd<TRes, SZ>
__esimd_abs_common_internal(simd<TArg, SZ> src0) {
simd<TArg, SZ> Result;
if constexpr (detail::is_generic_floating_point_v<TArg>)
Result = simd<TArg, SZ>(__spirv_ocl_fabs<TArg, SZ>(src0.data()));
else
Result = simd<TArg, SZ>(__spirv_ocl_s_abs<TArg, SZ>(src0.data()));
simd<TArg, SZ> Result = simd<TArg, SZ>(__esimd_abs<TArg, SZ>(src0.data()));
return convert<TRes>(Result);
}

Expand Down Expand Up @@ -185,7 +181,7 @@ __ESIMD_API simd<T, SZ>(max)(simd<T, SZ> src0, simd<T, SZ> src1, Sat sat = {}) {
constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;

if constexpr (std::is_floating_point<T>::value) {
auto Result = __spirv_ocl_fmax<T, SZ>(src0.data(), src1.data());
auto Result = __esimd_fmax<T, SZ>(src0.data(), src1.data());
if constexpr (is_sat)
Result = __esimd_sat<T, T, SZ>(Result);
return simd<T, SZ>(Result);
Expand Down Expand Up @@ -270,7 +266,7 @@ __ESIMD_API simd<T, SZ>(min)(simd<T, SZ> src0, simd<T, SZ> src1, Sat sat = {}) {
constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;

if constexpr (std::is_floating_point<T>::value) {
auto Result = __spirv_ocl_fmin<T, SZ>(src0.data(), src1.data());
auto Result = __esimd_fmin<T, SZ>(src0.data(), src1.data());
if constexpr (is_sat)
Result = __esimd_sat<T, T, SZ>(Result);
return simd<T, SZ>(Result);
Expand Down Expand Up @@ -1466,7 +1462,7 @@ template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_max {
template <typename... T>
simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
if constexpr (std::is_floating_point<T1>::value) {
return __spirv_ocl_fmax<T1, SZ>(v1.data(), v2.data());
return __esimd_fmax<T1, SZ>(v1.data(), v2.data());
} else if constexpr (std::is_unsigned<T1>::value) {
return __esimd_umax<T1, SZ>(v1.data(), v2.data());
} else {
Expand All @@ -1479,7 +1475,7 @@ template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_min {
template <typename... T>
simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
if constexpr (std::is_floating_point<T1>::value) {
return __spirv_ocl_fmin<T1, SZ>(v1.data(), v2.data());
return __esimd_fmin<T1, SZ>(v1.data(), v2.data());
} else if constexpr (std::is_unsigned<T1>::value) {
return __esimd_umin<T1, SZ>(v1.data(), v2.data());
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,19 +112,8 @@ __ESIMD_INTRIN __ESIMD_raw_vec_t(sycl::half, N)

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_fma(__ESIMD_raw_vec_t(T, N) a, __ESIMD_raw_vec_t(T, N) b,
__ESIMD_raw_vec_t(T, N) c) __ESIMD_INTRIN_END;
template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_popcount(__ESIMD_raw_vec_t(T, N) src0) __ESIMD_INTRIN_END;

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_ctz(__ESIMD_raw_vec_t(T, N) src0) __ESIMD_INTRIN_END;

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_clz(__ESIMD_raw_vec_t(T, N) src0) __ESIMD_INTRIN_END;
__esimd_fmadd(__ESIMD_raw_vec_t(T, N) a, __ESIMD_raw_vec_t(T, N) b,
__ESIMD_raw_vec_t(T, N) c) __ESIMD_INTRIN_END;

#undef __ESIMD_raw_vec_t
#undef __ESIMD_cpp_vec_t
Expand Down
20 changes: 16 additions & 4 deletions sycl/include/sycl/ext/intel/experimental/esimd/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@ template <typename T, int N>
__ESIMD_API std::enable_if_t<std::is_integral_v<T> && sizeof(T) < 8,
__ESIMD_NS::simd<T, N>>
popcount(__ESIMD_NS::simd<T, N> vec) {
return __spirv_ocl_popcount<T, N>(vec.data());
#ifdef __SYCL_DEVICE_ONLY__
return __spirv_ocl_popcount(vec.data());
#else
return vec;
#endif
}

/// Count the number of leading zeros.
Expand All @@ -44,7 +48,11 @@ template <typename T, int N>
__ESIMD_API std::enable_if_t<std::is_integral_v<T> && sizeof(T) < 8,
__ESIMD_NS::simd<T, N>>
clz(__ESIMD_NS::simd<T, N> vec) {
return __spirv_ocl_clz<T, N>(vec.data());
#ifdef __SYCL_DEVICE_ONLY__
return __spirv_ocl_clz(vec.data());
#else
return vec;
#endif
}

/// Count the number of trailing zeros.
Expand All @@ -55,7 +63,11 @@ template <typename T, int N>
__ESIMD_API std::enable_if_t<std::is_integral_v<T> && sizeof(T) < 8,
__ESIMD_NS::simd<T, N>>
ctz(__ESIMD_NS::simd<T, N> vec) {
return __spirv_ocl_ctz<T, N>(vec.data());
#ifdef __SYCL_DEVICE_ONLY__
return __spirv_ocl_ctz(vec.data());
#else
return vec;
#endif
}

/// @} sycl_esimd_bitmanip
Expand Down Expand Up @@ -740,7 +752,7 @@ ESIMD_INLINE __ESIMD_NS::simd<T, N> fma(__ESIMD_NS::simd<T, N> a,
static_assert(__ESIMD_DNS::is_generic_floating_point_v<T>,
"fma only supports floating point types");
using CppT = __ESIMD_DNS::element_type_traits<T>::EnclosingCppT;
auto Ret = __spirv_ocl_fma<__ESIMD_DNS::__raw_t<CppT>, N>(
auto Ret = __esimd_fmadd<__ESIMD_DNS::__raw_t<CppT>, N>(
__ESIMD_DNS::convert_vector<CppT, T, N>(a.data()),
__ESIMD_DNS::convert_vector<CppT, T, N>(b.data()),
__ESIMD_DNS::convert_vector<CppT, T, N>(c.data()));
Expand Down
11 changes: 11 additions & 0 deletions sycl/test/regression/esimd/abs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// RUN: %clangxx -fsycl -fsyntax-only %s

#include <sycl/sycl.hpp>

#include <sycl/ext/intel/esimd.hpp>

SYCL_EXTERNAL sycl::vec<int, 8> call_abs_vec(sycl::vec<int, 8> input) {
return sycl::abs(input);
}

SYCL_EXTERNAL int call_abs_scalar(int input) { return sycl::abs(input); }
11 changes: 11 additions & 0 deletions sycl/test/regression/esimd/clz.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// RUN: %clangxx -fsycl -fsyntax-only %s

#include <sycl/sycl.hpp>

#include <sycl/ext/intel/esimd.hpp>

SYCL_EXTERNAL sycl::vec<int, 8> call_clz_vec(sycl::vec<int, 8> input) {
return sycl::clz(input);
}

SYCL_EXTERNAL int call_clz_scalar(int input) { return sycl::clz(input); }
11 changes: 11 additions & 0 deletions sycl/test/regression/esimd/ctz.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// RUN: %clangxx -fsycl -fsyntax-only %s

#include <sycl/sycl.hpp>

#include <sycl/ext/intel/esimd.hpp>

SYCL_EXTERNAL sycl::vec<int, 8> call_ctz_vec(sycl::vec<int, 8> input) {
return sycl::ctz(input);
}

SYCL_EXTERNAL int call_ctz_scalar(int input) { return sycl::ctz(input); }
11 changes: 11 additions & 0 deletions sycl/test/regression/esimd/fabs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// RUN: %clangxx -fsycl -fsyntax-only %s

#include <sycl/sycl.hpp>

#include <sycl/ext/intel/esimd.hpp>

SYCL_EXTERNAL sycl::vec<float, 8> call_fabs_vec(sycl::vec<float, 8> input) {
return sycl::fabs(input);
}

SYCL_EXTERNAL float call_fabs_scalar(float input) { return sycl::fabs(input); }
15 changes: 15 additions & 0 deletions sycl/test/regression/esimd/fma.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// RUN: %clangxx -fsycl -fsyntax-only %s

#include <sycl/sycl.hpp>

#include <sycl/ext/intel/esimd.hpp>

SYCL_EXTERNAL sycl::vec<float, 8> call_fma_vec(sycl::vec<float, 8> a,
sycl::vec<float, 8> b,
sycl::vec<float, 8> c) {
return sycl::fma(a, b, c);
}

SYCL_EXTERNAL float call_fma_scalar(float a, float b, float c) {
return sycl::fma(a, b, c);
}
14 changes: 14 additions & 0 deletions sycl/test/regression/esimd/fmax.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// RUN: %clangxx -fsycl -fsyntax-only %s

#include <sycl/sycl.hpp>

#include <sycl/ext/intel/esimd.hpp>

SYCL_EXTERNAL sycl::vec<float, 8> call_fmax_vec(sycl::vec<float, 8> a,
sycl::vec<float, 8> b) {
return sycl::fmax(a, b);
}

SYCL_EXTERNAL float call_fmax_scalar(float a, float b) {
return sycl::fmax(a, b);
}
14 changes: 14 additions & 0 deletions sycl/test/regression/esimd/fmin.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// RUN: %clangxx -fsycl -fsyntax-only %s

#include <sycl/sycl.hpp>

#include <sycl/ext/intel/esimd.hpp>

SYCL_EXTERNAL sycl::vec<float, 8> call_fmin_vec(sycl::vec<float, 8> a,
sycl::vec<float, 8> b) {
return sycl::fmin(a, b);
}

SYCL_EXTERNAL float call_fmin_scalar(float a, float b) {
return sycl::fmin(a, b);
}
13 changes: 13 additions & 0 deletions sycl/test/regression/esimd/popcount.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// RUN: %clangxx -fsycl -fsyntax-only %s

#include <sycl/sycl.hpp>

#include <sycl/ext/intel/esimd.hpp>

SYCL_EXTERNAL sycl::vec<int, 8> call_popcount_vec(sycl::vec<int, 8> input) {
return sycl::popcount(input);
}

SYCL_EXTERNAL int call_popcount_scalar(int input) {
return sycl::popcount(input);
}