Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 0 additions & 18 deletions llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1237,21 +1237,6 @@ static Instruction *addCastInstIfNeeded(Instruction *OldI, Instruction *NewI,
return NewI;
}

// Translates the following intrinsics:
// %res = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
// %res = call double @llvm.fmuladd.f64(double %a, double %b, double %c)
// To
// %mul = fmul <type> %a, <type> %b
// %res = fadd <type> %mul, <type> %c
// TODO: Remove when newer GPU driver is used in CI.
void translateFmuladd(CallInst *CI) {
assert(CI->getIntrinsicID() == Intrinsic::fmuladd);
IRBuilder<> Bld(CI);
auto *Mul = Bld.CreateFMul(CI->getOperand(0), CI->getOperand(1));
auto *Res = Bld.CreateFAdd(Mul, CI->getOperand(2));
CI->replaceAllUsesWith(Res);
}

// Translates an LLVM intrinsic to a form, digestable by the BE.
bool translateLLVMIntrinsic(CallInst *CI) {
Function *F = CI->getCalledFunction();
Expand All @@ -1263,9 +1248,6 @@ bool translateLLVMIntrinsic(CallInst *CI) {
// no translation - it will be simply removed.
// TODO: make use of 'assume' info in the BE
break;
case Intrinsic::fmuladd:
translateFmuladd(CI);
break;
default:
return false; // "intrinsic wasn't translated, keep the original call"
}
Expand Down
14 changes: 6 additions & 8 deletions llvm/test/SYCLLowerIR/ESIMD/lower_llvm_intrin.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; RUN: opt -passes=LowerESIMD -S < %s | FileCheck %s

; This test checks that LowerESIMD pass correctly lowers some llvm intrinsics
; which can't be handled by the VC BE.
; This test checks that LowerESIMD pass does not lower some llvm intrinsics
; which can now be handled by the VC BE.
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"

Expand All @@ -10,17 +10,15 @@ declare double @llvm.fmuladd.f64(double %x, double %y, double %z)

define spir_func float @test_fmuladd_f32(float %x, float %y, float %z) {
%1 = call float @llvm.fmuladd.f32(float %x, float %y, float %z)
; CHECK: %[[A:[0-9a-zA-Z\._]+]] = fmul float %x, %y
; CHECK: %[[B:[0-9a-zA-Z\._]+]] = fadd float %[[A]], %z
; CHECK: %[[A:[0-9a-zA-Z\._]+]] = call float @llvm.fmuladd.f32(float %x, float %y, float %z)
ret float %1
; CHECK: ret float %[[B]]
; CHECK: ret float %[[A]]
}

define spir_func double @test_fmuladd_f64(double %x, double %y, double %z) {
%1 = call double @llvm.fmuladd.f64(double %x, double %y, double %z)
; CHECK: %[[A:[0-9a-zA-Z\._]+]] = fmul double %x, %y
; CHECK: %[[B:[0-9a-zA-Z\._]+]] = fadd double %[[A]], %z
; CHECK: %[[A:[0-9a-zA-Z\._]+]] = call double @llvm.fmuladd.f64(double %x, double %y, double %z)
ret double %1
; CHECK: ret double %[[B]]
; CHECK: ret double %[[A]]
}

23 changes: 14 additions & 9 deletions sycl/include/sycl/ext/intel/esimd/detail/math_intrin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,19 @@ template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_native_powr(__ESIMD_raw_vec_t(T, N), __ESIMD_raw_vec_t(T, N));

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_fabs(__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_s_abs(__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_fmin(__ESIMD_raw_vec_t(T, N),
__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;

// saturation intrinsics
template <typename T0, typename T1, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T0, SZ)
Expand Down Expand Up @@ -101,10 +114,6 @@ template <typename T0, typename T1, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T0, SZ)
__esimd_sstrunc_sat(__ESIMD_raw_vec_t(T1, SZ) src) __ESIMD_INTRIN_END;

template <typename T, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
__esimd_abs(__ESIMD_raw_vec_t(T, SZ) src0) __ESIMD_INTRIN_END;

/// 3 kinds of max
template <typename T, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
Expand All @@ -119,11 +128,7 @@ __ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
__esimd_smax(__ESIMD_raw_vec_t(T, SZ) src0,
__ESIMD_raw_vec_t(T, SZ) src1) __ESIMD_INTRIN_END;

/// 3 kinds of min
template <typename T, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
__esimd_fmin(__ESIMD_raw_vec_t(T, SZ) src0,
__ESIMD_raw_vec_t(T, SZ) src1) __ESIMD_INTRIN_END;
/// 3 kinds of min, the missing fmin uses spir-v instrinsics above
template <typename T, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
__esimd_umin(__ESIMD_raw_vec_t(T, SZ) src0,
Expand Down
10 changes: 7 additions & 3 deletions sycl/include/sycl/ext/intel/esimd/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,11 @@ namespace detail {
template <typename TRes, typename TArg, int SZ>
ESIMD_NODEBUG ESIMD_INLINE simd<TRes, SZ>
__esimd_abs_common_internal(simd<TArg, SZ> src0) {
simd<TArg, SZ> Result = simd<TArg, SZ>(__esimd_abs<TArg, SZ>(src0.data()));
simd<TArg, SZ> Result;
if constexpr (detail::is_generic_floating_point_v<TArg>)
Result = simd<TArg, SZ>(__spirv_ocl_fabs<TArg, SZ>(src0.data()));
else
Result = simd<TArg, SZ>(__spirv_ocl_s_abs<TArg, SZ>(src0.data()));
return convert<TRes>(Result);
}

Expand Down Expand Up @@ -266,7 +270,7 @@ __ESIMD_API simd<T, SZ>(min)(simd<T, SZ> src0, simd<T, SZ> src1, Sat sat = {}) {
constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;

if constexpr (std::is_floating_point<T>::value) {
auto Result = __esimd_fmin<T, SZ>(src0.data(), src1.data());
auto Result = __spirv_ocl_fmin<T, SZ>(src0.data(), src1.data());
if constexpr (is_sat)
Result = __esimd_sat<T, T, SZ>(Result);
return simd<T, SZ>(Result);
Expand Down Expand Up @@ -1475,7 +1479,7 @@ template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_min {
template <typename... T>
simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
if constexpr (std::is_floating_point<T1>::value) {
return __esimd_fmin<T1, SZ>(v1.data(), v2.data());
return __spirv_ocl_fmin<T1, SZ>(v1.data(), v2.data());
} else if constexpr (std::is_unsigned<T1>::value) {
return __esimd_umin<T1, SZ>(v1.data(), v2.data());
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,19 @@ __ESIMD_INTRIN __ESIMD_raw_vec_t(sycl::half, N)

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__esimd_fmadd(__ESIMD_raw_vec_t(T, N) a, __ESIMD_raw_vec_t(T, N) b,
__ESIMD_raw_vec_t(T, N) c) __ESIMD_INTRIN_END;
__spirv_ocl_fma(__ESIMD_raw_vec_t(T, N) a, __ESIMD_raw_vec_t(T, N) b,
__ESIMD_raw_vec_t(T, N) c) __ESIMD_INTRIN_END;
template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_popcount(__ESIMD_raw_vec_t(T, N) src0) __ESIMD_INTRIN_END;

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_ctz(__ESIMD_raw_vec_t(T, N) src0) __ESIMD_INTRIN_END;

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_clz(__ESIMD_raw_vec_t(T, N) src0) __ESIMD_INTRIN_END;

#undef __ESIMD_raw_vec_t
#undef __ESIMD_cpp_vec_t
Expand Down
36 changes: 35 additions & 1 deletion sycl/include/sycl/ext/intel/experimental/esimd/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,40 @@ __ESIMD_API std::enable_if_t<
return __ESIMD_NS::ror<T0, T1, T2>(src0, src1);
}

/// Count the number of 1-bits.
/// @tparam T element type.
/// @tparam N vector length.
/// @return the popcounted vector.
template <typename T, int N>
__ESIMD_API std::enable_if_t<std::is_integral_v<T> && sizeof(T) < 8,
__ESIMD_NS::simd<T, N>>
popcount(__ESIMD_NS::simd<T, N> vec) {
return __spirv_ocl_popcount<T, N>(vec.data());
}

/// Count the number of leading zeros.
/// If the input is 0, the number of total bits is returned.
/// @tparam T element type.
/// @tparam N vector length.
/// @return vector with number of leading zeros of the input vector.
template <typename T, int N>
__ESIMD_API std::enable_if_t<std::is_integral_v<T> && sizeof(T) < 8,
__ESIMD_NS::simd<T, N>>
clz(__ESIMD_NS::simd<T, N> vec) {
return __spirv_ocl_clz<T, N>(vec.data());
}

/// Count the number of trailing zeros.
/// @tparam T element type.
/// @tparam N vector length.
/// @return vector with number of trailing zeros of the input vector.
template <typename T, int N>
__ESIMD_API std::enable_if_t<std::is_integral_v<T> && sizeof(T) < 8,
__ESIMD_NS::simd<T, N>>
ctz(__ESIMD_NS::simd<T, N> vec) {
return __spirv_ocl_ctz<T, N>(vec.data());
}

/// @} sycl_esimd_bitmanip

/// @addtogroup sycl_esimd_math
Expand Down Expand Up @@ -1671,7 +1705,7 @@ ESIMD_INLINE __ESIMD_NS::simd<T, N> fma(__ESIMD_NS::simd<T, N> a,
static_assert(__ESIMD_DNS::is_generic_floating_point_v<T>,
"fma only supports floating point types");
using CppT = __ESIMD_DNS::element_type_traits<T>::EnclosingCppT;
auto Ret = __esimd_fmadd<__ESIMD_DNS::__raw_t<CppT>, N>(
auto Ret = __spirv_ocl_fma<__ESIMD_DNS::__raw_t<CppT>, N>(
__ESIMD_DNS::convert_vector<CppT, T, N>(a.data()),
__ESIMD_DNS::convert_vector<CppT, T, N>(b.data()),
__ESIMD_DNS::convert_vector<CppT, T, N>(c.data()));
Expand Down
116 changes: 116 additions & 0 deletions sycl/test-e2e/ESIMD/clz_ctz.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
//==---------------- clz_ctz.cpp - DPC++ ESIMD on-device test -------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out
// RUN: %{run} %t.out

#include "esimd_test_utils.hpp"

#include <iostream>
#include <sycl/detail/core.hpp>
#include <sycl/ext/intel/esimd.hpp>

using namespace sycl;

template <typename T, bool CLZ> bool test(queue &q) {
std::cout << "Running " << (CLZ ? "CLZ " : "CTZ ")
<< esimd_test::type_name<T>() << std::endl;
constexpr unsigned VL = 16;
constexpr unsigned Size = 1024 * 128;

T *A = new T[Size];
T *B = new T[Size];

for (unsigned i = 0; i < Size; ++i) {
A[i] = i;
B[i] = 0;
}

try {
buffer<T, 1> bufa(A, range<1>(Size));
buffer<T, 1> bufb(B, range<1>(Size));

// We need that many workgroups
range<1> GlobalRange{Size / VL};

// We need that many threads in each group
range<1> LocalRange{1};

queue q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler());

auto e = q.submit([&](handler &cgh) {
auto PA = bufa.template get_access<access::mode::read>(cgh);
auto PB = bufb.template get_access<access::mode::write>(cgh);
cgh.parallel_for(GlobalRange * LocalRange,
[=](id<1> i) SYCL_ESIMD_KERNEL {
using namespace sycl::ext::intel::esimd;
unsigned int offset = i * VL * sizeof(T);
simd<T, VL> va;
va.copy_from(PA, offset);
simd<T, VL> vb;
if constexpr (CLZ)
vb = __ESIMD_ENS::clz(va);
else
vb = __ESIMD_ENS::ctz(va);
vb.copy_to(PB, offset);
});
});
e.wait();
} catch (sycl::exception const &e) {
std::cout << "SYCL exception caught: " << e.what() << '\n';

delete[] A;
delete[] B;
return false;
}

int err_cnt = 0;

for (unsigned i = 0; i < Size; ++i) {
int Expected =
CLZ ? (i == 0 ? sizeof(T) * 8 : __builtin_clz(i)) : __builtin_ctz(i);
int Computed = B[i];
if (Expected != Computed && ++err_cnt < 10)
std::cout << "Failure at " << std::to_string(i)
<< ": Expected: " << std::to_string(Expected)
<< " Computed: " << std::to_string(Computed) << std::endl;
}
if (err_cnt > 0) {
std::cout << " pass rate: "
<< ((float)(Size - err_cnt) / (float)Size) * 100.0f << "% ("
<< (Size - err_cnt) << "/" << Size << ")\n";
}

delete[] A;
delete[] B;

std::cout << (err_cnt > 0 ? "FAILED\n" : "Passed\n");
return err_cnt == 0;
}

int main() {
bool Passed = true;
queue q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler());
esimd_test::printTestLabel(q);
Passed &= test<uint32_t, true>(q);
Passed &= test<int32_t, true>(q);
Passed &= test<uint32_t, false>(q);
Passed &= test<int32_t, false>(q);
// TODO: Enable once GPU driver issue is fixed
#if 0
Passed &= test<uint8_t, true>(q);
Passed &= test<int8_t, true>(q);
Passed &= test<uint8_t, false>(q);
Passed &= test<int8_t, false>(q);

Passed &= test<uint16_t, true>(q);
Passed &= test<int16_t, true>(q);
Passed &= test<uint16_t, false>(q);
Passed &= test<int16_t, false>(q);
#endif
return !Passed;
}
Loading