Skip to content

Commit

Permalink
Test simd_for_each_n and fix it
Browse files Browse the repository at this point in the history
- use a signed integer for counting down the length (no need for modulo
arithmetic here)
- directly count down len with -= V::Size instead of using an extra
(incorrectly const) lenV variable
- the fallback to std::for_each_n is only available starting with C++17

Signed-off-by: Matthias Kretz <kretz@kde.org>
  • Loading branch information
mattkretz committed Oct 25, 2016
1 parent c4c342e commit 5b50cd9
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 64 deletions.
28 changes: 14 additions & 14 deletions common/algorithms.h
Expand Up @@ -147,20 +147,19 @@ inline enable_if<std::is_arithmetic<typename InputIt::value_type>::value &&
UnaryFunction>
simd_for_each_n(InputIt first, std::size_t count, UnaryFunction f)
{
std::size_t len = count;
typename std::make_signed<size_t>::type len = count;
typedef Vector<typename InputIt::value_type> V;
typedef Scalar::Vector<typename InputIt::value_type> V1;
for (; reinterpret_cast<std::uintptr_t>(std::addressof(*first)) &
(V::MemoryAlignment - 1) &&
len != 0;
(void) --len, ++first) {
(V::MemoryAlignment - 1) &&
len != 0;
--len, ++first) {
f(V1(std::addressof(*first), Vc::Aligned));
}
std::size_t const lenV = count - (V::Size + 1);
for (; lenV != 0; (void) --lenV, first += V::Size) {
for (; len >= int(V::Size); len -= V::Size, first += V::Size) {
f(V(std::addressof(*first), Vc::Aligned));
}
for (; len != 0; (void) --len, ++first) {
for (; len != 0; --len, ++first) {
f(V1(std::addressof(*first), Vc::Aligned));
}
return std::move(f);
Expand All @@ -173,37 +172,38 @@ inline enable_if<std::is_arithmetic<typename InputIt::value_type>::value &&
UnaryFunction>
simd_for_each_n(InputIt first, std::size_t count, UnaryFunction f)
{
std::size_t len = count;
typename std::make_signed<size_t>::type len = count;
typedef Vector<typename InputIt::value_type> V;
typedef Scalar::Vector<typename InputIt::value_type> V1;
for (; reinterpret_cast<std::uintptr_t>(std::addressof(*first)) &
(V::MemoryAlignment - 1) &&
len != 0;
(void) --len, ++first) {
(V::MemoryAlignment - 1) &&
len != 0;
--len, ++first) {
V1 tmp(std::addressof(*first), Vc::Aligned);
f(tmp);
tmp.store(std::addressof(*first), Vc::Aligned);
}
std::size_t const lenV = count - (V::Size + 1);
for (; lenV != 0; (void) --lenV, first += V::Size) {
for (; len >= int(V::Size); len -= V::Size, first += V::Size) {
V tmp(std::addressof(*first), Vc::Aligned);
f(tmp);
tmp.store(std::addressof(*first), Vc::Aligned);
}
for (; len != 0; (void) --len, ++first) {
for (; len != 0; --len, ++first) {
V1 tmp(std::addressof(*first), Vc::Aligned);
f(tmp);
tmp.store(std::addressof(*first), Vc::Aligned);
}
return std::move(f);
}

#ifdef Vc_CXX17
template <typename InputIt, typename UnaryFunction>
inline enable_if<!std::is_arithmetic<typename InputIt::value_type>::value, UnaryFunction>
simd_for_each_n(InputIt first, std::size_t count, UnaryFunction f)
{
return std::for_each_n(first, count, std::move(f));
}
#endif

} // namespace Vc

Expand Down
3 changes: 3 additions & 0 deletions include/Vc/global.h
Expand Up @@ -107,6 +107,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# error "Vc requires support for C++11."
#elif __cplusplus >= 201402L
# define Vc_CXX14 1
# if __cplusplus > 201700L
# define Vc_CXX17 1
# endif
#endif

#if defined(__GNUC__) && !defined(Vc_NO_INLINE_ASM)
Expand Down
115 changes: 65 additions & 50 deletions tests/stlcontainer.cpp
Expand Up @@ -104,57 +104,72 @@ TEST_TYPES(V, simdForEach, (ALL_VECTORS))
{
typedef typename V::EntryType T;
std::vector<T> data;
data.reserve(99);
for (int i = 0; i < 99; ++i) {
data.push_back(T(i));
}
T reference = 1;
int called_with_scalar = 0;
int called_with_V = 0;
int position = 1;
Vc::simd_for_each(std::next(data.begin()), data.end(), [&](auto &x) {
const auto ref = reference + x.IndexesFromZero();
COMPARE(ref, x);
reference += x.Size;
x += 1;
if (std::is_same<decltype(x), Vc::Scalar::Vector<T> &>::value) {
++called_with_scalar;
}
if (std::is_same<decltype(x), V &>::value) {
++called_with_V;
}
for (std::size_t i = 0; i < x.Size; ++i) {
data[position++] += T(2); // modify the container directly - if it is not
// undone by simd_for_each we have a bug
data.resize(99);

for (int variant = 0; variant < 2; ++variant) {
std::iota(data.begin(), data.end(), T(0));
T reference = 1;
int called_with_scalar = 0;
int called_with_V = 0;
int position = 1;

auto &&test1 = [&](auto &x) {
const auto ref = reference + x.IndexesFromZero();
COMPARE(ref, x);
reference += x.Size;
x += 1;
if (std::is_same<decltype(x), Vc::Scalar::Vector<T> &>::value) {
++called_with_scalar;
}
if (std::is_same<decltype(x), V &>::value) {
++called_with_V;
}
for (std::size_t i = 0; i < x.Size; ++i) {
data[position++] += T(2); // modify the container directly - if it is not
// undone by simd_for_each we have a bug
}
};
auto &&test2 = [&](auto x) {
const auto ref = reference + x.IndexesFromZero();
COMPARE(ref, x);
reference += x.Size;
x += 1;
for (std::size_t i = 0; i < x.Size; ++i) {
data[position++] += T(2); // modify the container directly - if it is
// undone by simd_for_each we have a bug
}
};
auto &&test3 = [&reference](auto x) {
const auto ref = reference + x.IndexesFromZero();
COMPARE(ref, x) << "if ref == x + 2 then simd_for_each wrote back the "
"closure argument, even though it should not have";
reference += x.Size;
};

auto &&for_each = [&](auto test) {
if (variant == 0) {
Vc::simd_for_each(std::next(data.begin()), data.end(), test);
} else {
Vc::simd_for_each_n(std::next(data.begin()), 98, test);
}
};
for_each(test1);
VERIFY(called_with_scalar > 0);
VERIFY(called_with_V > 0);
if (Vc::Scalar::is_vector<V>::value) {
// in this case called_with_V and called_with_scalar will have been
// incremented both on
// every call
COMPARE(called_with_V * V::Size + called_with_scalar, 2u * 98u);
} else {
COMPARE(called_with_V * V::Size + called_with_scalar, 98u);
}
});
VERIFY(called_with_scalar > 0);
VERIFY(called_with_V > 0);
if (std::is_same<V, Vc::Scalar::Vector<T>>::value) {
// in this case called_with_V and called_with_scalar will have been incremented both on
// every call
COMPARE(called_with_V * V::Size + called_with_scalar, 2u * 98u);
} else {
COMPARE(called_with_V * V::Size + called_with_scalar, 98u);
}

reference = 2;
position = 1;
Vc::simd_for_each(std::next(data.begin()), data.end(), [&](auto x) {
const auto ref = reference + x.IndexesFromZero();
COMPARE(ref, x);
reference += x.Size;
x += 1;
for (std::size_t i = 0; i < x.Size; ++i) {
data[position++] += T(2); // modify the container directly - if it is undone
// by simd_for_each we have a bug
}
});
reference = 4;
Vc::simd_for_each(std::next(data.begin()), data.end(), [&reference](auto x) {
const auto ref = reference + x.IndexesFromZero();
COMPARE(ref, x) << "if ref == x + 2 then simd_for_each wrote back the closure argument, even though it should not have";
reference += x.Size;
});
reference = 2;
position = 1;
for_each(test2);
reference = 4;
for_each(test3);
}
}
#endif

0 comments on commit 5b50cd9

Please sign in to comment.