Skip to content

Commit

Permalink
ICC17: Work around alignas and constexpr bugs
Browse files Browse the repository at this point in the history
* Use of the nextPowerOfTwo constexpr function in an alignas argument
failed compilation. As a workaround refactor the function to a trait.
* alignas on the class declaration is ignored by ICC17. Therefore, move
the alignas attribute down onto the first non-static data member.
* Only GCC implements an upper bound on over-alignment, so abstract that
into the BoundedAlignment trait.
* The change to nextPowerOfTwo required a subsequent change to left_size
and right_size: the function argument must now be a template parameter.

Signed-off-by: Matthias Kretz <kretz@kde.org>
  • Loading branch information
mattkretz committed Sep 15, 2016
1 parent 1ab1612 commit 15aa101
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 58 deletions.
58 changes: 32 additions & 26 deletions common/simdarray.h
Expand Up @@ -78,9 +78,7 @@ inline SimdArray<T, N, V, M> max(const SimdArray<T, N, V, M> &x,
* member to which all functions are forwarded more or less directly.
*/
template <typename T, std::size_t N, typename VectorType_>
class alignas(
((Common::nextPowerOfTwo(N) * (sizeof(VectorType_) / VectorType_::size()) - 1) & 127) +
1) SimdArray<T, N, VectorType_, N>
class SimdArray<T, N, VectorType_, N>
{
static_assert(std::is_same<T, double>::value || std::is_same<T, float>::value ||
std::is_same<T, int32_t>::value ||
Expand Down Expand Up @@ -452,7 +450,12 @@ class alignas(
Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type));

private:
storage_type data;
// The alignas attribute attached to the class declaration above is ignored by ICC
// 17.0.0 (at least). So just move the alignas attribute down here where it works for
// all compilers.
alignas(Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value *
sizeof(VectorType_) / VectorType_::size()>::value)
storage_type data;
};
template <typename T, std::size_t N, typename VectorType> constexpr std::size_t SimdArray<T, N, VectorType, N>::Size;
template <typename T, std::size_t N, typename VectorType>
Expand Down Expand Up @@ -535,9 +538,7 @@ inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
*
* \headerfile simdarray.h <Vc/SimdArray>
*/
template <typename T, size_t N, typename V, size_t Wt>
class alignas(((Common::nextPowerOfTwo(N) * (sizeof(V) / V::size()) - 1) & 127) +
1) SimdArray
template <typename T, size_t N, typename V, size_t Wt> class SimdArray
{
static_assert(std::is_same<T, double>::value ||
std::is_same<T, float>::value ||
Expand Down Expand Up @@ -1244,7 +1245,8 @@ public:
///\internal
Vc_INTRINSIC SimdArray sortedImpl(std::false_type) const
{
using SortableArray = SimdArray<value_type, Common::nextPowerOfTwo(size())>;
using SortableArray =
SimdArray<value_type, Common::NextPowerOfTwo<size()>::value>;
auto sortable = simd_cast<SortableArray>(*this);
for (std::size_t i = Size; i < SortableArray::Size; ++i) {
using limits = std::numeric_limits<value_type>;
Expand Down Expand Up @@ -1326,7 +1328,11 @@ public:
Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type0));

private: //{{{2
storage_type0 data0;
// The alignas attribute attached to the class declaration above is ignored by ICC
// 17.0.0 (at least). So just move the alignas attribute down here where it works for
// all compilers.
alignas(Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(V) /
V::size()>::value) storage_type0 data0;
storage_type1 data1;
};
#undef Vc_CURRENT_CLASS_NAME
Expand Down Expand Up @@ -1855,7 +1861,7 @@ Vc_INTRINSIC void vc_debug_(const char *, const char *, const T0 &, const Ts &..
!Traits::isAtomic##SimdArrayType_<Return>::value && \
!Traits::is##SimdArrayType_<From>::value && \
Traits::is_simd_##trait_name_<From>::value && \
Common::left_size(Return::Size) < \
Common::left_size<Return::Size>() < \
From::Size * (1 + sizeof...(Froms)) && \
are_all_types_equal<From, Froms...>::value), \
Return> \
Expand All @@ -1872,7 +1878,7 @@ Vc_INTRINSIC void vc_debug_(const char *, const char *, const T0 &, const Ts &..
!Traits::isAtomic##SimdArrayType_<Return>::value && \
!Traits::is##SimdArrayType_<From>::value && \
Traits::is_simd_##trait_name_<From>::value && \
Common::left_size(Return::Size) >= \
Common::left_size<Return::Size>() >= \
From::Size * (1 + sizeof...(Froms)) && \
are_all_types_equal<From, Froms...>::value), \
Return> \
Expand Down Expand Up @@ -1908,7 +1914,7 @@ Vc_SIMDARRAY_CASTS(SimdMaskArray, mask);
!Traits::isAtomic##SimdArrayType_<Return>::value && \
!Traits::is##SimdArrayType_<From>::value && \
Traits::is_simd_##trait_name_<From>::value && \
Return::Size * offset + Common::left_size(Return::Size) < From::Size), \
Return::Size * offset + Common::left_size<Return::Size>() < From::Size), \
Return> \
simd_cast(From x) \
{ \
Expand All @@ -1929,7 +1935,7 @@ Vc_SIMDARRAY_CASTS(SimdMaskArray, mask);
!Traits::isAtomic##SimdArrayType_<Return>::value && \
!Traits::is##SimdArrayType_<From>::value && \
Traits::is_simd_##trait_name_<From>::value && \
Return::Size * offset + Common::left_size(Return::Size) >= From::Size), \
Return::Size * offset + Common::left_size<Return::Size>() >= From::Size), \
Return> \
simd_cast(From x) \
{ \
Expand Down Expand Up @@ -2074,38 +2080,38 @@ Vc_SIMDARRAY_CASTS(SimdMaskArray);
template <typename Return, int offset, typename T, std::size_t N, typename V, \
std::size_t M> \
Vc_INTRINSIC Vc_CONST \
enable_if<(N != M && offset * Return::Size >= Common::left_size(N) && \
offset != 0 && Common::left_size(N) % Return::Size == 0), \
enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() && \
offset != 0 && Common::left_size<N>() % Return::Size == 0), \
Return> \
simd_cast(const SimdArrayType_<T, N, V, M> &x) \
{ \
vc_debug_("simd_cast{offset, right}(", ")\n", offset, x); \
return simd_cast<Return, offset - Common::left_size(N) / Return::Size>( \
return simd_cast<Return, offset - Common::left_size<N>() / Return::Size>( \
internal_data1(x)); \
} \
/* same as above except for odd cases where offset * Return::Size doesn't fit the \
* left side of the SimdArray */ \
template <typename Return, int offset, typename T, std::size_t N, typename V, \
std::size_t M> \
Vc_INTRINSIC Vc_CONST \
enable_if<(N != M && offset * Return::Size >= Common::left_size(N) && \
offset != 0 && Common::left_size(N) % Return::Size != 0), \
enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() && \
offset != 0 && Common::left_size<N>() % Return::Size != 0), \
Return> \
simd_cast(const SimdArrayType_<T, N, V, M> &x) \
{ \
vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x); \
return simd_cast_with_offset<Return, \
offset * Return::Size - Common::left_size(N)>( \
offset * Return::Size - Common::left_size<N>()>( \
internal_data1(x)); \
} \
/* convert from left member of SimdArray */ \
template <typename Return, int offset, typename T, std::size_t N, typename V, \
std::size_t M> \
Vc_INTRINSIC Vc_CONST \
enable_if<(N != M && /*offset * Return::Size < Common::left_size(N) &&*/ \
offset != 0 && (offset + 1) * Return::Size <= Common::left_size(N)), \
Return> \
simd_cast(const SimdArrayType_<T, N, V, M> &x) \
Vc_INTRINSIC Vc_CONST enable_if< \
(N != M && /*offset * Return::Size < Common::left_size<N>() &&*/ \
offset != 0 && (offset + 1) * Return::Size <= Common::left_size<N>()), \
Return> \
simd_cast(const SimdArrayType_<T, N, V, M> &x) \
{ \
vc_debug_("simd_cast{offset, left}(", ")\n", offset, x); \
return simd_cast<Return, offset>(internal_data0(x)); \
Expand All @@ -2114,8 +2120,8 @@ Vc_SIMDARRAY_CASTS(SimdMaskArray);
template <typename Return, int offset, typename T, std::size_t N, typename V, \
std::size_t M> \
Vc_INTRINSIC Vc_CONST \
enable_if<(N != M && (offset * Return::Size < Common::left_size(N)) && \
offset != 0 && (offset + 1) * Return::Size > Common::left_size(N)), \
enable_if<(N != M && (offset * Return::Size < Common::left_size<N>()) && \
offset != 0 && (offset + 1) * Return::Size > Common::left_size<N>()), \
Return> \
simd_cast(const SimdArrayType_<T, N, V, M> &x) \
{ \
Expand Down
4 changes: 2 additions & 2 deletions common/simdarrayfwd.h
Expand Up @@ -129,8 +129,8 @@ class SimdMaskArray;
* types.
*/
template <typename T, std::size_t N> struct SimdArrayTraits {
static constexpr std::size_t N0 = Common::left_size(N);
static constexpr std::size_t N1 = Common::right_size(N);
static constexpr std::size_t N0 = Common::left_size<N>();
static constexpr std::size_t N1 = Common::right_size<N>();

using storage_type0 = SimdArray<T, N0>;
using storage_type1 = SimdArray<T, N1>;
Expand Down
22 changes: 14 additions & 8 deletions common/simdmaskarray.h
Expand Up @@ -51,9 +51,7 @@ namespace Vc_VERSIONED_NAMESPACE
* member to which all functions are forwarded more or less directly.
*/
template <typename T, std::size_t N, typename VectorType_>
class alignas(
((Common::nextPowerOfTwo(N) * (sizeof(VectorType_) / VectorType_::size()) - 1) & 127) +
1) SimdMaskArray<T, N, VectorType_, N>
class SimdMaskArray<T, N, VectorType_, N>
{
public:
using VectorType = VectorType_;
Expand Down Expand Up @@ -263,7 +261,12 @@ class alignas(
Vc_INTRINSIC SimdMaskArray(mask_type &&x) : data(std::move(x)) {}

private:
storage_type data;
// The alignas attribute attached to the class declaration above is ignored by ICC
// 17.0.0 (at least). So just move the alignas attribute down here where it works for
// all compilers.
alignas(Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value *
sizeof(VectorType_) / VectorType_::size()>::value)
storage_type data;
};

template <typename T, std::size_t N, typename VectorType> constexpr std::size_t SimdMaskArray<T, N, VectorType, N>::Size;
Expand Down Expand Up @@ -295,10 +298,9 @@ constexpr std::size_t SimdMaskArray<T, N, VectorType, N>::MemoryAlignment;
* \headerfile simdmaskarray.h <Vc/SimdArray>
*/
template <typename T, size_t N, typename V, size_t Wt>
class alignas(((Common::nextPowerOfTwo(N) * (sizeof(V) / V::size()) - 1) & 127) +
1) SimdMaskArray
class SimdMaskArray
{
static constexpr std::size_t N0 = Common::nextPowerOfTwo(N - N / 2);
static constexpr std::size_t N0 = Common::left_size<N>();

using Split = Common::Split<N0>;

Expand Down Expand Up @@ -650,7 +652,11 @@ class alignas(((Common::nextPowerOfTwo(N) * (sizeof(V) / V::size()) - 1) & 127)
}

private:
storage_type0 data0;
// The alignas attribute attached to the class declaration above is ignored by ICC
// 17.0.0 (at least). So just move the alignas attribute down here where it works for
// all compilers.
alignas(Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(V) /
V::size()>::value) storage_type0 data0;
storage_type1 data1;
};
template <typename T, std::size_t N, typename V, std::size_t M>
Expand Down
42 changes: 34 additions & 8 deletions common/utility.h
Expand Up @@ -38,26 +38,52 @@ namespace Common
* \internal
* Returns the next power of 2 larger than or equal to \p x.
*/
static constexpr std::size_t nextPowerOfTwo(std::size_t x)
{
return (x & (x - 1)) == 0 ? x : nextPowerOfTwo((x | (x >> 1)) + 1);
}
template <size_t x, bool = (x & (x - 1)) == 0> struct NextPowerOfTwo;
template <size_t x>
struct NextPowerOfTwo<x, true> : public std::integral_constant<size_t, x> {
};
template <size_t x>
struct NextPowerOfTwo<x, false>
: public std::integral_constant<
size_t, NextPowerOfTwo<(x | (x >> 1) | (x >> 2) | (x >> 5)) + 1>::value> {
};

/**
* \internal
* Enforce an upper bound to an alignment value. This is necessary because some compilers
* implement such an upper bound and emit a warning if it is encountered.
*/
template <size_t A>
struct BoundedAlignment : public std::integral_constant<size_t,
#ifdef Vc_GCC
((A - 1) &
#ifdef __AVX__
255
#else
127
#endif
) + 1
#else
A
#endif
> {
};

/**
* \internal
* Returns the size of the left/first SimdArray member.
*/
static constexpr std::size_t left_size(std::size_t N)
template <std::size_t N> static constexpr std::size_t left_size()
{
return Common::nextPowerOfTwo(N - N / 2);
return Common::NextPowerOfTwo<(N + 1) / 2>::value;
}
/**
* \internal
* Returns the size of the right/second SimdArray member.
*/
static constexpr std::size_t right_size(std::size_t N)
template <std::size_t N> static constexpr std::size_t right_size()
{
return N - left_size(N);
return N - left_size<N>();
}

} // namespace Common
Expand Down
37 changes: 23 additions & 14 deletions tests/simdarray.cpp
Expand Up @@ -45,27 +45,36 @@ TEST_TYPES(V, createArray, (SIMD_ARRAY_LIST))
VERIFY(sizeof(array) <= 2 * array.size() * sizeof(VT));
}

template <typename T, typename U> constexpr T bound(T x, U max)
{
return x > max ? max : x;
}

TEST_TYPES(V, checkArrayAlignment, (SIMD_ARRAY_LIST))
{
using T = typename V::value_type;
using M = typename V::mask_type;

COMPARE(alignof(V), bound(sizeof(V), 128u)); // sizeof must be at least as large as alignof to
// ensure proper padding in arrays. alignof is
// supposed to be at least as large as the actual
// data size requirements
COMPARE(alignof(M), bound(sizeof(M), 128u));
VERIFY(alignof(V) >= bound(V::Size * sizeof(T), 128u));
auto &&bound_to_max_alignment = [](std::size_t n) {
#ifdef Vc_GCC
#ifdef __AVX__
return n > 256u ? 256u : n;
#else
return n > 128u ? 128u : n;
#endif
#else
return n;
#endif
};

COMPARE(alignof(V),
bound_to_max_alignment(
sizeof(V))); // sizeof must be at least as large as alignof to
// ensure proper padding in arrays. alignof is
// supposed to be at least as large as the actual
// data size requirements
COMPARE(alignof(M), bound_to_max_alignment(sizeof(M)));
VERIFY(alignof(V) >= bound_to_max_alignment(V::Size * sizeof(T)));
if (V::Size > 1) {
using V2 = SimdArray<T, Vc::Common::left_size(V::Size)>;
using V2 = SimdArray<T, Vc::Common::left_size<V::Size>()>;
using M2 = typename V2::mask_type;
VERIFY(alignof(V) >= bound(2 * alignof(V2), 128u));
VERIFY(alignof(M) >= bound(2 * alignof(M2), 128u));
VERIFY(alignof(V) >= bound_to_max_alignment(2 * alignof(V2)));
VERIFY(alignof(M) >= bound_to_max_alignment(2 * alignof(M2)));
}
}

Expand Down

0 comments on commit 15aa101

Please sign in to comment.