Skip to content

Commit

Permalink
Port to latest virtest; add some std::simd compat
Browse files Browse the repository at this point in the history
* port test code to mattkretz/virtest
* drop MIC support (initial change, more cleanup to come): ICC 18
  dropped support for -mmic
* drop most ICC -diag-disable flags
* require ICC 18 (17 fails on simdize)
* add simd<T, Abi> alias
* add simd_abi::scalar, fixed_size, __sse, and __avx
* add Vector<T, fixed_size<N>> specialization that derives from
  SimdArray<T, N>
* add generator ctor (similar to std::experimental::simd generator)
* modify math functions to SFINAE for fixed_size, since we want the
SimdArray overload
* clean up internal construction of Simd(Mask)Array from storage_type to
  be unusable for normal users
* fix operator overloads to work correctly after SimdArray and Vector<T,
  fixed_size> became "similar"
* refactor simdize Adaptor construction logic that determines whether to
  use parens, braces, or double braces
* deprecate IndexesFromZero; use generator ctor instead

Signed-off-by: Matthias Kretz <kretz@kde.org>
  • Loading branch information
mattkretz committed Sep 3, 2018
1 parent 1894840 commit 6b77615
Show file tree
Hide file tree
Showing 62 changed files with 1,251 additions and 2,372 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Expand Up @@ -6,4 +6,4 @@ vc-benchmarks
*~
.makeApidox.stamp
.makeApidox.stamp.new
build
build-*
1 change: 0 additions & 1 deletion CMakeLists.txt
Expand Up @@ -23,7 +23,6 @@ vc_determine_compiler()

if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(x86|AMD64|amd64)")
set(Vc_X86 TRUE)
find_package(MIC)
elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(arm|aarch32|aarch64)")
message(WARNING "No optimized implementation of the Vc types available for ${CMAKE_SYSTEM_PROCESSOR}")
set(Vc_ARM TRUE)
Expand Down
9 changes: 5 additions & 4 deletions README.md
Expand Up @@ -34,10 +34,11 @@ sets. Thus an application written with Vc can be compiled for:
* AVX and AVX2
* SSE2 up to SSE4.2 or SSE4a
* Scalar
* MIC
* AVX-512 (in development)
* AVX-512 (Vc 2 development)
* NEON (in development)
* NVIDIA GPUs / CUDA (in development)
* NVIDIA GPUs / CUDA (research)

After Intel dropped MIC support with ICC 18, Vc 1.4 also removes support for it.

## Examples

Expand Down Expand Up @@ -80,7 +81,7 @@ C++11 Compiler:
* GCC >= 4.8.1
* clang >= 3.4
* ICC >= 15.0.3
* ICC >= 18
* Visual Studio 2015 (64-bit target)
Expand Down
16 changes: 14 additions & 2 deletions avx/const.h
Expand Up @@ -118,7 +118,13 @@ namespace AVX
template <> Vc_ALWAYS_INLINE Vc_CONST Vector<float> Const<float>::highMask(int bits)
{
#ifdef Vc_IMPL_AVX2
return _mm256_castsi256_ps(_mm256_slli_epi32(~__m256i(), bits));
#if defined Vc_ICC || defined Vc_MSVC
__m256i allone;
allone = _mm256_cmpeq_epi8(allone, allone);
#else
auto allone = ~__m256i();
#endif
return _mm256_castsi256_ps(_mm256_slli_epi32(allone, bits));
#else
__m128 tmp = _mm_castsi128_ps(_mm_slli_epi32(_mm_setallone_si128(), bits));
return concat(tmp, tmp);
Expand All @@ -127,7 +133,13 @@ namespace AVX
template <> Vc_ALWAYS_INLINE Vc_CONST Vector<double> Const<double>::highMask(int bits)
{
#ifdef Vc_IMPL_AVX2
return _mm256_castsi256_pd(_mm256_slli_epi64(~__m256i(), bits));
#if defined Vc_ICC || defined Vc_MSVC
__m256i allone;
allone = _mm256_cmpeq_epi8(allone, allone);
#else
auto allone = ~__m256i();
#endif
return _mm256_castsi256_pd(_mm256_slli_epi64(allone, bits));
#else
__m128d tmp = _mm_castsi128_pd(_mm_slli_epi64(_mm_setallone_si128(), bits));
return concat(tmp, tmp);
Expand Down
5 changes: 0 additions & 5 deletions cmake/FindMIC.cmake
Expand Up @@ -168,11 +168,6 @@ endif(ENABLE_MIC)

if(MIC_NATIVE_FOUND OR MIC_OFFLOAD_FOUND)
set(MIC_FOUND true)
list(APPEND CMAKE_MIC_CXX_FLAGS "-diag-disable 2338") # this switch statement does not have a default clause
list(APPEND CMAKE_MIC_CXX_FLAGS "-diag-disable 193") # zero used for undefined preprocessing identifier "Vc_GCC"
list(APPEND CMAKE_MIC_CXX_FLAGS "-diag-disable 61") # warning #61: integer operation result is out of range
list(APPEND CMAKE_MIC_CXX_FLAGS "-diag-disable 173") # warning #173: floating-point value does not fit in required integral type
list(APPEND CMAKE_MIC_CXX_FLAGS "-diag-disable 264") # warning #264: floating-point value does not fit in required floating-point type

list(APPEND CMAKE_MIC_CXX_FLAGS "-fp-model source") # fix IEEE FP compliance

Expand Down
10 changes: 2 additions & 8 deletions cmake/VcMacros.cmake
Expand Up @@ -53,8 +53,8 @@ macro(vc_determine_compiler)
message(STATUS "Detected Compiler: Intel ${Vc_ICC_VERSION}")

# break build with too old ICC as early as possible.
if(Vc_ICC_VERSION VERSION_LESS 15.0.3)
message(FATAL_ERROR "Vc 1.x requires C++11 support. This requires at least ICC 15.0.3")
if(Vc_ICC_VERSION VERSION_LESS 18.0.0)
message(FATAL_ERROR "Vc 1.4 requires least ICC 18")
endif()
elseif(CMAKE_CXX_COMPILER MATCHES "(opencc|openCC)$")
set(Vc_COMPILER_IS_OPEN64 true)
Expand Down Expand Up @@ -300,12 +300,6 @@ int main() { return 0; }
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -DNDEBUG -O3")
endif()
vc_add_compiler_flag(Vc_COMPILE_FLAGS "-diag-disable 913")
# Disable warning #13211 "Immediate parameter to intrinsic call too large". (sse/vector.tcc rotated(int))
vc_add_compiler_flag(Vc_COMPILE_FLAGS "-diag-disable 13211")
vc_add_compiler_flag(Vc_COMPILE_FLAGS "-diag-disable 61") # warning #61: integer operation result is out of range
vc_add_compiler_flag(Vc_COMPILE_FLAGS "-diag-disable 173") # warning #173: floating-point value does not fit in required integral type
vc_add_compiler_flag(Vc_COMPILE_FLAGS "-diag-disable 264") # warning #264: floating-point value does not fit in required floating-point type
if(CMAKE_BUILD_TYPE STREQUAL "Release" OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
set(ENABLE_STRICT_ALIASING true CACHE BOOL "Enables strict aliasing rules for more aggressive optimizations")
if(ENABLE_STRICT_ALIASING)
Expand Down
2 changes: 1 addition & 1 deletion common/elementreference.h
Expand Up @@ -36,7 +36,6 @@ namespace Detail
{
template <typename U, typename Accessor = U> class ElementReference
{
using value_type = typename U::value_type;
friend U;
friend Accessor;
Vc_INTRINSIC ElementReference(U &o, int i) noexcept : index(i), obj(o) {}
Expand All @@ -49,6 +48,7 @@ template <typename U, typename Accessor = U> class ElementReference
}

public:
using value_type = typename U::value_type;
Vc_INTRINSIC ElementReference(const ElementReference &) = delete;

/**
Expand Down
2 changes: 1 addition & 1 deletion common/exponential.h
Expand Up @@ -43,7 +43,7 @@ constexpr float MAXNUMF = 3.4028234663852885981170418348451692544e38f;

template <typename Abi, typename = enable_if<std::is_same<Abi, VectorAbi::Sse>::value ||
std::is_same<Abi, VectorAbi::Avx>::value>>
inline Vector<float, Abi> exp(Vector<float, Abi> x)
inline Vector<float, detail::not_fixed_size_abi<Abi>> exp(Vector<float, Abi> x)
{
using V = Vector<float, Abi>;
typedef typename V::Mask M;
Expand Down
10 changes: 10 additions & 0 deletions common/generalinterface.h
Expand Up @@ -49,4 +49,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
return Vector(Vc::IndexesFromZero);
}

///////////////////////////////////////////////////////////////////////////
// generator ctor
/// Generator constructor: initializes the vector from the result of generate(g).
///
/// This overload participates in overload resolution only if invoking \p g with a
/// size_t argument yields a type that is convertible to value_type. The callable is
/// perfectly forwarded to generate(), and the value returned from there is used to
/// construct this object via a delegating constructor call.
///
/// NOTE(review): the unnamed `class...` pack presumably prevents callers from
/// explicitly supplying the defaulted SFINAE template parameter - confirm.
/// NOTE(review): presumably \p g is called once per element index, mirroring the
/// std::experimental::simd generator constructor - verify against generate().
///
/// \param g callable taking an index (size_t) and returning a value convertible to
///          value_type.
template <class G, class...,
class = typename std::enable_if<std::is_convertible<
decltype(std::declval<G>()(size_t())), value_type>::value>::type>
explicit Vector(G &&g) : Vector(generate(std::forward<G>(g)))
{
}


// vim: foldmethod=marker
9 changes: 6 additions & 3 deletions common/logarithm.h
Expand Up @@ -255,17 +255,20 @@ static inline Vector<T, Abi> calc(V _x)
} // namespace Detail

template <typename T, typename Abi>
Vc_INTRINSIC Vc_CONST Vector<T, Abi> log(const Vector<T, Abi> &x)
Vc_INTRINSIC Vc_CONST Vector<T, detail::not_fixed_size_abi<Abi>> log(
const Vector<T, Abi> &x)
{
return Detail::LogImpl<BaseE>::calc<T, Abi>(x);
}
template <typename T, typename Abi>
Vc_INTRINSIC Vc_CONST Vector<T, Abi> log10(const Vector<T, Abi> &x)
Vc_INTRINSIC Vc_CONST Vector<T, detail::not_fixed_size_abi<Abi>> log10(
const Vector<T, Abi> &x)
{
return Detail::LogImpl<Base10>::calc<T, Abi>(x);
}
template <typename T, typename Abi>
Vc_INTRINSIC Vc_CONST Vector<T, Abi> log2(const Vector<T, Abi> &x)
Vc_INTRINSIC Vc_CONST Vector<T, detail::not_fixed_size_abi<Abi>> log2(
const Vector<T, Abi> &x)
{
return Detail::LogImpl<Base2>::calc<T, Abi>(x);
}
Expand Down
44 changes: 19 additions & 25 deletions common/operators.h
Expand Up @@ -139,41 +139,35 @@ template <typename T> struct is_a_type : public std::true_type {
} // namespace Detail

#define Vc_GENERIC_OPERATOR(op_) \
template <typename T, typename Abi, typename U> \
Vc_ALWAYS_INLINE enable_if< \
Vc_TEST_FOR_BUILTIN_OPERATOR(op_) && \
std::is_convertible<Vector<T, Abi>, typename Detail::ReturnType< \
Vector<T, Abi>, U>::type>::value && \
std::is_convertible< \
U, typename Detail::ReturnType<Vector<T, Abi>, U>::type>::value, \
typename Detail::ReturnType<Vector<T, Abi>, U>::type> \
template <typename T, typename Abi, typename U, \
class R = typename Detail::ReturnType<Vector<T, Abi>, U>::type> \
Vc_ALWAYS_INLINE enable_if<Vc_TEST_FOR_BUILTIN_OPERATOR(op_) && \
std::is_convertible<Vector<T, Abi>, R>::value && \
std::is_convertible<U, R>::value, \
R> \
operator op_(Vector<T, Abi> x, const U &y) \
{ \
using V = typename Detail::ReturnType<Vector<T, Abi>, U>::type; \
return Detail::operator op_(V(x), V(y)); \
} \
template <typename T, typename Abi, typename U> \
Vc_ALWAYS_INLINE enable_if< \
Vc_TEST_FOR_BUILTIN_OPERATOR(op_) && \
!Traits::is_simd_vector_internal<U>::value && \
std::is_convertible<Vector<T, Abi>, typename Detail::ReturnType< \
Vector<T, Abi>, U>::type>::value && \
std::is_convertible< \
U, typename Detail::ReturnType<Vector<T, Abi>, U>::type>::value, \
typename Detail::ReturnType<Vector<T, Abi>, U>::type> \
template <typename T, typename Abi, typename U, \
class R = typename Detail::ReturnType<Vector<T, Abi>, U>::type> \
Vc_ALWAYS_INLINE enable_if<Vc_TEST_FOR_BUILTIN_OPERATOR(op_) && \
!Traits::is_simd_vector_internal<U>::value && \
std::is_convertible<Vector<T, Abi>, R>::value && \
std::is_convertible<U, R>::value, \
R> \
operator op_(const U &x, Vector<T, Abi> y) \
{ \
using V = typename Detail::ReturnType<Vector<T, Abi>, U>::type; \
return Detail::operator op_(V(x), V(y)); \
} \
template <typename T, typename Abi, typename U> \
Vc_ALWAYS_INLINE enable_if< \
Vc_TEST_FOR_BUILTIN_OPERATOR(op_) && \
std::is_convertible<Vector<T, Abi>, typename Detail::ReturnType< \
Vector<T, Abi>, U>::type>::value && \
std::is_convertible< \
U, typename Detail::ReturnType<Vector<T, Abi>, U>::type>::value, \
Vector<T, Abi> &> \
template <typename T, typename Abi, typename U, \
class R = typename Detail::ReturnType<Vector<T, Abi>, U>::type> \
Vc_ALWAYS_INLINE enable_if<Vc_TEST_FOR_BUILTIN_OPERATOR(op_) && \
std::is_convertible<Vector<T, Abi>, R>::value && \
std::is_convertible<U, R>::value, \
Vector<T, Abi> &> \
operator op_##=(Vector<T, Abi> &x, const U &y) \
{ \
using V = typename Detail::ReturnType<Vector<T, Abi>, U>::type; \
Expand Down

0 comments on commit 6b77615

Please sign in to comment.