Skip to content

Commit

Permalink
support other platforms in TargetName and HWY_EXPORT. Fixes #183, tha…
Browse files Browse the repository at this point in the history
…nks @saschanaz

PiperOrigin-RevId: 372515215
  • Loading branch information
jan-wassenberg authored and Copybara-Service committed May 7, 2021
1 parent 79dd488 commit d81d8bc
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 19 deletions.
10 changes: 8 additions & 2 deletions hwy/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,10 @@
#define HWY_ARCH_X86_64 0
#endif

#if HWY_ARCH_X86_32 && HWY_ARCH_X86_64
#error "Cannot have both x86-32 and x86-64"
#endif

#if HWY_ARCH_X86_32 || HWY_ARCH_X86_64
#define HWY_ARCH_X86 1
#else
Expand Down Expand Up @@ -249,9 +253,11 @@
#define HWY_ARCH_RVV 0
#endif

// It is an error to detect multiple architectures at the same time, but OK to
// detect none of the above.
#if (HWY_ARCH_X86 + HWY_ARCH_PPC + HWY_ARCH_ARM + HWY_ARCH_WASM + \
HWY_ARCH_RVV) != 1
#error "Must detect exactly one platform"
HWY_ARCH_RVV) > 1
#error "Must not detect more than one architecture"
#endif

//------------------------------------------------------------------------------
Expand Down
3 changes: 1 addition & 2 deletions hwy/ops/x86_256-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -2551,8 +2551,7 @@ HWY_API uint64_t BitsFromMask(hwy::SizeTag<2> /*tag*/, const Mask256<T> mask) {
const auto compressed =
_mm256_permute4x64_epi64(sign_bits, _MM_SHUFFLE(3, 1, 2, 0));
return static_cast<unsigned>(_mm256_movemask_epi8(compressed));

#endif
#endif // HWY_ARCH_X86_64
}

template <typename T>
Expand Down
2 changes: 1 addition & 1 deletion hwy/ops/x86_512-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ HWY_API Mask512<T> FirstN(const Full512<T> /*tag*/, size_t n) {
return Mask512<T>{static_cast<Bits>(_bzhi_u64(~uint64_t(0), n))};
#else
return detail::FirstN<T>(n);
#endif
#endif // HWY_ARCH_X86_64
}

// ------------------------------ IfThenElse
Expand Down
6 changes: 3 additions & 3 deletions hwy/targets.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@
#include <intrin.h>
#else // HWY_COMPILER_MSVC
#include <cpuid.h>
#endif // HWY_COMPILER_MSVC
#endif
#endif // HWY_COMPILER_MSVC
#endif // HWY_ARCH_X86

namespace hwy {
namespace {
Expand Down Expand Up @@ -126,7 +126,7 @@ constexpr uint32_t kAVX512VL = 1u << 13;
constexpr uint32_t kAVX512DQ = 1u << 14;
constexpr uint32_t kAVX512BW = 1u << 15;
constexpr uint32_t kGroupAVX3 = kAVX512F | kAVX512VL | kAVX512DQ | kAVX512BW;
#endif
#endif // HWY_ARCH_X86

} // namespace

Expand Down
35 changes: 24 additions & 11 deletions hwy/targets.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@
// 0x2000000, 0x4000000, 0x8000000, 0x10000000 reserved

#define HWY_SCALAR 0x20000000

#define HWY_HIGHEST_TARGET_BIT_SCALAR 29

// Cannot use higher values, otherwise HWY_TARGETS computation might overflow.

//------------------------------------------------------------------------------
Expand Down Expand Up @@ -150,33 +153,37 @@
// user to override this without any guarantee of success.
#ifndef HWY_BASELINE_TARGETS

#ifdef __wasm_simd128__
// Also check HWY_ARCH to ensure that simulating unknown platforms ends up with
// HWY_TARGET == HWY_SCALAR.

#if HWY_ARCH_WASM && defined(__wasm_simd128__)
#define HWY_BASELINE_WASM HWY_WASM
#else
#define HWY_BASELINE_WASM 0
#endif

// Avoid choosing the PPC target until we have an implementation.
#if defined(__VSX__) && 0
#if HWY_ARCH_PPC && defined(__VSX__) && 0
#define HWY_BASELINE_PPC8 HWY_PPC8
#else
#define HWY_BASELINE_PPC8 0
#endif

#if defined(__ARM_FEATURE_SVE2) && 0
// Avoid choosing the SVE[2] targets until the implementation is ready.
#if HWY_ARCH_ARM && defined(__ARM_FEATURE_SVE2) && 0
#define HWY_BASELINE_SVE2 HWY_SVE2
#else
#define HWY_BASELINE_SVE2 0
#endif

#if defined(__ARM_FEATURE_SVE) && 0
#if HWY_ARCH_ARM && defined(__ARM_FEATURE_SVE) && 0
#define HWY_BASELINE_SVE HWY_SVE
#else
#define HWY_BASELINE_SVE 0
#endif

// GCC 4.5.4 only defines the former; 5.4 defines both.
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#if HWY_ARCH_ARM && (defined(__ARM_NEON__) || defined(__ARM_NEON))
#define HWY_BASELINE_NEON HWY_NEON
#else
#define HWY_BASELINE_NEON 0
Expand All @@ -185,25 +192,26 @@
// MSVC does not set SSE4_1, but it does set AVX; checking for the latter means
// we at least get SSE4 on machines supporting AVX but not AVX2.
// https://stackoverflow.com/questions/18563978/
#if defined(__SSE4_1__) || (HWY_COMPILER_MSVC != 0 && defined(__AVX__))
#if HWY_ARCH_X86 && \
(defined(__SSE4_1__) || (HWY_COMPILER_MSVC != 0 && defined(__AVX__)))
#define HWY_BASELINE_SSE4 HWY_SSE4
#else
#define HWY_BASELINE_SSE4 0
#endif

#ifdef __AVX2__
#if HWY_ARCH_X86 && defined(__AVX2__)
#define HWY_BASELINE_AVX2 HWY_AVX2
#else
#define HWY_BASELINE_AVX2 0
#endif

#ifdef __AVX512F__
#if HWY_ARCH_X86 && defined(__AVX512F__)
#define HWY_BASELINE_AVX3 HWY_AVX3
#else
#define HWY_BASELINE_AVX3 0
#endif

#ifdef __riscv_vector
#if HWY_ARCH_RVV && defined(__riscv_vector)
#define HWY_BASELINE_RVV HWY_RVV
#else
#define HWY_BASELINE_RVV 0
Expand Down Expand Up @@ -371,7 +379,7 @@ static inline HWY_MAYBE_UNUSED const char* TargetName(uint32_t target) {
return "Scalar";

default:
return "?";
return "Unknown"; // must satisfy gtest IsValidParamName()
}
}

Expand Down Expand Up @@ -471,7 +479,12 @@ static inline HWY_MAYBE_UNUSED const char* TargetName(uint32_t target) {
nullptr, /* reserved */ \
HWY_CHOOSE_RVV(func_name) /* RVV */

#endif // HWY_ARCH_RVV
#else
// Unknown architecture, will use HWY_SCALAR without dynamic dispatch, though
// still creating single-entry tables in HWY_EXPORT to ensure portability.
#define HWY_MAX_DYNAMIC_TARGETS 1
#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_SCALAR
#endif

struct ChosenTarget {
public:
Expand Down

0 comments on commit d81d8bc

Please sign in to comment.