diff --git a/CMakeLists.txt b/CMakeLists.txt
index 36772ce..77fad8a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,6 +10,7 @@ option(BUILD_UNITTEST "Build unittest." ON)
 option(BUILD_FUZZ "Build fuzz." OFF)
 option(BUILD_BENCH "Build benchmark." OFF)
 option(ENABLE_SVE2_128 "Build for Arm SVE2 with 128 bit vector size" OFF)
+option(ENABLE_SVE_256 "Build for Arm SVE with 256 bit vector size" OFF)
 
 set(CMAKE_CXX_EXTENSIONS OFF)
 if(BUILD_UNITTEST)
diff --git a/cmake/set_arch_flags.cmake b/cmake/set_arch_flags.cmake
index 7975cd0..26803e1 100644
--- a/cmake/set_arch_flags.cmake
+++ b/cmake/set_arch_flags.cmake
@@ -5,6 +5,8 @@ function(set_arch_flags target arch)
   elseif(arch MATCHES "aarch64")
     if(ENABLE_SVE2_128)
       target_compile_options(${target} PRIVATE -march=armv8-a+sve2 -msve-vector-bits=128)
+    elseif(ENABLE_SVE_256)
+      target_compile_options(${target} PRIVATE -march=armv8-a+sve -msve-vector-bits=256)
     else()
       target_compile_options(${target} PRIVATE -march=armv8-a)
     endif()
diff --git a/include/sonic/internal/arch/simd_dispatch.h b/include/sonic/internal/arch/simd_dispatch.h
index 0c474ce..1c506e8 100644
--- a/include/sonic/internal/arch/simd_dispatch.h
+++ b/include/sonic/internal/arch/simd_dispatch.h
@@ -36,6 +36,9 @@
 #if defined(SONIC_HAVE_SVE2_128)
 #define SONIC_USING_ARCH_FUNC(func) using sve2_128::func
 #define INCLUDE_ARCH_FILE(file) SONIC_STRINGIFY(sve2-128/file)
+#elif defined(SONIC_HAVE_SVE_256)
+#define SONIC_USING_ARCH_FUNC(func) using sve_256::func
+#define INCLUDE_ARCH_FILE(file) SONIC_STRINGIFY(sve-256/file)
 #elif defined(SONIC_HAVE_NEON)
 #define SONIC_USING_ARCH_FUNC(func) using neon::func
 #define INCLUDE_ARCH_FILE(file) SONIC_STRINGIFY(neon/file)
diff --git a/include/sonic/internal/arch/sonic_cpu_feature.h b/include/sonic/internal/arch/sonic_cpu_feature.h
index 21d2707..af0a4a7 100644
--- a/include/sonic/internal/arch/sonic_cpu_feature.h
+++ b/include/sonic/internal/arch/sonic_cpu_feature.h
@@ -44,4 +44,7 @@
 #if defined(__ARM_FEATURE_SVE2) && (__ARM_FEATURE_SVE_BITS == 128)
 #define SONIC_HAVE_SVE2_128
 #endif
+#if defined(__ARM_FEATURE_SVE) && (__ARM_FEATURE_SVE_BITS == 256)
+#define SONIC_HAVE_SVE_256
+#endif
 #endif
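[Editor's note, illustrative only, not part of the patch: the four hunks above wire the new backend into the build and the compile-time dispatch. A minimal sketch of how the selection is expected to behave, assuming a GCC/Clang AArch64 toolchain driven by the flags from set_arch_flags.cmake (-march=armv8-a+sve -msve-vector-bits=256); the file name and include path below follow the tests' convention and are otherwise hypothetical.]

// sve256_check.cc -- hedged sketch, not shipped by this patch.
#include "include/sonic/internal/arch/sonic_cpu_feature.h"

#if defined(SONIC_HAVE_SVE_256)
// The macro is only defined when the compiler advertises SVE with a fixed
// 256-bit vector length, so the sve-256 code may assume svcntb() == 32.
static_assert(__ARM_FEATURE_SVE_BITS == 256,
              "sve-256 backend requires a fixed 256-bit SVE vector length");
#endif

int main() {
#if defined(SONIC_HAVE_SVE_256)
  return 0;  // simd_dispatch.h routes SONIC_USING_ARCH_FUNC to sve_256::*
#else
  return 1;  // otherwise the existing NEON or SVE2-128 path is used
#endif
}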
diff --git a/include/sonic/internal/arch/sve-256/base.h b/include/sonic/internal/arch/sve-256/base.h
new file mode 100644
index 0000000..d5c5693
--- /dev/null
+++ b/include/sonic/internal/arch/sve-256/base.h
@@ -0,0 +1,157 @@
+#pragma once
+
+#include "../common/arm_common/base.h"
+#include <arm_sve.h>
+
+namespace sonic_json {
+namespace internal {
+namespace sve_256 {
+
+using sonic_json::internal::arm_common::ClearLowestBit;
+using sonic_json::internal::arm_common::CountOnes;
+using sonic_json::internal::arm_common::InlinedMemcmp;
+using sonic_json::internal::arm_common::LeadingZeroes;
+using sonic_json::internal::arm_common::PrefixXor;
+using sonic_json::internal::arm_common::TrailingZeroes;
+
+static inline bool is_eq_lt_32(const void* a, const void* b, size_t s) {
+  auto lhs = static_cast<const uint8_t*>(a);
+  auto rhs = static_cast<const uint8_t*>(b);
+  svbool_t pg = svwhilelt_b8((size_t)0, s);
+  svbool_t ptrue = svptrue_b8();
+  svuint8_t va = svld1(pg, lhs);
+  svuint8_t vb = svld1(pg, rhs);
+  svbool_t neq_mask = svcmpne(ptrue, va, vb);
+  return svptest_any(pg, neq_mask) == 0;
+}
+
+sonic_force_inline bool InlinedMemcmpEq(const void* _a, const void* _b, size_t s) {
+  const uint8_t* a = static_cast<const uint8_t*>(_a);
+  const uint8_t* b = static_cast<const uint8_t*>(_b);
+
+  if (s == 0) return true;
+  if (s < 32) return is_eq_lt_32(a, b, s);
+
+  svbool_t ptrue = svptrue_b8();
+  svbool_t pg_head = svwhilelt_b8(0, 32);
+  svuint8_t head_a = svld1(pg_head, a);
+  svuint8_t head_b = svld1(pg_head, b);
+
+  svbool_t cmp_head = svcmpne(pg_head, head_a, head_b);
+  if (svptest_any(pg_head, cmp_head)) {
+    return false;
+  }
+
+  if (s > 32) {
+    size_t tail_offset = s - 32;
+    svuint8_t tail_a = svld1(pg_head, a + tail_offset);
+    svuint8_t tail_b = svld1(pg_head, b + tail_offset);
+    svbool_t cmp_tail = svcmpne(pg_head, tail_a, tail_b);
+    if (svptest_any(pg_head, cmp_tail)) {
+      return false;
+    }
+  }
+
+  if (s > 64) {
+    for (size_t offset = 32; offset < s - 32; offset += 32) {
+      svuint8_t va = svld1(ptrue, a + offset);
+      svuint8_t vb = svld1(ptrue, b + offset);
+      svbool_t neq_mask = svcmpne(ptrue, va, vb);
+      if (svptest_any(ptrue, neq_mask)) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+template <size_t ChunkSize>
+sonic_force_inline void Xmemcpy(void* dst_, const void* src_, size_t chunks) {
+  std::memcpy(dst_, src_, chunks * ChunkSize);
+}
+
+template <>
+sonic_force_inline void Xmemcpy<32>(void* dst_, const void* src_, size_t chunks) {
+  uint8_t* dst = reinterpret_cast<uint8_t*>(dst_);
+  const uint8_t* src = reinterpret_cast<const uint8_t*>(src_);
+  svbool_t pg = svptrue_b8();
+  size_t blocks = chunks / 4;
+  for (size_t i = 0; i < blocks; i++) {
+    for (size_t j = 0; j < 4; j++) {
+      svuint8_t vsrc = svld1_u8(pg, src);
+      svst1_u8(pg, dst, vsrc);
+      src += 32;
+      dst += 32;
+    }
+  }
+
+  switch (chunks & 3) {
+    case 3: {
+      svuint8_t vsrc = svld1_u8(pg, src);
+      svst1_u8(pg, dst, vsrc);
+      src += 32;
+      dst += 32;
+    }
+    /* fall through */
+    case 2: {
+      svuint8_t vsrc = svld1_u8(pg, src);
+      svst1_u8(pg, dst, vsrc);
+      src += 32;
+      dst += 32;
+    }
+    /* fall through */
+    case 1: {
+      svuint8_t vsrc = svld1_u8(pg, src);
+      svst1_u8(pg, dst, vsrc);
+    }
+  }
+}
+
+template <>
+sonic_force_inline void Xmemcpy<16>(void* dst_, const void* src_, size_t chunks) {
+  uint8_t* dst = reinterpret_cast<uint8_t*>(dst_);
+  const uint8_t* src = reinterpret_cast<const uint8_t*>(src_);
+  svbool_t pg = svptrue_b8();
+  size_t blocks = chunks / 8;
+  for (size_t i = 0; i < blocks; i++) {
+    for (size_t j = 0; j < 4; j++) {
+      svuint8_t vsrc = svld1_u8(pg, src);
+      svst1_u8(pg, dst, vsrc);
+      src += 32;
+      dst += 32;
+    }
+  }
+
+  switch ((chunks / 2) & 3) {
+    case 3: {
+      svuint8_t vsrc = svld1_u8(pg, src);
+      svst1_u8(pg, dst, vsrc);
+      src += 32;
+      dst += 32;
+    }
+    /* fall through */
+    case 2: {
+      svuint8_t vsrc = svld1_u8(pg, src);
+      svst1_u8(pg, dst, vsrc);
+      src += 32;
+      dst += 32;
+    }
+    /* fall through */
+    case 1: {
+      svuint8_t vsrc = svld1_u8(pg, src);
+      svst1_u8(pg, dst, vsrc);
+      src += 32;
+      dst += 32;
+    }
+  }
+
+  if (chunks & 1) {
+    svbool_t pg_tail = svwhilelt_b8(0, 16);
+    svuint8_t vsrc = svld1_u8(pg_tail, src);
+    svst1_u8(pg_tail, dst, vsrc);
+  }
+}
+
+}  // namespace sve_256
+}  // namespace internal
+}  // namespace sonic_json
diff --git a/include/sonic/internal/arch/sve-256/itoa.h b/include/sonic/internal/arch/sve-256/itoa.h
new file mode 100644
index 0000000..e3b239f
--- /dev/null
+++ b/include/sonic/internal/arch/sve-256/itoa.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include "../common/arm_common/itoa.h"
+
+namespace sonic_json {
+namespace internal {
+namespace sve_256 {
+
+using sonic_json::internal::arm_common::Utoa_16;
+using sonic_json::internal::arm_common::Utoa_8;
+
+}  // namespace sve_256
+}  // namespace internal
+}  // namespace sonic_json
diff --git a/include/sonic/internal/arch/sve-256/quote.h b/include/sonic/internal/arch/sve-256/quote.h
new file mode 100644
index 0000000..3d77784
--- /dev/null
+++ b/include/sonic/internal/arch/sve-256/quote.h
@@ -0,0 +1,187 @@
+#pragma once
+
+#define VEC_LEN 16
+
+#include "../common/arm_common/quote.h"
+#include "unicode.h"
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+#ifdef __GNUC__
+#if defined(__SANITIZE_THREAD__) || defined(__SANITIZE_ADDRESS__) || defined(__SANITIZE_LEAK__) || \
+    defined(__SANITIZE_UNDEFINED__)
+#ifndef SONIC_USE_SANITIZE
+#define SONIC_USE_SANITIZE
+#endif
+#endif
+#endif
+
+#if defined(__clang__)
+#if defined(__has_feature)
+#if __has_feature(address_sanitizer) || __has_feature(thread_sanitizer) || __has_feature(memory_sanitizer) || \
+    __has_feature(undefined_behavior_sanitizer) || __has_feature(leak_sanitizer)
+#ifndef SONIC_USE_SANITIZE
+#define SONIC_USE_SANITIZE
+#endif
+#endif
+#endif
+#endif
+
+#ifndef VEC_LEN
+#error "You should define VEC_LEN before including quote.h!"
+#endif
+
+#define MOVE_N_CHARS(src, N) \
+  do {                       \
+    (src) += (N);            \
+    nb -= (N);               \
+    dst += (N);              \
+  } while (0)
+
+namespace sonic_json {
+namespace internal {
+namespace sve_256 {
+
+sonic_force_inline svbool_t copy_get_escaped_mask_predicate(svbool_t pg, const char *src, char *dst) {
+  svuint8_t v = svld1_u8(pg, reinterpret_cast<const uint8_t *>(src));
+  svst1_u8(pg, reinterpret_cast<uint8_t *>(dst), v);
+  svbool_t m1 = svcmpeq_n_u8(pg, v, '\\');
+  svbool_t m2 = svcmpeq_n_u8(pg, v, '"');
+  svbool_t m3 = svcmplt_n_u8(pg, v, '\x20');
+  svbool_t m4 = svorr_b_z(pg, m1, m2);
+  svbool_t m5 = svorr_b_z(pg, m3, m4);
+  return m5;
+}
+
+// Returns the index of the first (lowest-numbered) active element in the predicate.
+sonic_force_inline int get_first_active_index(svbool_t input) {
+  return svlastb_u8(svbrka_b_z(input, input), svindex_u8(0, 1));
+}
+
+sonic_force_inline size_t parseStringInplace(uint8_t *&src, SonicError &err) {
+#define SONIC_REPEAT8(v) {v v v v v v v v}
+
+  uint8_t *dst = src;
+  uint8_t *sdst = src;
+  while (1) {
+  find:
+    auto block = StringBlock::Find(src);
+    if (block.HasQuoteFirst()) {
+      int idx = block.QuoteIndex();
+      src += idx;
+      *src++ = '\0';
+      return src - sdst - 1;
+    }
+    if (block.HasUnescaped()) {
+      err = kParseErrorUnEscaped;
+      return 0;
+    }
+    if (!block.HasBackslash()) {
+      src += VEC_LEN;
+      goto find;
+    }
+
+    /* find out where the backslash is */
+    auto bs_dist = block.BsIndex();
+    src += bs_dist;
+    dst = src;
+  cont:
+    uint8_t escape_char = src[1];
+    if (sonic_unlikely(escape_char == 'u')) {
+      if (!handle_unicode_codepoint(const_cast<const uint8_t **>(&src), &dst)) {
+        err = kParseErrorEscapedUnicode;
+        return 0;
+      }
+    } else {
+      *dst = kEscapedMap[escape_char];
+      if (sonic_unlikely(*dst == 0u)) {
+        err = kParseErrorEscapedFormat;
+        return 0;
+      }
+      src += 2;
+      dst += 1;
+    }
+    // fast path for continuous escaped chars
+    if (*src == '\\') {
+      bs_dist = 0;
+      goto cont;
+    }
+
+  find_and_move:
+    // Copy the next n bytes, and find the backslash and quote in them.
+    uint8x16_t v = vld1q_u8(src);
+    block = StringBlock::Find(v);
+    // If the next thing is the end quote, copy and return
+    if (block.HasQuoteFirst()) {
+      // we encountered quotes first. Move dst to point to quotes and exit
+      while (1) {
+        SONIC_REPEAT8(if (sonic_unlikely(*src == '"')) break;
+                      else { *dst++ = *src++; });
+      }
+      *dst = '\0';
+      src++;
+      return dst - sdst;
+    }
+    if (block.HasUnescaped()) {
+      err = kParseErrorUnEscaped;
+      return 0;
+    }
+    if (!block.HasBackslash()) {
+      /* neither a quote nor a backslash was found in this block:
+       * copy it verbatim and keep scanning. */
+      vst1q_u8(dst, v);
+      src += VEC_LEN;
+      dst += VEC_LEN;
+      goto find_and_move;
+    }
+    while (1) {
+      SONIC_REPEAT8(if (sonic_unlikely(*src == '\\')) break;
+                    else { *dst++ = *src++; });
+    }
+    goto cont;
+  }
+  sonic_assert(false);
+#undef SONIC_REPEAT8
+}
+
+sonic_force_inline char *Quote(const char *src, size_t nb, char *dst) {
+  *dst++ = '"';
+  sonic_assert(nb < (1LL << 0x20));
+  auto svelen = svcntb();
+  svbool_t ptrue = svptrue_b8();
+  while (nb > svelen) {
+    svbool_t mask = copy_get_escaped_mask_predicate(ptrue, src, dst);
+    if (svptest_any(ptrue, mask)) {
+      auto cn = get_first_active_index(mask);
+      MOVE_N_CHARS(src, cn);
+      DoEscape(src, dst, nb);
+    } else {
+      MOVE_N_CHARS(src, svelen);
+    }
+  }
+  while (nb > 0) {
+    svbool_t predicate = svwhilelt_b8_u64(0, nb);
+    svbool_t mask = copy_get_escaped_mask_predicate(predicate, src, dst);
+    if (svptest_any(predicate, mask)) {
+      auto cn = get_first_active_index(mask);
+      MOVE_N_CHARS(src, cn);
+      DoEscape(src, dst, nb);
+    } else {
+      auto active_elems = svcntp_b8(predicate, predicate);
+      MOVE_N_CHARS(src, active_elems);
+    }
+  }
+  *dst++ = '"';
+  return dst;
+}
+
+}  // namespace sve_256
+}  // namespace internal
+}  // namespace sonic_json
+
+#undef VEC_LEN
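[Editor's note, illustrative only, not part of the patch: get_first_active_index() above combines svbrka (break after first true: keep lanes up to and including the first active one) with svlastb over the index vector 0,1,2,..., which yields the lane number of the first escaped/quote/control byte flagged by the mask. A scalar model of that value, assuming the predicate has at least one active lane, which is the only way it is called in this file and in skip.h:]

// Scalar sketch of get_first_active_index(): index of the lowest active lane.
static int first_active_index_scalar(const bool* lanes, int n_lanes) {
  for (int i = 0; i < n_lanes; ++i) {
    if (lanes[i]) return i;  // first byte that needs escaping / first whitespace hit
  }
  return n_lanes;  // unreachable in practice: callers test svptest_any() first
}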
diff --git a/include/sonic/internal/arch/sve-256/skip.h b/include/sonic/internal/arch/sve-256/skip.h
new file mode 100644
index 0000000..6c1301d
--- /dev/null
+++ b/include/sonic/internal/arch/sve-256/skip.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include <arm_neon.h>
+#include <arm_sve.h>
+#include "../neon/skip.h"
+#include "base.h"
+
+#define VEC_LEN 16
+#define VEC_LEN_SVE svcntb()
+
+namespace sonic_json {
+namespace internal {
+namespace sve_256 {
+
+using sonic_json::internal::common::EqBytes4;
+using sonic_json::internal::common::SkipLiteral;
+
+using neon::to_bitmask;
+#include "../common/arm_common/skip.inc.h"
+
+using neon::SkipContainer;
+
+sonic_force_inline uint8_t skip_space(const uint8_t *data, size_t &pos, size_t &, uint64_t &) {
+  // fast path for single space
+  if (!IsSpace(data[pos++]))
+    return data[pos - 1];
+  if (!IsSpace(data[pos++]))
+    return data[pos - 1];
+
+  svbool_t ptrue = svptrue_b8();
+  // current pos is out of block
+  while (1) {
+    svuint8_t v = svld1_u8(ptrue, reinterpret_cast<const uint8_t *>(data + pos));
+    svbool_t m1 = svcmpeq_n_u8(ptrue, v, '\r');
+    svbool_t m2 = svcmpeq_n_u8(ptrue, v, '\n');
+    svbool_t m3 = svcmpeq_n_u8(ptrue, v, '\t');
+    svbool_t m4 = svcmpeq_n_u8(ptrue, v, ' ');
+    svbool_t m5 = svorr_b_z(ptrue, m1, m2);
+    svbool_t m6 = svorr_b_z(ptrue, m3, m4);
+    svbool_t mask = svnor_b_z(ptrue, m5, m6);
+    if (svptest_any(ptrue, mask)) {
+      pos += get_first_active_index(mask);
+      return data[pos++];
+    } else {
+      pos += VEC_LEN_SVE;
+    }
+  }
+
+  sonic_assert(false && "!should not happen");
+}
+
+}  // namespace sve_256
+}  // namespace internal
+}  // namespace sonic_json
+
+#undef VEC_LEN
diff --git a/include/sonic/internal/arch/sve-256/str2int.h b/include/sonic/internal/arch/sve-256/str2int.h
new file mode 100644
index 0000000..b2c8049
--- /dev/null
+++ b/include/sonic/internal/arch/sve-256/str2int.h
@@ -0,0 +1,127 @@
+#pragma once
+
+#define STR2INT_LOWEST_VALUE_MUL_FACTOR 10
+#define STR2INT_CASE_TEN_MUL_FACTOR 100
+#define STR2INT_CASE_ELEVEN_MUL_FACTOR 1000
+#define STR2INT_CASE_TWELEVE_MUL_FACTOR 10000
+#define STR2INT_CASE_THIRTEEN_MUL_FACTOR 100000
+#define STR2INT_CASE_FOURTEEN_MUL_FACTOR 1000000
+#define STR2INT_CASE_FIFTEEN_MUL_FACTOR 10000000
+#define STR2INT_CASE_SIXTEEN_MUL_FACTOR 100000000
+
+namespace sonic_json {
+namespace internal {
+namespace sve_256 {
+
+sonic_force_inline uint32_t low_half_simd_str2int(svuint32_t data, svbool_t curPg, uint32_t *mulFactor) {
+  svuint32_t wideData = svrev_u32(data);
+  wideData = svcompact_u32(curPg, wideData);
+  svuint32_t vecFactor = svld1_u32(svptrue_pat_b32(SV_VL8), mulFactor);
+  uint32_t lowSum = svaddv_u32(svptrue_pat_b32(SV_VL8), svmul_u32_x(svptrue_pat_b32(SV_VL8), vecFactor, wideData));
+  return lowSum;
+}
+
+sonic_force_inline uint32_t low_full_half_simd_str2int(svuint32_t data, svbool_t curPg, uint32_t *mulFactor) {
+  svuint32_t wideData = svrev_u32(data);
+  svuint32_t vecFactor = svld1_u32(curPg, mulFactor);
+  uint32_t lowSum = svaddv_u32(curPg, svmul_u32_x(curPg, vecFactor, wideData));
+  return lowSum;
+}
+
+sonic_force_inline uint64_t process_low_case_wide_data(svuint16_t data, uint32_t *mulFactor, uint32_t num) {
+  svbool_t curPg = svnot_z(svptrue_pat_b16(SV_VL16), svwhilelt_b32_u32(0, num));
+  svuint32_t wideData = svunpklo_u32(data);
+  return low_half_simd_str2int(wideData, curPg, mulFactor);
+}
+
+sonic_force_inline uint64_t process_low_case_8_data(svuint16_t data, uint32_t *mulFactor) {
+  svbool_t pgWide = svptrue_pat_b32(SV_VL8);
+  svuint32_t wideData = svunpklo_u32(data);
+  return low_full_half_simd_str2int(wideData, pgWide, mulFactor);
+}
+
+sonic_force_inline uint64_t process_low_case_9_data(svuint16_t data, uint32_t *mulFactor) {
+  svuint32_t wideData = svunpklo_u32(data);
+  uint64_t lowSum = low_full_half_simd_str2int(wideData, svptrue_pat_b32(SV_VL8), mulFactor);
+  svbool_t curPg = svwhilelt_b16_u32(0, 0x9);
+  return svlastb_u16(curPg, data) + lowSum * STR2INT_LOWEST_VALUE_MUL_FACTOR;
+}
+
+sonic_force_inline uint64_t process_low_case_10_data(svuint16_t data, uint32_t *mulFactor) {
+  svuint32_t wideData = svunpklo_u32(data);
+  uint64_t lowSum = low_full_half_simd_str2int(wideData, svptrue_pat_b32(SV_VL8), mulFactor);
+  svbool_t curPg = svwhilelt_b16_u32(0, 0x9);
+  return svlasta_u16(curPg, data) + svlastb_u16(curPg, data) * STR2INT_LOWEST_VALUE_MUL_FACTOR +
+         lowSum * STR2INT_CASE_TEN_MUL_FACTOR;
+}
+
+sonic_force_inline uint64_t process_high_case_wide_data(svuint16_t data, uint32_t *mulFactor, uint32_t num1,
+                                                        uint32_t num2) {
+  svbool_t pgWide = svnot_z(svptrue_pat_b16(SV_VL16), svwhilelt_b32_u32(0, num1));
+  svuint32_t wideData = svunpklo_u32(data);
+  uint64_t lowSum = low_full_half_simd_str2int(wideData, svptrue_pat_b32(SV_VL8), mulFactor);
+  wideData = svunpkhi_u32(data);
+  uint64_t highSum = low_half_simd_str2int(wideData, pgWide, mulFactor);
+  return lowSum * num2 + highSum;
+}
+
+sonic_force_inline uint64_t process_low_case_16_data(svuint16_t data, uint32_t *mulFactor) {
+  svuint32_t wideData = svunpklo_u32(data);
+  uint64_t lowSum = low_full_half_simd_str2int(wideData, svptrue_pat_b32(SV_VL8), mulFactor);
+  wideData = svunpkhi_u32(data);
+  uint64_t highSum = low_full_half_simd_str2int(wideData, svptrue_pat_b32(SV_VL8), mulFactor);
+  return lowSum * STR2INT_CASE_SIXTEEN_MUL_FACTOR + highSum;
+}
+
+sonic_force_inline uint64_t simd_str2int(const char *c, int &man_nd) {
+  uint32_t mulFactor[8] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000};
+  svbool_t pgAll = svptrue_pat_b16(SV_VL16);
+  svuint16_t data = svld1sb_u16(pgAll, reinterpret_cast<const int8_t *>(&c[0]));
+  data = svsub_n_u16_x(pgAll, data, '0');
+  svbool_t gt_nine = svcmpgt_n_u16(pgAll, data, 9);
+  int num_end_idx = 16;
+  if (svptest_any(pgAll, gt_nine)) {
+    num_end_idx = svcntp_b16(pgAll, svbrkb_z(pgAll, gt_nine));
+  }
+  man_nd = man_nd < num_end_idx ? man_nd : num_end_idx;
+  switch (man_nd) {
+    case 1:
+      return svlastb_u16(svwhilelt_b16_u32(0, 1), data);
+    case 0x2:
+      return svlastb_u16(svwhilelt_b16_u32(0, 1), data) * 0xa + svlasta_u16(svwhilelt_b16_u32(0, 1), data);
+    case 0x3:
+      return process_low_case_wide_data(data, mulFactor, 0x5);
+    case 0x4:
+      return process_low_case_wide_data(data, mulFactor, 0x4);
+    case 0x5:
+      return process_low_case_wide_data(data, mulFactor, 0x3);
+    case 0x6:
+      return process_low_case_wide_data(data, mulFactor, 0x2);
+    case 0x7:
+      return process_low_case_wide_data(data, mulFactor, 0x1);
+    case 0x8:
+      return process_low_case_8_data(data, mulFactor);
+    case 0x9:
+      return process_low_case_9_data(data, mulFactor);
+    case 0xa:
+      return process_low_case_10_data(data, mulFactor);
+    case 0xb:
+      return process_high_case_wide_data(data, mulFactor, 0x5, STR2INT_CASE_ELEVEN_MUL_FACTOR);
+    case 0xc:
+      return process_high_case_wide_data(data, mulFactor, 0x4, STR2INT_CASE_TWELEVE_MUL_FACTOR);
+    case 0xd:
+      return process_high_case_wide_data(data, mulFactor, 0x3, STR2INT_CASE_THIRTEEN_MUL_FACTOR);
+    case 0xe:
+      return process_high_case_wide_data(data, mulFactor, 0x2, STR2INT_CASE_FOURTEEN_MUL_FACTOR);
+    case 0xf:
+      return process_high_case_wide_data(data, mulFactor, 1, STR2INT_CASE_FIFTEEN_MUL_FACTOR);
+    case 0x10:
+      return process_low_case_16_data(data, mulFactor);
+    default:
+      return 0;
+  }
+  return 1;
+}
+
+}  // namespace sve_256
+}  // namespace internal
+}  // namespace sonic_json
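[Editor's note, illustrative only, not part of the patch: simd_str2int() loads 16 bytes from c, clamps man_nd to the number of leading decimal digits it actually found, and returns their value. A hedged usage sketch; the call site, variable names, and the guarantee of at least 16 readable bytes after c are assumptions (sonic's number-parsing path is expected to provide that padding), and the file's own prerequisites such as arm_sve.h and sonic_force_inline must already be in scope.]

// Illustrative fragment only; assumes sve-256/str2int.h and its prerequisites are already included.
#include <cstdint>

uint64_t example_mantissa() {
  const char* digits = "1234567890123456x";  // 16 digits followed by a non-digit
  int man_nd = 19;                           // caller asks for up to 19 significand digits
  uint64_t mantissa = sonic_json::internal::sve_256::simd_str2int(digits, man_nd);
  // Only the first 16 bytes are loaded and all of them are digits, so man_nd is
  // clamped to 16 and the case 0x10 path returns 1234567890123456.
  return mantissa;
}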
diff --git a/include/sonic/internal/arch/sve-256/unicode.h b/include/sonic/internal/arch/sve-256/unicode.h
new file mode 100644
index 0000000..2b29319
--- /dev/null
+++ b/include/sonic/internal/arch/sve-256/unicode.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include
+
+#include
+#include
+
+#include "../common/unicode_common.h"
+#include "../neon/unicode.h"
+#include "base.h"
+
+namespace sonic_json {
+namespace internal {
+namespace sve_256 {
+
+using neon::handle_unicode_codepoint;
+
+using neon::StringBlock;
+
+using neon::GetNonSpaceBits;
+
+}  // namespace sve_256
+}  // namespace internal
+}  // namespace sonic_json
diff --git a/tests/memcmp_test.cpp b/tests/memcmp_test.cpp
index b81d26d..c4f2cbd 100644
--- a/tests/memcmp_test.cpp
+++ b/tests/memcmp_test.cpp
@@ -19,10 +19,11 @@
 #include
 
 #include "gtest/gtest.h"
-#include "include/sonic/internal/arch/avx2/base.h"
 #include "include/sonic/internal/arch/sonic_cpu_feature.h"
 
 #if defined(SONIC_HAVE_AVX2) && !defined(SONIC_DYNAMIC_DISPATCH)
+#include "include/sonic/internal/arch/avx2/base.h"
+
 namespace {
 
 using namespace sonic_json::internal::avx2;