diff --git a/BUILD.bazel b/BUILD.bazel index 06611a3d..cfac4f58 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -18,6 +18,26 @@ cc_library( includes = ["include"], ) +cc_binary( + name = "benchmark-arm", + srcs = glob([ + "benchmark/*.cpp", + "benchmark/*.h", + "benchmark/*.hpp", + ]), + data = glob(["testdata/*.json"]), + deps = [ + ":sonic-cpp", + "@google_benchmark//:benchmark", + "@rapidjson", + "@cJSON", + "@yyjson", + "@simdjson", + ], + copts = ['-O3', '-march=armv8-a', '-DNDEBUG', '-std=c++17'], + linkopts = ['-lstdc++fs'], +) + cc_binary( name = "benchmark", srcs = glob([ @@ -87,6 +107,56 @@ cc_test( ], ) +cc_test( + name = "unittest-arm", + srcs = glob([ + "tests/*.h", + "tests/*.cpp", + "include/sonic/*", + "include/sonic/**/*", + ]), + deps = [ + ":string_view", + "@gtest//:gtest_main", + ], + data = glob([ "testdata/*.json"]), + linkopts = sanitize_copts + [ + '-lstdc++fs', + '-fstack-protector-all', + '-fsanitize-link-c++-runtime' + ], + copts = sanitize_copts + [ + '-O3', '-g', '-UNDEBUG', '-std=c++14', '-march=armv8-a', + '-fstack-protector-all', + '-Iinclude', '-Wall', '-Wextra', '-Werror', + ], +) + +cc_test( + name = "unittest-sse", + srcs = glob([ + "tests/*.h", + "tests/*.cpp", + "include/sonic/*", + "include/sonic/**/*", + ]), + deps = [ + ":string_view", + "@gtest//:gtest_main", + ], + data = glob([ "testdata/*.json"]), + linkopts = sanitize_copts + [ + '-lstdc++fs', + '-fstack-protector-all', + '-fsanitize-link-c++-runtime' + ], + copts = sanitize_copts + [ + '-O3', '-g', '-UNDEBUG', '-std=c++14', '-march=westmere', + '-fstack-protector-all', + '-Iinclude', '-Wall', '-Wextra', '-Werror', + ], +) + cc_test( name = "unittest-gcc-coverage", srcs = glob([ diff --git a/bazel/cJSON.BUILD b/bazel/cJSON.BUILD index b9eca446..1115e928 100644 --- a/bazel/cJSON.BUILD +++ b/bazel/cJSON.BUILD @@ -3,5 +3,6 @@ cc_library( name = "cJSON", srcs = ["cJSON.c"], hdrs = ["cJSON.h"], - copts = ['-O3' ,'-DNDEBUG', '-march=haswell'], + copts = ['-O3' ,'-DNDEBUG',], 
) + diff --git a/bazel/yyjson.BUILD b/bazel/yyjson.BUILD index 2b499f3f..3e812cb6 100644 --- a/bazel/yyjson.BUILD +++ b/bazel/yyjson.BUILD @@ -4,5 +4,6 @@ cc_library( srcs = ["src/yyjson.c"], hdrs = ["src/yyjson.h"], includes = ["src"], - copts = ['-O3', '-DNDEBUG', '-march=haswell', '-g'], + copts = ['-O3', '-DNDEBUG', '-g'], ) + diff --git a/include/sonic/allocator.h b/include/sonic/allocator.h index eb8230da..d27e2d15 100644 --- a/include/sonic/allocator.h +++ b/include/sonic/allocator.h @@ -89,7 +89,12 @@ class SpinLock { break; } while (lock_.load(std::memory_order_relaxed)) { +#if defined(__x86_64__) || defined(_M_AMD64) __builtin_ia32_pause(); +#elif defined(__aarch64__) || defined(_M_ARM64) + asm volatile("yield"); +#else +#endif } } } diff --git a/include/sonic/dom/dynamicnode.h b/include/sonic/dom/dynamicnode.h index 4c95ca8f..24f73a8e 100644 --- a/include/sonic/dom/dynamicnode.h +++ b/include/sonic/dom/dynamicnode.h @@ -27,8 +27,6 @@ #include "sonic/dom/type.h" #include "sonic/error.h" #include "sonic/internal/ftoa.h" -#include "sonic/internal/itoa.h" -#include "sonic/internal/quote.h" #include "sonic/writebuffer.h" namespace sonic_json { diff --git a/include/sonic/dom/handler.h b/include/sonic/dom/handler.h index 0d0e15cf..478fd20e 100644 --- a/include/sonic/dom/handler.h +++ b/include/sonic/dom/handler.h @@ -19,7 +19,7 @@ #include #include "sonic/dom/type.h" -#include "sonic/internal/haswell.h" +#include "sonic/internal/arch/simd_base.h" #include "sonic/string_view.h" #include "sonic/writebuffer.h" @@ -153,7 +153,7 @@ class SAXHandler { if (pairs) { void *mem = obj.template containerMalloc(pairs, *alloc_); obj.setChildren(mem); - internal::haswell::xmemcpy( + internal::Xmemcpy( (void *)obj.getObjChildrenFirstUnsafe(), (void *)(&obj + 1), pairs); } else { obj.setChildren(nullptr); @@ -169,7 +169,7 @@ class SAXHandler { arr.setLength(count, kArray); if (count) { arr.setChildren(arr.template containerMalloc(count, *alloc_)); - internal::haswell::xmemcpy( 
+ internal::Xmemcpy( (void *)arr.getArrChildrenFirstUnsafe(), (void *)(&arr + 1), count); } else { arr.setChildren(nullptr); @@ -239,7 +239,7 @@ class LazySAXHandler { arr.setLength(count, kArray); if (count) { arr.setChildren(arr.template containerMalloc(count, *alloc_)); - internal::haswell::xmemcpy( + internal::Xmemcpy( (void *)arr.getArrChildrenFirstUnsafe(), (void *)(&arr + 1), count); stack_.Pop(count); } else { @@ -254,7 +254,7 @@ class LazySAXHandler { if (pairs) { void *mem = obj.template containerMalloc(pairs, *alloc_); obj.setChildren(mem); - internal::haswell::xmemcpy( + internal::Xmemcpy( (void *)obj.getObjChildrenFirstUnsafe(), (void *)(&obj + 1), pairs); stack_.Pop(pairs); } else { diff --git a/include/sonic/dom/parser.h b/include/sonic/dom/parser.h index ef8f3f7a..82c0c61a 100644 --- a/include/sonic/dom/parser.h +++ b/include/sonic/dom/parser.h @@ -24,12 +24,12 @@ #include "sonic/dom/handler.h" #include "sonic/dom/json_pointer.h" #include "sonic/error.h" +#include "sonic/internal/arch/simd_quote.h" +#include "sonic/internal/arch/simd_skip.h" +#include "sonic/internal/arch/simd_str2int.h" #include "sonic/internal/atof_native.h" -#include "sonic/internal/haswell.h" #include "sonic/internal/parse_number_normal_fast.h" -#include "sonic/internal/simd_str2int.h" -#include "sonic/internal/skip.h" -#include "sonic/internal/unicode.h" +#include "sonic/internal/utils.h" #include "sonic/writebuffer.h" namespace sonic_json { @@ -352,24 +352,27 @@ class Parser { double_fract : { int fract_len = FLOATING_LONGEST_DIGITS - man_nd; if (fract_len > 0) { - uint64_t sum = internal::simd_str2int_sse(s + i, fract_len); - const uint64_t pow10[17] = {1, - 10, - 100, - 1000, - 10000, - 100000, - 1000000, - 10000000, - 100000000, - 1000000000, - 10000000000, - 100000000000, - 1000000000000, - 10000000000000, - 100000000000000, - 1000000000000000, - 10000000000000000}; + uint64_t sum = internal::simd_str2int(s + i, fract_len); + const uint64_t pow10[18] = { + 1, + 10, + 100, 
+ 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000, + 100000000000, + 1000000000000, + 10000000000000, + 100000000000000, + 1000000000000000, + 10000000000000000, + 100000000000000000, + }; man = man * pow10[fract_len] + sum; man_nd += fract_len; i += fract_len; diff --git a/include/sonic/dom/serialize.h b/include/sonic/dom/serialize.h index 3a93c4da..5c35858f 100644 --- a/include/sonic/dom/serialize.h +++ b/include/sonic/dom/serialize.h @@ -20,9 +20,9 @@ #include "sonic/dom/flags.h" #include "sonic/dom/type.h" #include "sonic/error.h" +#include "sonic/internal/arch/simd_quote.h" #include "sonic/internal/ftoa.h" #include "sonic/internal/itoa.h" -#include "sonic/internal/quote.h" #include "sonic/writebuffer.h" namespace sonic_json { diff --git a/include/sonic/internal/haswell.h b/include/sonic/internal/arch/avx2/base.h similarity index 82% rename from include/sonic/internal/haswell.h rename to include/sonic/internal/arch/avx2/base.h index b24ffdf2..2988a746 100644 --- a/include/sonic/internal/haswell.h +++ b/include/sonic/internal/arch/avx2/base.h @@ -17,12 +17,13 @@ #pragma once -#include "sonic/internal/simd.h" -#include "sonic/macro.h" +#include + +#include "simd.h" namespace sonic_json { namespace internal { -namespace haswell { +namespace avx2 { using namespace simd; @@ -30,7 +31,7 @@ using namespace simd; // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. 
-sonic_force_inline int trailing_zeroes(uint64_t input_num) { +sonic_force_inline int TrailingZeroes(uint64_t input_num) { //////// // You might expect the next line to be equivalent to // return (int)_tzcnt_u64(input_num); @@ -40,7 +41,7 @@ sonic_force_inline int trailing_zeroes(uint64_t input_num) { } /* result might be undefined when input_num is zero */ -sonic_force_inline uint64_t clear_lowest_bit(uint64_t input_num) { +sonic_force_inline uint64_t ClearLowestBit(uint64_t input_num) { #if __BMI__ return _blsr_u64(input_num); #else @@ -49,21 +50,21 @@ sonic_force_inline uint64_t clear_lowest_bit(uint64_t input_num) { } /* result might be undefined when input_num is zero */ -sonic_force_inline int leading_zeroes(uint64_t input_num) { +sonic_force_inline int LeadingZeroes(uint64_t input_num) { return __builtin_clzll(input_num); } -sonic_force_inline long long int count_ones(uint64_t input_num) { +sonic_force_inline long long int CountOnes(uint64_t input_num) { return __builtin_popcountll(input_num); } -sonic_force_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t* result) { +sonic_force_inline bool AddOverflow(uint64_t value1, uint64_t value2, + uint64_t* result) { return __builtin_uaddll_overflow( value1, value2, reinterpret_cast(result)); } -sonic_force_inline uint64_t prefix_xor(const uint64_t bitmask) { +sonic_force_inline uint64_t PrefixXor(const uint64_t bitmask) { // There should be no such thing with a processor supporting avx2 // but not clmul. 
#if __PCLMUL__ @@ -77,17 +78,17 @@ sonic_force_inline uint64_t prefix_xor(const uint64_t bitmask) { #endif } -sonic_force_inline bool is_ascii(const simd8x64& input) { +sonic_force_inline bool IsAscii(const simd8x64& input) { return input.reduce_or().is_ascii(); } template -sonic_force_inline void xmemcpy(void* dst_, const void* src_, size_t chunks) { +sonic_force_inline void Xmemcpy(void* dst_, const void* src_, size_t chunks) { std::memcpy(dst_, src_, chunks * ChunkSize); } template <> -sonic_force_inline void xmemcpy<32>(void* dst_, const void* src_, +sonic_force_inline void Xmemcpy<32>(void* dst_, const void* src_, size_t chunks) { uint8_t* dst = reinterpret_cast(dst_); const uint8_t* src = reinterpret_cast(src_); @@ -121,7 +122,7 @@ sonic_force_inline void xmemcpy<32>(void* dst_, const void* src_, } template <> -sonic_force_inline void xmemcpy<16>(void* dst_, const void* src_, +sonic_force_inline void Xmemcpy<16>(void* dst_, const void* src_, size_t chunks) { uint8_t* dst = reinterpret_cast(dst_); const uint8_t* src = reinterpret_cast(src_); @@ -160,6 +161,6 @@ sonic_force_inline void xmemcpy<16>(void* dst_, const void* src_, } } -} // namespace haswell +} // namespace avx2 } // namespace internal } // namespace sonic_json diff --git a/include/sonic/internal/arch/avx2/itoa.h b/include/sonic/internal/arch/avx2/itoa.h new file mode 100644 index 00000000..d608bf5c --- /dev/null +++ b/include/sonic/internal/arch/avx2/itoa.h @@ -0,0 +1,31 @@ +/* + * Copyright 2022 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../common/x86_common/itoa.h" + +namespace sonic_json { +namespace internal { +namespace avx2 { + +using sonic_json::internal::x86_common::Utoa_16; +using sonic_json::internal::x86_common::Utoa_8; +using sonic_json::internal::x86_common::UtoaSSE; + +} // namespace avx2 +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/arch/avx2/quote.h b/include/sonic/internal/arch/avx2/quote.h new file mode 100644 index 00000000..cc383c08 --- /dev/null +++ b/include/sonic/internal/arch/avx2/quote.h @@ -0,0 +1,63 @@ +/* + * Copyright 2022 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include +#include + +#include "../common/quote_common.h" +#include "../common/quote_tables.h" +#include "base.h" +#include "simd.h" +#include "unicode.h" + +#ifndef VEC_FULL_MASK +#define VEC_FULL_MASK 0xFFFFFFFF +#endif + +namespace sonic_json { +namespace internal { +namespace x86_common { + +using StringBlock = avx2::StringBlock; +using VecType = simd::simd256; + +static sonic_force_inline int CopyAndGetEscapMask(const char *src, char *dst) { + avx2::simd256 v(reinterpret_cast(src)); + v.store(reinterpret_cast(dst)); + return ((v < '\x20') | (v == '\\') | (v == '"')).to_bitmask(); +} + +} // namespace x86_common +} // namespace internal +} // namespace sonic_json + +#include "../common/x86_common/quote.h" + +namespace sonic_json { +namespace internal { +namespace avx2 { + +using sonic_json::internal::x86_common::parseStringInplace; +using sonic_json::internal::x86_common::Quote; + +} // namespace avx2 +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/simd.h b/include/sonic/internal/arch/avx2/simd.h similarity index 99% rename from include/sonic/internal/simd.h rename to include/sonic/internal/arch/avx2/simd.h index 2df3f5b5..7feb29b6 100644 --- a/include/sonic/internal/simd.h +++ b/include/sonic/internal/arch/avx2/simd.h @@ -18,6 +18,7 @@ #pragma once #include +#include #include @@ -25,7 +26,9 @@ #error "AVX2 instruction set required. Missing option -mavx2 ?" 
#endif -#include "sonic/macro.h" +#ifndef VEC_LEN +#define VEC_LEN 32 +#endif namespace sonic_json { namespace internal { @@ -108,7 +111,7 @@ struct base128 { return Child(_mm_set1_epi8(_value)); } static sonic_force_inline Child repeat_16(REPEAT16_ARGS(T)) { - return Child(REPEAT16_ARGS(), REPEAT16_ARGS()); + return Child(REPEAT16_ARGS()); } template sonic_force_inline Child prev(const Child prev_chunk) const { diff --git a/include/sonic/internal/arch/avx2/skip.h b/include/sonic/internal/arch/avx2/skip.h new file mode 100644 index 00000000..15aa24e2 --- /dev/null +++ b/include/sonic/internal/arch/avx2/skip.h @@ -0,0 +1,62 @@ +/* + * Copyright 2022 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "base.h" +#include "quote.h" +#include "simd.h" +#include "unicode.h" + +namespace sonic_json { +namespace internal { +namespace x86_common { + +using avx2::GetEscapedBranchless; +using avx2::GetNonSpaceBits; +using VecUint8Type = simd::simd256; +using VecBoolType = simd::simd256; + +} // namespace x86_common +} // namespace internal +} // namespace sonic_json + +#include "../common/x86_common/skip.h" + +namespace sonic_json { +namespace internal { +namespace avx2 { + +using sonic_json::internal::common::EqBytes4; +using sonic_json::internal::common::SkipLiteral; +using sonic_json::internal::x86_common::GetNextToken; +using sonic_json::internal::x86_common::GetStringBits; +using sonic_json::internal::x86_common::SkipArray; +using sonic_json::internal::x86_common::SkipContainer; +using sonic_json::internal::x86_common::SkipNumber; +using sonic_json::internal::x86_common::SkipObject; +using sonic_json::internal::x86_common::SkipScanner; +using sonic_json::internal::x86_common::SkipString; + +} // namespace avx2 +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/arch/avx2/str2int.h b/include/sonic/internal/arch/avx2/str2int.h new file mode 100644 index 00000000..976ac5c9 --- /dev/null +++ b/include/sonic/internal/arch/avx2/str2int.h @@ -0,0 +1,17 @@ +#pragma once + +#include + +#include + +#include "../sse/str2int.h" + +namespace sonic_json { +namespace internal { +namespace avx2 { + +using sse::simd_str2int; + +} // namespace avx2 +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/arch/avx2/unicode.h b/include/sonic/internal/arch/avx2/unicode.h new file mode 100644 index 00000000..a1459e38 --- /dev/null +++ b/include/sonic/internal/arch/avx2/unicode.h @@ -0,0 +1,94 @@ +// Copyright 2018-2019 The simdjson authors + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file 
except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file may have been modified by ByteDance authors. All ByteDance +// Modifications are Copyright 2022 ByteDance Authors. + +#pragma once + +#include + +#include +#include + +#include "../common/unicode_common.h" +#include "base.h" +#include "simd.h" + +namespace sonic_json { +namespace internal { +namespace avx2 { + +using namespace simd; +using sonic_json::internal::common::handle_unicode_codepoint; + +struct StringBlock { + public: + sonic_force_inline static StringBlock Find(const uint8_t *src); + sonic_force_inline bool HasQuoteFirst() { + return (((bs_bits - 1) & quote_bits) != 0) && !HasUnescaped(); + } + sonic_force_inline bool HasBackslash() { + return ((quote_bits - 1) & bs_bits) != 0; + } + sonic_force_inline bool HasUnescaped() { + return ((quote_bits - 1) & unescaped_bits) != 0; + } + sonic_force_inline int QuoteIndex() { return TrailingZeroes(quote_bits); } + sonic_force_inline int BsIndex() { return TrailingZeroes(bs_bits); } + sonic_force_inline int UnescapedIndex() { + return TrailingZeroes(unescaped_bits); + } + + uint32_t bs_bits; + uint32_t quote_bits; + uint32_t unescaped_bits; +}; + +sonic_force_inline StringBlock StringBlock::Find(const uint8_t *src) { + simd256 v(src); + return { + static_cast((v == '\\').to_bitmask()), + static_cast((v == '"').to_bitmask()), + static_cast((v <= '\x1f').to_bitmask()), + }; +} + +sonic_force_inline uint64_t GetNonSpaceBits(const uint8_t *data) { + const simd::simd8x64 v(data); + const auto whitespace_table = + simd256::repeat_16(' ', 100, 100, 100, 
17, 100, 113, 2, 100, + '\t', '\n', 112, 100, '\r', 100, 100); + + uint64_t space = v.eq({_mm256_shuffle_epi8(whitespace_table, v.chunks[0]), + _mm256_shuffle_epi8(whitespace_table, v.chunks[1])}); + return ~space; +} + +sonic_force_inline uint64_t GetEscapedBranchless(uint64_t &prev_escaped, + uint64_t backslash) { + backslash &= ~prev_escaped; + uint64_t follows_escape = backslash << 1 | prev_escaped; + const uint64_t even_bits = 0x5555555555555555ULL; + uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape; + uint64_t sequences_starting_on_even_bits; + prev_escaped = AddOverflow(odd_sequence_starts, backslash, + &sequences_starting_on_even_bits); + uint64_t invert_mask = sequences_starting_on_even_bits << 1; + return (even_bits ^ invert_mask) & follows_escape; +} + +} // namespace avx2 +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/arch/common/quote_common.h b/include/sonic/internal/arch/common/quote_common.h new file mode 100644 index 00000000..b6258f0c --- /dev/null +++ b/include/sonic/internal/arch/common/quote_common.h @@ -0,0 +1,80 @@ +/* + * Copyright 2022 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "quote_tables.h" + +// Not check the buffer size of dst, src must be a valid UTF-8 string with +// null-terminator. 
+#define MOVE_N_CHARS(src, N) \ + { \ + (src) += (N); \ + nb -= (N); \ + dst += (N); \ + } + +#ifdef __GNUC__ +#if defined(__SANITIZE_THREAD__) || defined(__SANITIZE_ADDRESS__) || \ + defined(__SANITIZE_LEAK__) || defined(__SANITIZE_UNDEFINED__) +#ifndef SONIC_USE_SANITIZE +#define SONIC_USE_SANITIZE +#endif +#endif +#endif + +#if defined(__clang__) +#if defined(__has_feature) +#if __has_feature(address_sanitizer) || __has_feature(thread_sanitizer) || \ + __has_feature(memory_sanitizer) || \ + __has_feature(undefined_behavior_sanitizer) || \ + __has_feature(leak_sanitizer) +#ifndef SONIC_USE_SANITIZE +#define SONIC_USE_SANITIZE +#endif +#endif +#endif +#endif + +namespace sonic_json { +namespace internal { + +static sonic_force_inline uint8_t GetEscapeMask4(const char *src) { + return kNeedEscaped[*(uint8_t *)(src)] | + (kNeedEscaped[*(uint8_t *)(src + 1)] << 1) | + (kNeedEscaped[*(uint8_t *)(src + 2)] << 2) | + (kNeedEscaped[*(uint8_t *)(src + 3)] << 3); +} + +sonic_static_inline void DoEscape(const char *&src, char *&dst, size_t &nb) { + /* get the escape entry, handle consecutive quotes */ + do { + uint8_t ch = *(uint8_t *)src; + int nc = kQuoteTab[ch].n; + std::memcpy(dst, kQuoteTab[ch].s, 8); + src++; + nb--; + dst += nc; + if (nb <= 0) return; + /* copy and find escape chars */ + if (kNeedEscaped[*(uint8_t *)(src)] == 0) { + return; + } + } while (true); +} + +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/arch/common/quote_tables.h b/include/sonic/internal/arch/common/quote_tables.h new file mode 100644 index 00000000..6d7543a4 --- /dev/null +++ b/include/sonic/internal/arch/common/quote_tables.h @@ -0,0 +1,179 @@ +/* + * Copyright 2022 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace sonic_json { +namespace internal { + +// kEscapedMap maps the escaped char into origin char, as follows: +// ['/' ] = '/', +// ['"' ] = '"', +// ['b' ] = '\b', +// ['f' ] = '\f', +// ['n' ] = '\n', +// ['r' ] = '\r', +// ['t' ] = '\t', +// ['u' ] = -1, +// ['\\'] = '\\', +static const uint8_t kEscapedMap[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, '"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '/', + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '\\', 0, 0, 0, + 0, 0, '\b', 0, 0, 0, '\f', 0, 0, 0, 0, 0, 0, 0, '\n', 0, + 0, 0, '\r', 0, '\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// GCC didn't support non-trivial designated initializers C99 extension +struct QuotedChar { + long n; + const char *s; +}; + +static const struct QuotedChar kQuoteTab[256] = { + // 0x00 ~ 0x1f + {.n = 6, .s = "\\u0000\0\0"}, + {.n = 6, .s = "\\u0001\0\0"}, + {.n = 6, .s = "\\u0002\0\0"}, + {.n = 6, .s = "\\u0003\0\0"}, + {.n = 6, .s = "\\u0004\0\0"}, + {.n = 6, .s = "\\u0005\0\0"}, + 
{.n = 6, .s = "\\u0006\0\0"}, + {.n = 6, .s = "\\u0007\0\0"}, + {.n = 2, .s = "\\b\0\0\0\0\0\0"}, + {.n = 2, .s = "\\t\0\0\0\0\0\0"}, + {.n = 2, .s = "\\n\0\0\0\0\0\0"}, + {.n = 6, .s = "\\u000b\0\0"}, + {.n = 2, .s = "\\f\0\0\0\0\0\0"}, + {.n = 2, .s = "\\r\0\0\0\0\0\0"}, + {.n = 6, .s = "\\u000e\0\0"}, + {.n = 6, .s = "\\u000f\0\0"}, + {.n = 6, .s = "\\u0010\0\0"}, + {.n = 6, .s = "\\u0011\0\0"}, + {.n = 6, .s = "\\u0012\0\0"}, + {.n = 6, .s = "\\u0013\0\0"}, + {.n = 6, .s = "\\u0014\0\0"}, + {.n = 6, .s = "\\u0015\0\0"}, + {.n = 6, .s = "\\u0016\0\0"}, + {.n = 6, .s = "\\u0017\0\0"}, + {.n = 6, .s = "\\u0018\0\0"}, + {.n = 6, .s = "\\u0019\0\0"}, + {.n = 6, .s = "\\u001a\0\0"}, + {.n = 6, .s = "\\u001b\0\0"}, + {.n = 6, .s = "\\u001c\0\0"}, + {.n = 6, .s = "\\u001d\0\0"}, + {.n = 6, .s = "\\u001e\0\0"}, + {.n = 6, .s = "\\u001f\0\0"}, + // 0x20 ~ 0x2f + {0, 0}, + {0, 0}, + {.n = 2, .s = "\\\"\0\0\0\0\0\0"}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + // 0x30 ~ 0x4f + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + // 0x50 ~ 0x5f + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {.n = 2, .s = "\\\\\0\0\0\0\0\0"}, + {0, 0}, + {0, 0}, + {0, 0}, + // 0x60 ~ 0xff +}; + +static const bool kNeedEscaped[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/arch/common/skip_common.h b/include/sonic/internal/arch/common/skip_common.h new file mode 100644 index 00000000..81fe4efd --- /dev/null +++ b/include/sonic/internal/arch/common/skip_common.h @@ -0,0 +1,65 @@ +/* + * Copyright 2022 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "sonic/macro.h" + +namespace sonic_json { +namespace internal { +namespace common { + +static sonic_force_inline bool EqBytes4(const uint8_t *src, uint32_t target) { + uint32_t val; + static_assert(sizeof(uint32_t) <= SONICJSON_PADDING, + "SONICJSON_PADDING must be at least 4 bytes"); + std::memcpy(&val, src, sizeof(uint32_t)); + return val == target; +} + +sonic_force_inline bool SkipLiteral(const uint8_t *data, size_t &pos, + size_t len, uint8_t token) { + static constexpr uint32_t kNullBin = 0x6c6c756e; + static constexpr uint32_t kTrueBin = 0x65757274; + static constexpr uint32_t kFalseBin = + 0x65736c61; // little-endian bytes of "alse", the tail of "false" + auto start = data + pos - 1; + auto end = data + len; + switch (token) { + case 't': + if (start + 4 <= end && EqBytes4(start, kTrueBin)) { + pos += 3; + return true; + } + break; + case 'n': + if (start + 4 <= end && EqBytes4(start, kNullBin)) { + pos += 3; + return true; + } + break; + case 'f': + if (start + 5 <= end && EqBytes4(start + 1, kFalseBin)) { + pos += 4; + return true; + } + } + return false; +} + +} // namespace common +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/unicode.h b/include/sonic/internal/arch/common/unicode_common.h similarity index 86% rename from include/sonic/internal/unicode.h rename to include/sonic/internal/arch/common/unicode_common.h index 6ded5cde..708347a6 100644 --- a/include/sonic/internal/unicode.h +++ b/include/sonic/internal/arch/common/unicode_common.h @@ -16,18 +16,10 @@ // Modifications are Copyright 2022 ByteDance Authors. 
#pragma once -#include -#include - -#include "sonic/internal/haswell.h" -#include "sonic/internal/simd.h" -#include "sonic/macro.h" namespace sonic_json { namespace internal { - -using namespace simd; -using namespace haswell; +namespace common { static const uint32_t digit_to_val32[886] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, @@ -274,63 +266,6 @@ sonic_force_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, return offset > 0; } -struct StringBlock { - public: - sonic_force_inline static StringBlock Find(const uint8_t *src); - sonic_force_inline bool HasQuoteFirst() { - return (((bs_bits - 1) & quote_bits) != 0) && !HasUnescaped(); - } - sonic_force_inline bool HasBackslash() { - return ((quote_bits - 1) & bs_bits) != 0; - } - sonic_force_inline bool HasUnescaped() { - return ((quote_bits - 1) & unescaped_bits) != 0; - } - sonic_force_inline int QuoteIndex() { - return haswell::trailing_zeroes(quote_bits); - } - sonic_force_inline int BsIndex() { return haswell::trailing_zeroes(bs_bits); } - sonic_force_inline int UnescapedIndex() { - return haswell::trailing_zeroes(unescaped_bits); - } - - uint32_t bs_bits; - uint32_t quote_bits; - uint32_t unescaped_bits; -}; - -sonic_force_inline StringBlock StringBlock::Find(const uint8_t *src) { - simd256 v(src); - return { - static_cast((v == '\\').to_bitmask()), - static_cast((v == '"').to_bitmask()), - static_cast((v <= '\x1f').to_bitmask()), - }; -} - -sonic_force_inline uint64_t GetNonSpaceBits(const uint8_t *data) { - const simd::simd8x64 v(data); - const auto whitespace_table = - simd256::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, - '\t', '\n', 112, 100, '\r', 100, 100); - - uint64_t space = v.eq({_mm256_shuffle_epi8(whitespace_table, v.chunks[0]), - _mm256_shuffle_epi8(whitespace_table, v.chunks[1])}); - return ~space; -} - -sonic_force_inline uint64_t GetEscapedBranchless(uint64_t &prev_escaped, - uint64_t backslash) { - backslash &= ~prev_escaped; - uint64_t 
follows_escape = backslash << 1 | prev_escaped; - const uint64_t even_bits = 0x5555555555555555ULL; - uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape; - uint64_t sequences_starting_on_even_bits; - prev_escaped = add_overflow(odd_sequence_starts, backslash, - &sequences_starting_on_even_bits); - uint64_t invert_mask = sequences_starting_on_even_bits << 1; - return (even_bits ^ invert_mask) & follows_escape; -} - +} // namespace common } // namespace internal -} // namespace sonic_json \ No newline at end of file +} // namespace sonic_json diff --git a/include/sonic/internal/arch/common/x86_common/itoa.h b/include/sonic/internal/arch/common/x86_common/itoa.h new file mode 100644 index 00000000..5a8b70e2 --- /dev/null +++ b/include/sonic/internal/arch/common/x86_common/itoa.h @@ -0,0 +1,128 @@ +/* + * Copyright 2022 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#pragma once
+
+#include
+#include
+#include
+
+#include
+
+namespace sonic_json {
+
+namespace internal {
+
+namespace x86_common {
+
+#define as_m128p(v) ((__m128i *)(v))  // v viewed as a writable __m128i pointer
+#define as_m128c(v) ((const __m128i *)(v))  // v viewed as a read-only __m128i pointer
+#define as_m128v(v) (*(const __m128i *)(v))  // the __m128i value stored at v
+#define as_uint64v(p) (*(uint64_t *)(p))  // the uint64_t stored at p
+
+static const char kVec16xAsc0[16] sonic_align(16) = {  // ASCII '0' in every byte lane
+    '0', '0', '0', '0', '0', '0', '0', '0',
+    '0', '0', '0', '0', '0', '0', '0', '0',
+};
+
+static const uint16_t kVec8x10[8] sonic_align(16) = {  // 10 in every 16-bit lane
+    10, 10, 10, 10, 10, 10, 10, 10,
+};
+
+static const uint32_t kVec4x10k[4] sonic_align(16) = {  // 10000 in every 32-bit lane
+    10000,
+    10000,
+    10000,
+    10000,
+};
+
+static const uint32_t kVec4xDiv10k[4] sonic_align(16) = {  // ceil(2^45 / 10000); multiply then shift right by 45 to divide by 10000
+    0xd1b71759,
+    0xd1b71759,
+    0xd1b71759,
+    0xd1b71759,
+};
+
+static const uint16_t kVecDivPowers[8] sonic_align(16) = {  // ~2^23/1000, ~2^19/100, ~2^17/10, 2^15, repeated for both halves
+    0x20c5, 0x147b, 0x3334, 0x8000, 0x20c5, 0x147b, 0x3334, 0x8000,
+};
+
+static const uint16_t kVecShiftPowers[8] sonic_align(16) = {  // 2^7, 2^11, 2^13, 2^15; a multiply-high by these acts as a right shift by 9, 5, 3, 1
+    0x0080, 0x0800, 0x2000, 0x8000, 0x0080, 0x0800, 0x2000, 0x8000,
+};
+
+// Convert each decimal digit of num into its own packed 16-bit lane of a vector.
+// num's digits as abcdefgh (high bits is 0 if not enough) +// The converted vector is { a, b, c, d, e, f, g, h } +sonic_force_inline __m128i UtoaSSE(uint32_t num) { + // num(abcdefgh) -> v04 = vector{abcd, efgh, 0, 0, 0, 0, 0, 0} + __m128i v00 = _mm_cvtsi32_si128(num); + __m128i v01 = _mm_mul_epu32(v00, as_m128v(kVec4xDiv10k)); + __m128i v02 = _mm_srli_epi64(v01, 45); + __m128i v03 = _mm_mul_epu32(v02, as_m128v(kVec4x10k)); + __m128i v04 = _mm_sub_epi32(v00, v03); + __m128i v05 = _mm_unpacklo_epi16(v02, v04); + + // v08 = vector{abcd * 4, abcd * 4, abcd * 4, abcd * 4, efgh * 4, efgh * 4, + // efgh * 4, efgh * 4} + __m128i v06 = _mm_slli_epi64(v05, 2); + __m128i v07 = _mm_unpacklo_epi16(v06, v06); + __m128i v08 = _mm_unpacklo_epi32(v07, v07); + + // v10 = { a, ab, abc, abcd, e, ef, efg, efgh } + __m128i v09 = _mm_mulhi_epu16(v08, as_m128v(kVecDivPowers)); + __m128i v10 = _mm_mulhi_epu16(v09, as_m128v(kVecShiftPowers)); + + // v12 = { 0, a0, ab0, abc0, 0, e0, ef0, efg0 } + __m128i v11 = _mm_mullo_epi16(v10, as_m128v(kVec8x10)); + __m128i v12 = _mm_slli_epi64(v11, 16); + + // v13 = { a, b, c, d, e, f, g, h } + __m128i v13 = _mm_sub_epi16(v10, v12); + return v13; +} + +static sonic_force_inline char *Utoa_8(uint32_t val, char *out) { + /* convert to digits */ + __m128i v0 = UtoaSSE(val); + __m128i v1 = _mm_setzero_si128(); + + /* convert to bytes, add '0' */ + __m128i v2 = _mm_packus_epi16(v0, v1); + __m128i v3 = _mm_add_epi8(v2, as_m128v(kVec16xAsc0)); + + /* store high 64 bits */ + _mm_storeu_si128(as_m128p(out), v3); + return out + 8; +} + +static sonic_force_inline char *Utoa_16(uint64_t val, char *out) { + /* remaining digits */ + __m128i v0 = UtoaSSE((uint32_t)(val / 100000000)); + __m128i v1 = UtoaSSE((uint32_t)(val % 100000000)); + __m128i v2 = _mm_packus_epi16(v0, v1); + __m128i v3 = _mm_add_epi8(v2, as_m128v(kVec16xAsc0)); + + /* convert to bytes, add '0' */ + _mm_storeu_si128(as_m128p(out), v3); + return out + 16; +} + +} // namespace x86_common + +} // 
namespace internal + +} // namespace sonic_json diff --git a/include/sonic/internal/arch/common/x86_common/quote.h b/include/sonic/internal/arch/common/x86_common/quote.h new file mode 100644 index 00000000..a49e764b --- /dev/null +++ b/include/sonic/internal/arch/common/x86_common/quote.h @@ -0,0 +1,189 @@ +/* + * Copyright 2022 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../unicode_common.h" + +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +#if !defined(VEC_LEN) || !defined(VEC_FULL_MASK) +#error "You should define VEC macros before including quote.h" +#endif + +// sse macros +// #define VEC_LEN 16 +// #define VEC_FULL_MASK 0xFFFF + +// avx2 macros +// #define VEC_LEN 32 +// #define VEC_FULL_MASK 0xFFFFFFFF + +namespace sonic_json { +namespace internal { +namespace x86_common { + +using common::handle_unicode_codepoint; + +sonic_force_inline size_t parseStringInplace(uint8_t *&src, SonicError &err) { +#define SONIC_REPEAT8(v) {v v v v v v v v} + + uint8_t *dst = src; + uint8_t *sdst = src; + while (1) { + find: + auto block = StringBlock::Find(src); + if (block.HasQuoteFirst()) { + int idx = block.QuoteIndex(); + src += idx; + *src++ = '\0'; + return src - sdst - 1; + } + if (block.HasUnescaped()) { + err = kParseErrorUnEscaped; + return 0; + } + if (!block.HasBackslash()) { + src += VEC_LEN; + goto find; + } + + /* find out where the backspace is */ + auto bs_dist = block.BsIndex(); + src += 
bs_dist; + dst = src; + cont: + uint8_t escape_char = src[1]; + if (sonic_unlikely(escape_char == 'u')) { + if (!handle_unicode_codepoint(const_cast(&src), &dst)) { + err = kParseErrorEscapedUnicode; + return 0; + } + } else { + *dst = kEscapedMap[escape_char]; + if (sonic_unlikely(*dst == 0u)) { + err = kParseErrorEscapedFormat; + return 0; + } + src += 2; + dst += 1; + } + // fast path for continous escaped chars + if (*src == '\\') { + bs_dist = 0; + goto cont; + } + + find_and_move: + // Copy the next n bytes, and find the backslash and quote in them. + VecType v(src); + block = StringBlock{ + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + static_cast((v <= '\x1f').to_bitmask()), + }; + // If the next thing is the end quote, copy and return + if (block.HasQuoteFirst()) { + // we encountered quotes first. Move dst to point to quotes and exit + while (1) { + SONIC_REPEAT8(if (sonic_unlikely(*src == '"')) break; + else { *dst++ = *src++; }); + } + *dst = '\0'; + src++; + return dst - sdst; + } + if (block.HasUnescaped()) { + err = kParseErrorUnEscaped; + return 0; + } + if (!block.HasBackslash()) { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + v.store(dst); + src += VEC_LEN; + dst += VEC_LEN; + goto find_and_move; + } + while (1) { + SONIC_REPEAT8(if (sonic_unlikely(*src == '\\')) break; + else { *dst++ = *src++; }); + } + goto cont; + } + sonic_assert(false); +#undef SONIC_REPEAT8 +} + +// forward declartion +static sonic_force_inline int CopyAndGetEscapMask(const char *src, char *dst); + +sonic_static_inline char *Quote(const char *src, size_t nb, char *dst) { + *dst++ = '"'; + sonic_assert(nb < (1ULL << 32)); + uint32_t mm; + int cn; + + /* VEC_LEN-byte loop */ + while (nb >= VEC_LEN) { + /* check for matches */ + // TODO: optimize: exploit the simd bitmask in the escape block. 
+ if ((mm = CopyAndGetEscapMask(src, dst)) != 0) { + cn = __builtin_ctz(mm); + MOVE_N_CHARS(src, cn); + DoEscape(src, dst, nb); + } else { + /* move to next block */ + MOVE_N_CHARS(src, VEC_LEN); + } + } + + if (nb > 0) { + char tmp_src[VEC_LEN * 2]; + const char *src_r; +#ifdef SONIC_USE_SANITIZE + if (0) { +#else + /* This code would cause address sanitizer report heap-buffer-overflow. */ + if (((size_t)(src) & (PAGE_SIZE - 1)) <= (PAGE_SIZE - VEC_LEN * 2)) { + src_r = src; +#endif + } else { + std::memcpy(tmp_src, src, nb); + src_r = tmp_src; + } + while (nb > 0) { + mm = CopyAndGetEscapMask(src_r, dst) & (VEC_FULL_MASK >> (VEC_LEN - nb)); + if (mm) { + cn = __builtin_ctz(mm); + MOVE_N_CHARS(src_r, cn); + DoEscape(src_r, dst, nb); + } else { + dst += nb; + nb = 0; + } + } + } + + *dst++ = '"'; + return dst; +} + +} // namespace x86_common +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/skip.h b/include/sonic/internal/arch/common/x86_common/skip.h similarity index 80% rename from include/sonic/internal/skip.h rename to include/sonic/internal/arch/common/x86_common/skip.h index e41ef98f..2a5ab859 100644 --- a/include/sonic/internal/skip.h +++ b/include/sonic/internal/arch/common/x86_common/skip.h @@ -15,32 +15,25 @@ */ #pragma once -#include -#include "sonic/dom/json_pointer.h" -#include "sonic/error.h" -#include "sonic/internal/haswell.h" -#include "sonic/internal/quote.h" -#include "sonic/internal/simd.h" -#include "sonic/internal/unicode.h" -#include "sonic/macro.h" +#include "../skip_common.h" -namespace sonic_json { -namespace internal { +#ifndef VEC_LEN +#error "You should define VEC macros before including skip.h" +#endif -using namespace haswell; +// sse macros +// #define VEC_LEN 16 -static sonic_force_inline bool EqBytes4(const uint8_t *src, uint32_t target) { - uint32_t val; - static_assert(sizeof(uint32_t) <= SONICJSON_PADDING, - "SONICJSON_PADDING must be larger than 4 bytes"); - std::memcpy(&val, src, 
sizeof(uint32_t)); - return val == target; -} +// avx2 macros +// #define VEC_LEN 32 -static sonic_force_inline bool IsSpace(uint8_t ch) { - return ch == ' ' || ch == '\r' || ch == '\n' || ch == '\t'; -} +namespace sonic_json { +namespace internal { +namespace x86_common { + +using sonic_json::internal::common::EqBytes4; +using sonic_json::internal::common::SkipLiteral; sonic_force_inline uint64_t GetStringBits(const uint8_t *data, uint64_t &prev_instring, @@ -55,7 +48,7 @@ sonic_force_inline uint64_t GetStringBits(const uint8_t *data, prev_escaped = 0; } uint64_t quote_bits = v.eq('"') & ~escaped; - uint64_t in_string = prefix_xor(quote_bits) ^ prev_instring; + uint64_t in_string = PrefixXor(quote_bits) ^ prev_instring; prev_instring = uint64_t(static_cast(in_string) >> 63); return in_string; } @@ -65,18 +58,18 @@ sonic_force_inline uint64_t GetStringBits(const uint8_t *data, template sonic_force_inline uint8_t GetNextToken(const uint8_t *data, size_t &pos, size_t len, const char (&tokens)[N]) { - while (pos + 32 <= len) { - simd256 v(data + pos); - simd256 vor(false); + while (pos + VEC_LEN <= len) { + VecUint8Type v(data + pos); + VecBoolType vor(false); for (size_t i = 0; i < N - 1; i++) { vor |= (v == (uint8_t)(tokens[i])); } uint32_t next = static_cast(vor.to_bitmask()); if (next) { - pos += trailing_zeroes(next); + pos += TrailingZeroes(next); return data[pos]; } - pos += 32; + pos += VEC_LEN; } while (pos < len) { for (size_t i = 0; i < N - 1; i++) { @@ -98,12 +91,12 @@ sonic_force_inline int SkipString(const uint8_t *data, size_t &pos, uint64_t quote_bits; uint64_t escaped, bs_bits, prev_escaped = 0; bool found = false; - while (pos + 32 <= len) { - const simd::simd256 v(data + pos); - bs_bits = static_cast((v == '\\').to_bitmask()); - quote_bits = static_cast((v == '"').to_bitmask()); + while (pos + VEC_LEN <= len) { + const VecUint8Type v(data + pos); + bs_bits = static_cast((v == '\\').to_bitmask()); + quote_bits = static_cast((v == '"').to_bitmask()); 
if (((bs_bits - 1) & quote_bits) != 0) { - pos += trailing_zeroes(quote_bits) + 1; + pos += TrailingZeroes(quote_bits) + 1; return found ? kEscaped : kNormal; } if (bs_bits) { @@ -111,11 +104,11 @@ sonic_force_inline int SkipString(const uint8_t *data, size_t &pos, found = true; quote_bits &= ~escaped; if (quote_bits) { - pos += trailing_zeroes(quote_bits) + 1; + pos += TrailingZeroes(quote_bits) + 1; return kEscaped; } } - pos += 32; + pos += VEC_LEN; } while (pos < len) { if (data[pos] == '\\') { @@ -139,26 +132,26 @@ sonic_force_inline bool SkipContainer(const uint8_t *data, size_t &pos, const uint8_t *p; while (pos + 64 <= len) { p = data + pos; -#define SKIP_LOOP() \ - { \ - instring = GetStringBits(p, prev_instring, prev_escaped); \ - simd::simd8x64 v(p); \ - last_lbrace_num = lbrace_num; \ - uint64_t rbrace = v.eq(right) & ~instring; \ - uint64_t lbrace = v.eq(left) & ~instring; \ - /* traverse each '}' */ \ - while (rbrace > 0) { \ - rbrace_num++; \ - lbrace_num = last_lbrace_num + count_ones((rbrace - 1) & lbrace); \ - bool is_closed = lbrace_num < rbrace_num; \ - if (is_closed) { \ - sonic_assert(rbrace_num == lbrace_num + 1); \ - pos += trailing_zeroes(rbrace) + 1; \ - return true; \ - } \ - rbrace &= (rbrace - 1); \ - } \ - lbrace_num = last_lbrace_num + count_ones(lbrace); \ +#define SKIP_LOOP() \ + { \ + instring = GetStringBits(p, prev_instring, prev_escaped); \ + simd::simd8x64 v(p); \ + last_lbrace_num = lbrace_num; \ + uint64_t rbrace = v.eq(right) & ~instring; \ + uint64_t lbrace = v.eq(left) & ~instring; \ + /* traverse each '}' */ \ + while (rbrace > 0) { \ + rbrace_num++; \ + lbrace_num = last_lbrace_num + CountOnes((rbrace - 1) & lbrace); \ + bool is_closed = lbrace_num < rbrace_num; \ + if (is_closed) { \ + sonic_assert(rbrace_num == lbrace_num + 1); \ + pos += TrailingZeroes(rbrace) + 1; \ + return true; \ + } \ + rbrace &= (rbrace - 1); \ + } \ + lbrace_num = last_lbrace_num + CountOnes(lbrace); \ } SKIP_LOOP(); pos += 64; @@ -167,6 +160,7 
@@ sonic_force_inline bool SkipContainer(const uint8_t *data, size_t &pos, std::memcpy(buf, data + pos, len - pos); p = buf; SKIP_LOOP(); +#undef SKIP_LOOP return false; } @@ -180,36 +174,6 @@ sonic_force_inline bool SkipObject(const uint8_t *data, size_t &pos, return SkipContainer(data, pos, len, '{', '}'); } -sonic_force_inline bool SkipLiteral(const uint8_t *data, size_t &pos, - size_t len, uint8_t token) { - static constexpr uint32_t kNullBin = 0x6c6c756e; - static constexpr uint32_t kTrueBin = 0x65757274; - static constexpr uint32_t kFalseBin = - 0x65736c61; // the binary of 'alse' in false - auto start = data + pos - 1; - auto end = data + len; - switch (token) { - case 't': - if (start + 4 <= end && EqBytes4(start, kTrueBin)) { - pos += 3; - return true; - }; - break; - case 'n': - if (start + 4 <= end && EqBytes4(start, kNullBin)) { - pos += 3; - return true; - }; - break; - case 'f': - if (start + 5 <= end && EqBytes4(start + 1, kFalseBin)) { - pos += 4; - return true; - } - } - return false; -} - sonic_force_inline uint8_t SkipNumber(const uint8_t *data, size_t &pos, size_t len) { return GetNextToken(data, pos, len, "]},"); @@ -232,7 +196,7 @@ class SkipScanner { nonspace = GetNonSpaceBits(data + pos); if (nonspace) { nonspace_bits_end_ = pos + 64; - pos += trailing_zeroes(nonspace); + pos += TrailingZeroes(nonspace); nonspace_bits_ = nonspace; return data[pos++]; } else { @@ -253,7 +217,7 @@ class SkipScanner { pos = nonspace_bits_end_; goto found_space; } - pos = block_start + trailing_zeroes(nonspace); + pos = block_start + TrailingZeroes(nonspace); return data[pos++]; } @@ -274,7 +238,7 @@ class SkipScanner { nonspace = GetNonSpaceBits(data + pos); if (nonspace) { nonspace_bits_end_ = pos + 64; - pos += trailing_zeroes(nonspace); + pos += TrailingZeroes(nonspace); nonspace_bits_ = nonspace; return data[pos++]; } else { @@ -296,7 +260,7 @@ class SkipScanner { pos = nonspace_bits_end_; goto found_space; } - pos = block_start + trailing_zeroes(nonspace); 
+ pos = block_start + TrailingZeroes(nonspace); return data[pos++]; } @@ -499,20 +463,6 @@ class SkipScanner { uint64_t nonspace_bits_{0}; }; -template -ParseResult GetOnDemand(StringView json, - const GenericJsonPointer &path, - StringView &target) { - SkipScanner scan; - size_t pos = 0; - long start = scan.GetOnDemand(json, pos, path); - if (start < 0) { - target = ""; - return ParseResult(SonicError(-start), pos - 1); - } - target = StringView(json.data() + start, pos - start); - return ParseResult(kErrorNone, pos); -} - +} // namespace x86_common } // namespace internal } // namespace sonic_json diff --git a/include/sonic/internal/arch/neon/base.h b/include/sonic/internal/arch/neon/base.h new file mode 100644 index 00000000..82ad38ad --- /dev/null +++ b/include/sonic/internal/arch/neon/base.h @@ -0,0 +1,86 @@ +// Copyright 2018-2019 The simdjson authors + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file may have been modified by ByteDance authors. All ByteDance +// Modifications are Copyright 2022 ByteDance Authors. + +#pragma once + +#include + +#include "sonic/macro.h" + +#ifndef VEC_LEN +#define VEC_LEN 16 +#endif + +namespace sonic_json { +namespace internal { +namespace neon { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. 
+ +sonic_force_inline int TrailingZeroes(uint64_t input_num) { + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +} + +/* result might be undefined when input_num is zero */ +sonic_force_inline uint64_t ClearLowestBit(uint64_t input_num) { + return input_num & (input_num - 1); +} + +/* result might be undefined when input_num is zero */ +sonic_force_inline int LeadingZeroes(uint64_t input_num) { + return __builtin_clzll(input_num); +} + +sonic_force_inline long long int CountOnes(uint64_t input_num) { + return __builtin_popcountll(input_num); +} + +sonic_force_inline bool AddOverflow(uint64_t value1, uint64_t value2, + uint64_t* result) { + return __builtin_uaddll_overflow( + value1, value2, reinterpret_cast(result)); +} + +sonic_force_inline uint64_t PrefixXor(uint64_t bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +// sonic_force_inline bool IsAscii(const simd8x64& input) { +// return input.reduce_or().is_ascii(); +// } + +template +sonic_force_inline void Xmemcpy(void* dst_, const void* src_, size_t chunks) { + std::memcpy(dst_, src_, chunks * ChunkSize); +} + +} // namespace neon +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/arch/neon/itoa.h b/include/sonic/internal/arch/neon/itoa.h new file mode 100644 index 00000000..65d9e2ae --- /dev/null +++ b/include/sonic/internal/arch/neon/itoa.h @@ -0,0 +1,93 @@ +/* + * Copyright 2022 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include
+#include
+
+#include
+#include
+
+namespace sonic_json {
+namespace internal {
+namespace neon {
+
+// Convert num {abcd} to {axxxx, abxxxx, abcxxxx, abcdxxxx}: the high 16 bits of each 32-bit product hold a, ab, abc, abcd.
+static sonic_force_inline uint16x8_t Utoa_4_helper(uint16_t num) {
+  uint16_t v = num << 2;  // pre-scale by 4 (truncated to 16 bits by the store)
+  // v00 = vector{abcd * 4, abcd * 4, abcd * 4, abcd * 4}
+  uint16x4_t v00 = vdup_n_u16(v);
+
+  uint16x4_t kVecDiv = vreinterpret_u16_u64(vdup_n_u64(0x80003334147b20c5));  // 16-bit fields, low to high: 0x20c5, 0x147b, 0x3334, 0x8000
+  uint32x4_t v01 = vmull_u16(v00, kVecDiv);  // widening multiply: 32-bit products
+  uint16x4_t v02 = vshrn_n_u32(v01, 16);  // keep the high 16 bits of each product
+  uint16x4_t kVecShift = vreinterpret_u16_u64(vdup_n_u64(0x8000200008000080));  // 16-bit fields, low to high: 2^7, 2^11, 2^13, 2^15
+  uint32x4_t v03 = vmull_u16(v02, kVecShift);  // second widening multiply; the quotient ends up in the high half
+  return vreinterpretq_u16_u32(v03);
+}
+
+// Convert each decimal digit of num into its own packed 16-bit lane of a vector.
+// num's digits as abcdefgh (high bits is 0 if not enough) +// The converted vector is { a, b, c, d, e, f, g, h } +sonic_force_inline uint16x8_t UtoaNeon(uint32_t num) { + uint16_t hi = num % 10000; // {efgh} + uint16_t lo = num / 10000; // {abcd} + + // v10 = {a, ab, abc, abcd, e, ef, efg, efgh} + uint16x8_t v10 = vuzp2q_u16(Utoa_4_helper(lo), Utoa_4_helper(hi)); + + // v12 = {0, a0, ab0, abc0, 0, e0, ef0, efg0} + uint16x8_t v11 = vmulq_u16(v10, vdupq_n_u16(10)); + uint16x8_t v12 = + vreinterpretq_u16_u64(vshlq_n_u64(vreinterpretq_u64_u16(v11), 16)); + + // v13 = {a, b, c, d, e, f, g, h} + uint16x8_t v13 = vsubq_u16(v10, v12); + return v13; +} + +static sonic_force_inline char *Utoa_8(uint32_t val, char *out) { + /* convert to digits */ + uint16x8_t v0 = UtoaNeon(val); + uint16x8_t v1 = vdupq_n_u16(0); + + /* convert to bytes, add '0' */ + uint8x16_t v2 = vcombine_u8(vqmovun_s16(vreinterpretq_s16_u16(v0)), + vqmovun_s16(vreinterpretq_s16_u16(v1))); + uint8x16_t v3 = vaddq_u8(v2, vdupq_n_u8('0')); + + /* store high 64 bits */ + vst1q_u8((uint8_t *)(out), v3); + return out + 8; +} + +static sonic_force_inline char *Utoa_16(uint64_t val, char *out) { + /* remaining digits */ + uint16x8_t v0 = UtoaNeon((uint32_t)(val / 100000000)); + uint16x8_t v1 = UtoaNeon((uint32_t)(val % 100000000)); + /* convert to bytes, add '0' */ + uint8x16_t v2 = vcombine_u8(vqmovun_s16(vreinterpretq_s16_u16(v0)), + vqmovun_s16(vreinterpretq_s16_u16(v1))); + uint8x16_t v3 = vaddq_u8(v2, vdupq_n_u8('0')); + + vst1q_u8((uint8_t *)(out), v3); + return out + 16; +} + +} // namespace neon +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/arch/neon/quote.h b/include/sonic/internal/arch/neon/quote.h new file mode 100644 index 00000000..2571ef83 --- /dev/null +++ b/include/sonic/internal/arch/neon/quote.h @@ -0,0 +1,199 @@ +/* + * Copyright 2022 ByteDance Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include +#include + +#include "../common/quote_common.h" +#include "../common/quote_tables.h" +#include "base.h" +#include "unicode.h" + +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +#ifndef VEC_LEN +#error "You should define VEC_LEN before including quote.h!" +#endif + +namespace sonic_json { +namespace internal { +namespace neon { + +sonic_force_inline size_t parseStringInplace(uint8_t *&src, SonicError &err) { +#define SONIC_REPEAT8(v) {v v v v v v v v} + + uint8_t *dst = src; + uint8_t *sdst = src; + while (1) { + find: + auto block = StringBlock::Find(src); + if (block.HasQuoteFirst()) { + int idx = block.QuoteIndex(); + src += idx; + *src++ = '\0'; + return src - sdst - 1; + } + if (block.HasUnescaped()) { + err = kParseErrorUnEscaped; + return 0; + } + if (!block.HasBackslash()) { + src += VEC_LEN; + goto find; + } + + /* find out where the backspace is */ + auto bs_dist = block.BsIndex(); + src += bs_dist; + dst = src; + cont: + uint8_t escape_char = src[1]; + if (sonic_unlikely(escape_char == 'u')) { + if (!handle_unicode_codepoint(const_cast(&src), &dst)) { + err = kParseErrorEscapedUnicode; + return 0; + } + } else { + *dst = kEscapedMap[escape_char]; + if (sonic_unlikely(*dst == 0u)) { + err = kParseErrorEscapedFormat; + return 0; + } + src += 2; + dst += 1; + } + // fast path for continous escaped chars + if (*src == '\\') { + 
bs_dist = 0; + goto cont; + } + + find_and_move: + // Copy the next n bytes, and find the backslash and quote in them. + uint8x16_t v = vld1q_u8(src); + block = StringBlock::Find(v); + // If the next thing is the end quote, copy and return + if (block.HasQuoteFirst()) { + // we encountered quotes first. Move dst to point to quotes and exit + while (1) { + SONIC_REPEAT8(if (sonic_unlikely(*src == '"')) break; + else { *dst++ = *src++; }); + } + *dst = '\0'; + src++; + return dst - sdst; + } + if (block.HasUnescaped()) { + err = kParseErrorUnEscaped; + return 0; + } + if (!block.HasBackslash()) { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + vst1q_u8(dst, v); + src += VEC_LEN; + dst += VEC_LEN; + goto find_and_move; + } + while (1) { + SONIC_REPEAT8(if (sonic_unlikely(*src == '\\')) break; + else { *dst++ = *src++; }); + } + goto cont; + } + sonic_assert(false); +#undef SONIC_REPEAT8 +} + +static sonic_force_inline uint64_t CopyAndGetEscapMask128(const char *src, + char *dst) { + uint8x16_t v = vld1q_u8(reinterpret_cast(src)); + vst1q_u8(reinterpret_cast(dst), v); + + uint8x16_t m1 = vceqq_u8(v, vdupq_n_u8('\\')); + uint8x16_t m2 = vceqq_u8(v, vdupq_n_u8('"')); + uint8x16_t m3 = vcltq_u8(v, vdupq_n_u8('\x20')); + + uint8x16_t m4 = vorrq_u8(m1, m2); + uint8x16_t m5 = vorrq_u8(m3, m4); + + return to_bitmask(m5); +} + +sonic_static_inline char *Quote(const char *src, size_t nb, char *dst) { + *dst++ = '"'; + sonic_assert(nb < (1ULL << 32)); + uint64_t mm; + int cn; + + /* VEC_LEN byte loop */ + while (nb >= VEC_LEN) { + /* check for matches */ + // TODO: optimize: exploit the simd bitmask in the escape block. 
+ if ((mm = CopyAndGetEscapMask128(src, dst)) != 0) { + // cn = __builtin_ctz(mm); + cn = TrailingZeroes(mm) >> 2; + MOVE_N_CHARS(src, cn); + DoEscape(src, dst, nb); + } else { + /* move to next block */ + MOVE_N_CHARS(src, VEC_LEN); + } + } + + if (nb > 0) { + char tmp_src[64]; + const char *src_r; +#ifdef SONIC_USE_SANITIZE + if (0) { +#else + /* This code would cause address sanitizer report heap-buffer-overflow. */ + if (((size_t)(src) & (PAGE_SIZE - 1)) <= (PAGE_SIZE - 64)) { + src_r = src; +#endif + } else { + std::memcpy(tmp_src, src, nb); + src_r = tmp_src; + } + while (nb > 0) { + mm = CopyAndGetEscapMask128(src_r, dst) & + (0xFFFFFFFFFFFFFFFF >> ((VEC_LEN - nb) << 2)); + if (mm) { + cn = TrailingZeroes(mm) >> 2; + MOVE_N_CHARS(src_r, cn); + DoEscape(src_r, dst, nb); + } else { + dst += nb; + nb = 0; + } + } + } + + *dst++ = '"'; + return dst; +} + +} // namespace neon +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/arch/neon/simd.h b/include/sonic/internal/arch/neon/simd.h new file mode 100644 index 00000000..60d08bd0 --- /dev/null +++ b/include/sonic/internal/arch/neon/simd.h @@ -0,0 +1,38 @@ +/* + * Copyright 2022 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#include "sonic/macro.h" + +namespace sonic_json { +namespace internal { +namespace neon { + +#ifndef VEC_LEN +#define VEC_LEN 16 +#endif + +sonic_force_inline uint64_t to_bitmask(uint8x16_t v) { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(v), 4)), 0); +} + +} // namespace neon +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/arch/neon/skip.h b/include/sonic/internal/arch/neon/skip.h new file mode 100644 index 00000000..f8bbc913 --- /dev/null +++ b/include/sonic/internal/arch/neon/skip.h @@ -0,0 +1,416 @@ +/* + * Copyright 2022 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../common/skip_common.h" +#include "base.h" +#include "quote.h" +#include "simd.h" +#include "sonic/dom/json_pointer.h" +#include "sonic/error.h" +#include "sonic/internal/utils.h" +#include "sonic/macro.h" +#include "unicode.h" + +namespace sonic_json { +namespace internal { +namespace neon { + +using sonic_json::internal::common::EqBytes4; +using sonic_json::internal::common::SkipLiteral; + +#ifndef VEC_LEN +#error "Define vector length firstly!" +#endif + +// GetNextToken find the next characters in tokens and update the position to +// it. 
+// `tokens` is a string literal; its terminating NUL is not searched for.
+// Returns the matched byte with `pos` left on it, or '\0' once the scan has
+// run to `len` without finding any token.
+template <size_t N>
+sonic_force_inline uint8_t GetNextToken(const uint8_t *data, size_t &pos,
+                                        size_t len, const char (&tokens)[N]) {
+  // Vector path: compare 16 bytes against every token and OR the results.
+  while (pos + VEC_LEN <= len) {
+    uint8x16_t v = vld1q_u8(data + pos);
+    uint8x16_t vor = vdupq_n_u8(0);
+    for (size_t i = 0; i < N - 1; i++) {
+      uint8x16_t vtmp = vceqq_u8(v, vdupq_n_u8((uint8_t)(tokens[i])));
+      vor = vorrq_u8(vor, vtmp);
+    }
+
+    // NEON has no movemask instruction; to_bitmask uses shrn to reduce the
+    // 128-bit result to 64 bits, i.e. 4 mask bits per byte. Where an x86
+    // movemask would yield 0101, the NEON mask is 0000111100001111, hence the
+    // `>> 2` when turning a bit index into a byte index.
+    uint64_t next = to_bitmask(vor);
+    if (next) {
+      pos += (TrailingZeroes(next) >> 2);
+      return data[pos];
+    }
+    pos += VEC_LEN;
+  }
+  // Scalar tail for the last (len - pos) < VEC_LEN bytes.
+  while (pos < len) {
+    for (size_t i = 0; i < N - 1; i++) {
+      if (data[pos] == tokens[i]) {
+        return tokens[i];
+      }
+    }
+    pos++;
+  }
+  return '\0';
+}
+
+// Skips a JSON string body. `pos` must point just after the opening quote; on
+// success it is left just after the closing quote.
+//
+// Returns kNormal (1) for a string without escapes, kEscaped (2) when at least
+// one backslash was seen, and kUnclosed (0) when `len` is reached before the
+// closing quote. On kUnclosed `pos` is >= len (it can be len + 1 when the
+// input ends in a lone backslash, because an escape always advances by 2).
+sonic_force_inline int SkipString(const uint8_t *data, size_t &pos,
+                                  size_t len) {
+  constexpr int kEscaped = 2;
+  constexpr int kNormal = 1;
+  constexpr int kUnclosed = 0;
+  int ret = kNormal;
+  while (pos + VEC_LEN <= len) {
+    uint8x16_t v = vld1q_u8(data + pos);
+    uint64_t bs_bits = to_bitmask(vceqq_u8(v, vdupq_n_u8('\\')));
+    uint64_t quote_bits = to_bitmask(vceqq_u8(v, vdupq_n_u8('"')));
+    // (bs_bits - 1) keeps only the bits below the first backslash, so this is
+    // non-zero exactly when a quote precedes every backslash in the vector.
+    if (((bs_bits - 1) & quote_bits) != 0) {
+      pos += (TrailingZeroes(quote_bits) >> 2) + 1;
+      return ret;
+    }
+    if (bs_bits) {
+      ret = kEscaped;
+      // Jump over the backslash and the character it escapes ...
+      pos += ((TrailingZeroes(bs_bits) >> 2) + 2);
+      // ... and over any escape sequences that follow immediately.
+      while (pos < len) {
+        if (data[pos] == '\\') {
+          pos += 2;
+        } else {
+          break;
+        }
+      }
+    } else {
+      pos += VEC_LEN;
+    }
+  }
+  while (pos < len) {
+    if (data[pos] == '\\') {
+      ret = kEscaped;
+      pos += 2;
+      continue;
+    }
+    if (data[pos++] == '"') {
+      // Return right here: the closing quote may be the very last byte of the
+      // buffer, in which case pos == len and the string is still closed.
+      return ret;
+    }
+  }
+  return kUnclosed;
+}
+
+// return true if container is closed.
+// Skips the remainder of an array/object whose opening `left` byte has already
+// been consumed. Bracket bytes inside strings are ignored. On success `pos` is
+// left just after the matching `right`; when `len` is reached first the
+// function returns false.
+sonic_force_inline bool SkipContainer(const uint8_t *data, size_t &pos,
+                                      size_t len, uint8_t left, uint8_t right) {
+  int rbrace_num = 0, lbrace_num = 0, last_lbrace_num = 0;
+  while (pos + VEC_LEN <= len) {
+    const uint8_t *p = data + pos;
+    last_lbrace_num = lbrace_num;
+
+    uint8x16_t v = vld1q_u8(p);
+    uint64_t quote_bits = to_bitmask(vceqq_u8(v, vdupq_n_u8('"')));
+    // Only bytes in front of the first quote are outside a string for sure.
+    uint64_t not_in_str_mask = 0xFFFFFFFFFFFFFFFF;
+    int quote_idx = VEC_LEN;
+    if (quote_bits) {
+      quote_idx = TrailingZeroes(quote_bits);  // bit index, 4 bits per byte
+      // Keep the low `quote_idx` bits; a shift by 64 would be undefined, so
+      // the quote-at-byte-0 case is handled separately.
+      not_in_str_mask =
+          quote_idx == 0 ? 0 : not_in_str_mask >> (64 - quote_idx);
+      quote_idx = (quote_idx >> 2) + 1;  // point to next char after '"'
+    }
+    // to_bitmask yields a full nibble per matching byte; keep a single bit of
+    // each nibble so that CountOnes counts bytes.
+    uint64_t to_one_mask = 0x8888888888888888ull;
+    uint64_t rbrace = to_bitmask(vceqq_u8(v, vdupq_n_u8(right))) & to_one_mask &
+                      not_in_str_mask;
+    uint64_t lbrace = to_bitmask(vceqq_u8(v, vdupq_n_u8(left))) & to_one_mask &
+                      not_in_str_mask;
+
+    /* traverse each `right` */
+    while (rbrace > 0) {
+      rbrace_num++;
+      // Count the `left` bytes that appear before the current `right`.
+      lbrace_num = last_lbrace_num + CountOnes((rbrace - 1) & lbrace);
+      if (lbrace_num < rbrace_num) { /* closed */
+        pos += (TrailingZeroes(rbrace) >> 2) + 1;
+        return true;
+      }
+      rbrace &= (rbrace - 1);
+    }
+    lbrace_num = last_lbrace_num + CountOnes(lbrace);
+    // Either a whole vector, or up to and including the opening quote.
+    pos += quote_idx;
+    if (quote_bits) {
+      // An unterminated string leaves pos >= len, which ends both loops and
+      // makes the function return false below.
+      SkipString(data, pos, len);
+    }
+  }
+
+  // Scalar tail for the last (len - pos) < VEC_LEN bytes.
+  while (pos < len) {
+    uint8_t c = data[pos++];
+    if (c == left) {
+      lbrace_num++;
+    } else if (c == right) {
+      rbrace_num++;
+    } else if (c == '"') {
+      SkipString(data, pos, len);
+    } /* else { do nothing } */
+
+    if (lbrace_num < rbrace_num) { /* closed */
+      return true;
+    }
+  }
+  /* reached the end of the input, but the container is not closed */
+  return false;
+}
+
+// Skips an array body; `pos` must be just after the opening '['.
+sonic_force_inline bool SkipArray(const uint8_t *data, size_t &pos,
+                                  size_t len) {
+  return SkipContainer(data, pos, len, '[', ']');
+}
+
+// Skips an object body; `pos` must be just after the opening '{'.
+sonic_force_inline bool SkipObject(const uint8_t *data, size_t &pos,
+                                   size_t len) {
+  return SkipContainer(data, pos, len, '{', '}');
+}
+
+// Advances `pos` to the next ']', '}' or ',' and returns that byte, or '\0'
+// when none is found before `len`. The number itself is not validated.
+sonic_force_inline uint8_t SkipNumber(const uint8_t *data, size_t &pos,
+                                      size_t len) {
+  return GetNextToken(data, pos, len, "]},");
+}
+
+// SkipScanner is used to skip space and json values in json text.
+// TODO: optimize by removing bound checking.
+class SkipScanner {
+ public:
+  // Returns the first non-space byte at or after `pos` and leaves `pos` just
+  // after it. There is no bound check: the caller must guarantee that a
+  // non-space byte exists and that 16-byte loads past it are readable.
+  sonic_force_inline uint8_t SkipSpace(const uint8_t *data, size_t &pos) {
+    // fast path for single space
+    if (!IsSpace(data[pos++])) return data[pos - 1];
+    if (!IsSpace(data[pos++])) return data[pos - 1];
+
+    // current pos is out of block
+    while (1) {
+      uint64_t nonspace = GetNonSpaceBits(data + pos);
+      if (nonspace) {
+        pos += TrailingZeroes(nonspace) >> 2;
+        return data[pos++];
+      } else {
+        pos += 16;
+      }
+    }
+    sonic_assert(false && "!should not happen");
+  }
+
+  // Bounded variant of SkipSpace. Returns the first non-space byte and leaves
+  // `pos` just after it. When only whitespace remains, the last byte looked at
+  // (a space) is returned and `pos` ends at `len`. An empty input yields '\0'.
+  sonic_force_inline uint8_t SkipSpaceSafe(const uint8_t *data, size_t &pos,
+                                           size_t len) {
+    // Without this guard the return below would read data[pos - 1] with
+    // pos == 0.
+    if (len == 0) return '\0';
+    while (pos < len && IsSpace(data[pos++]))
+      ;
+    // if not found, still return the space chars
+    return data[pos - 1];
+  }
+
+  // Skips `index` array elements so that `pos` ends at the start of element
+  // number `index`. `pos` must be just after the opening '['.
+  // Returns kErrorNone on success, kParseErrorArrIndexOutOfRange when the
+  // array ends first, and kParseErrorInvalidChar for an unterminated element
+  // or when the input ends before `index` elements were skipped.
+  sonic_force_inline SonicError GetArrayElem(const uint8_t *data, size_t &pos,
+                                             size_t len, int index) {
+    while (index > 0 && pos < len) {
+      index--;
+      char c = SkipSpaceSafe(data, pos, len);
+      switch (c) {
+        case '{': {
+          if (!SkipObject(data, pos, len)) {
+            return kParseErrorInvalidChar;
+          }
+          break;
+        }
+        case '[': {
+          if (!SkipArray(data, pos, len)) {
+            return kParseErrorInvalidChar;
+          }
+          break;
+        }
+        case '"': {
+          if (!SkipString(data, pos, len)) {
+            return kParseErrorInvalidChar;
+          }
+          break;
+        }
+      }
+      // skip space and primitives
+      // TODO (liuq): fast path for compat json.
+      if (GetNextToken(data, pos, len, ",]") != ',') {
+        return kParseErrorArrIndexOutOfRange;
+      }
+      pos++;
+    }
+    return index == 0 ? kErrorNone : kParseErrorInvalidChar;
+  }
+
+  // SkipOne skips one raw json value. It returns the offset of the first byte
+  // of the value, or a negative error code (-kParseErrorInvalidChar). On
+  // success `pos` is just after the value; for numbers it is on the delimiter
+  // found by SkipNumber.
+  sonic_force_inline long SkipOne(const uint8_t *data, size_t &pos,
+                                  size_t len) {
+    uint8_t c = SkipSpaceSafe(data, pos, len);
+    size_t start = pos - 1;
+    long err = -kParseErrorInvalidChar;
+
+    switch (c) {
+      case '"': {
+        if (!SkipString(data, pos, len)) return err;
+        break;
+      }
+      case '{': {
+        if (!SkipObject(data, pos, len)) return err;
+        break;
+      }
+      case '[': {
+        if (!SkipArray(data, pos, len)) return err;
+        break;
+      }
+      case 't':
+      case 'n':
+      case 'f': {
+        if (!SkipLiteral(data, pos, len, c)) return err;
+        break;
+      }
+      case '0':
+      case '1':
+      case '2':
+      case '3':
+      case '4':
+      case '5':
+      case '6':
+      case '7':
+      case '8':
+      case '9':
+      case '-': {
+        SkipNumber(data, pos, len);
+        break;
+      }
+      default:
+        return err;
+    }
+    return start;
+  }
+
+  // GetOnDemand walks `json` along `path` without building a DOM and updates
+  // the position.
+  // Returns the offset of the first byte of the target value (with `pos` just
+  // after it), or the negated SonicError on failure. Every error path returns
+  // a negative value; callers test `result < 0`.
+  template <typename JPStringType>
+  long GetOnDemand(StringView json, size_t &pos,
+                   const GenericJsonPointer<JPStringType> &path) {
+    using namespace sonic_json::internal;
+    size_t i = 0;
+    const uint8_t *sp;
+    long sn = 0;
+    uint8_t c;
+    StringView key;
+    int skips;
+    // TODO: use stack smallvector here.
+    std::vector<uint8_t> kbuf(32);  // key buffer for parsed keys
+    const uint8_t *data = reinterpret_cast<const uint8_t *>(json.data());
+    size_t len = json.size();
+    SonicError err = kErrorNone;
+
+  query:
+    // Resolve path node i; once the path is exhausted the value at `pos` is
+    // the target.
+    if (i++ != path.size()) {
+      c = SkipSpaceSafe(data, pos, len);
+      if (path[i - 1].IsStr()) {
+        if (c != '{') goto err_mismatch_type;
+        c = GetNextToken(data, pos, len, "\"}");
+        if (c != '"') goto err_unknown_key;
+        key = StringView(path[i - 1].GetStr());
+        goto obj_key;
+      } else {
+        if (c != '[') goto err_mismatch_type;
+        err = GetArrayElem(data, pos, len, path[i - 1].GetNum());
+        if (err) return -err;
+        goto query;
+      }
+    }
+    return SkipOne(data, pos, len);
+
+  obj_key:
+    // advance quote
+    pos++;
+    sp = data + pos;
+    skips = SkipString(data, pos, len);
+    sn = data + pos - 1 - sp;  // key length without the closing quote
+    if (!skips) goto err_invalid_char;
+    if (skips == 2) {
+      // parse escaped key
+      kbuf.resize(sn + 32);
+      uint8_t *nsrc = &kbuf[0];
+      // Copy the closing quote (sp[sn]) as well.
+      // NOTE(review): parseStringInplace is defined in quote.h, outside this
+      // view; it presumably scans up to the terminating '"' - confirm.
+      std::memcpy(nsrc, sp, sn + 1);
+      sn = parseStringInplace(nsrc, err);
+      if (err) {
+        pos = (sp - data) + (nsrc - &kbuf[0]);
+        // Errors are reported as negative values, like on every other path.
+        return -err;
+      }
+      sp = &kbuf[0];
+    }
+
+    c = SkipSpaceSafe(data, pos, len);
+    if (c != ':') {
+      goto err_invalid_char;
+    }
+    // match key and skip parsing unneeded fields
+    if (sn == static_cast<long>(key.size()) &&
+        std::memcmp(sp, key.data(), sn) == 0) {
+      goto query;
+    } else {
+      c = SkipSpaceSafe(data, pos, len);
+      switch (c) {
+        case '{': {
+          if (!SkipObject(data, pos, len)) {
+            goto err_invalid_char;
+          }
+          break;
+        }
+        case '[': {
+          if (!SkipArray(data, pos, len)) {
+            goto err_invalid_char;
+          }
+          break;
+        }
+        case '"': {
+          if (!SkipString(data, pos, len)) {
+            goto err_invalid_char;
+          }
+          break;
+        }
+      }
+      // skip space and , find next " or }
+      c = GetNextToken(data, pos, len, "\"}");
+      if (c != '"') {
+        goto err_unknown_key;
+      }
+      goto obj_key;
+    }
+
+  err_mismatch_type:
+    pos -= 1;
+    return -kParseErrorMismatchType;
+  err_unknown_key:
+    pos -= 1;
+    return -kParseErrorUnknownObjKey;
+  err_invalid_char:
+    pos -= 1;
+    return -kParseErrorInvalidChar;
+  }
+};
+
+}
// namespace neon
+}  // namespace internal
+}  // namespace sonic_json
diff --git a/include/sonic/internal/arch/neon/str2int.h b/include/sonic/internal/arch/neon/str2int.h
new file mode 100644
index 00000000..9607070a
--- /dev/null
+++ b/include/sonic/internal/arch/neon/str2int.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2022 ByteDance Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+
+#include "sonic/macro.h"
+
+namespace sonic_json {
+namespace internal {
+namespace neon {
+
+// Converts the leading decimal digits of `c` to an integer (scalar
+// implementation for the NEON backend).
+//
+// c       points at the first candidate digit.
+// man_nd  in: maximum number of characters that may be read from `c`;
+//         out: number of digits actually consumed.
+//
+// Returns the value of the consumed digits. There is no overflow check: the
+// unsigned accumulator wraps if the digits exceed the range of uint64_t.
+sonic_force_inline uint64_t simd_str2int(const char* c, int& man_nd) {
+  uint64_t sum = 0;
+  int i = 0;
+  // Test the bound first so that c[man_nd] is never read.
+  while (i < man_nd && c[i] >= '0' && c[i] <= '9') {
+    sum = sum * 10 + (c[i] - '0');
+    i++;
+  }
+  man_nd = i;
+  return sum;
+}
+
+}  // namespace neon
+}  // namespace internal
+}  // namespace sonic_json
diff --git a/include/sonic/internal/arch/neon/unicode.h b/include/sonic/internal/arch/neon/unicode.h
new file mode 100644
index 00000000..2150674c
--- /dev/null
+++ b/include/sonic/internal/arch/neon/unicode.h
@@ -0,0 +1,113 @@
+// Copyright 2018-2019 The simdjson authors
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file may have been modified by ByteDance authors. All ByteDance +// Modifications are Copyright 2022 ByteDance Authors. + +#pragma once + +#include + +#include +#include + +#include "../common/unicode_common.h" +#include "base.h" +#include "simd.h" + +namespace sonic_json { +namespace internal { +namespace neon { + +using sonic_json::internal::common::handle_unicode_codepoint; + +struct StringBlock { + public: + sonic_force_inline static StringBlock Find(const uint8_t *src); + sonic_force_inline static StringBlock Find(uint8x16_t &v); + sonic_force_inline bool HasQuoteFirst() const { + return (((bs_bits - 1) & quote_bits) != 0) && !HasUnescaped(); + } + sonic_force_inline bool HasBackslash() const { + return ((quote_bits - 1) & bs_bits) != 0; + } + sonic_force_inline bool HasUnescaped() const { + return ((quote_bits - 1) & unescaped_bits) != 0; + } + sonic_force_inline int QuoteIndex() const { + // return TrailingZeroes(quote_bits); + return TrailingZeroes(quote_bits) >> 2; + } + sonic_force_inline int BsIndex() const { + // return TrailingZeroes(bs_bits); + return TrailingZeroes(bs_bits) >> 2; + } + sonic_force_inline int UnescapedIndex() const { + // return TrailingZeroes(unescaped_bits); + return TrailingZeroes(unescaped_bits) >> 2; + } + + uint64_t bs_bits; + uint64_t quote_bits; + uint64_t unescaped_bits; +}; + +sonic_force_inline StringBlock StringBlock::Find(const uint8_t *src) { + uint8x16_t v = vld1q_u8(src); + return { + to_bitmask(vceqq_u8(v, vdupq_n_u8('\\'))), + to_bitmask(vceqq_u8(v, vdupq_n_u8('"'))), + to_bitmask(vcleq_u8(v, 
vdupq_n_u8('\x1f'))), + }; +} + +sonic_force_inline StringBlock StringBlock::Find(uint8x16_t &v) { + return { + to_bitmask(vceqq_u8(v, vdupq_n_u8('\\'))), + to_bitmask(vceqq_u8(v, vdupq_n_u8('"'))), + to_bitmask(vcleq_u8(v, vdupq_n_u8('\x1f'))), + }; +} + +sonic_force_inline uint64_t GetNonSpaceBits(const uint8_t *data) { + uint8x16_t v = vld1q_u8(data); + uint8x16_t m1 = vceqq_u8(v, vdupq_n_u8(' ')); + uint8x16_t m2 = vceqq_u8(v, vdupq_n_u8('\t')); + uint8x16_t m3 = vceqq_u8(v, vdupq_n_u8('\n')); + uint8x16_t m4 = vceqq_u8(v, vdupq_n_u8('\r')); + + uint8x16_t m5 = vorrq_u8(m1, m2); + uint8x16_t m6 = vorrq_u8(m3, m4); + uint8x16_t m7 = vorrq_u8(m5, m6); + uint8x16_t m8 = vmvnq_u8(m7); + + return to_bitmask(m8); +} + +sonic_force_inline uint64_t GetEscapedBranchless(uint64_t &prev_escaped, + uint64_t backslash) { + backslash &= ~prev_escaped; + uint64_t follows_escape = backslash << 1 | prev_escaped; + const uint64_t even_bits = 0x5555555555555555ULL; + uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape; + uint64_t sequences_starting_on_even_bits; + prev_escaped = AddOverflow(odd_sequence_starts, backslash, + &sequences_starting_on_even_bits); + uint64_t invert_mask = sequences_starting_on_even_bits << 1; + return (even_bits ^ invert_mask) & follows_escape; +} + +} // namespace neon +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/arch/simd_base.h b/include/sonic/internal/arch/simd_base.h new file mode 100644 index 00000000..438c435e --- /dev/null +++ b/include/sonic/internal/arch/simd_base.h @@ -0,0 +1,21 @@ +#pragma once + +#include "simd_dispatch.h" + +#ifdef SONIC_STATIC_DISPATCH +#include INCLUDE_ARCH_FILE(base.h) +#endif + +namespace sonic_json { +namespace internal { + +SONIC_USING_ARCH_FUNC(TrailingZeroes); +SONIC_USING_ARCH_FUNC(ClearLowestBit); +SONIC_USING_ARCH_FUNC(LeadingZeroes); +SONIC_USING_ARCH_FUNC(CountOnes); +SONIC_USING_ARCH_FUNC(AddOverflow); +SONIC_USING_ARCH_FUNC(PrefixXor); 
+SONIC_USING_ARCH_FUNC(Xmemcpy); + +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/arch/simd_dispatch.h b/include/sonic/internal/arch/simd_dispatch.h new file mode 100644 index 00000000..5709e6e4 --- /dev/null +++ b/include/sonic/internal/arch/simd_dispatch.h @@ -0,0 +1,33 @@ +#pragma once +#include "sonic_cpu_feature.h" + +#ifndef SONIC_DYNAMIC_DISPATCH +#define SONIC_STATIC_DISPATCH +#endif + +#ifndef SONIC_STRINGIFY +#define SONIC_STRINGIFY(s) SONIC_STRINGIFY2(s) +#define SONIC_STRINGIFY2(s) #s +#endif + +#if defined(SONIC_STATIC_DISPATCH) + +// clang-format off +#if defined(SONIC_HAVE_AVX2) +#define SONIC_USING_ARCH_FUNC(func) using avx2::func +#define INCLUDE_ARCH_FILE(file) SONIC_STRINGIFY(avx2/file) +#elif defined(SONIC_HAVE_SSE) +#define SONIC_USING_ARCH_FUNC(func) using sse::func +#define INCLUDE_ARCH_FILE(file) SONIC_STRINGIFY(sse/file) +#endif + +#if defined(SONIC_HAVE_SVE) +#define SONIC_USING_ARCH_FUNC(func) using sve::func +#define INCLUDE_ARCH_FILE(file) SONIC_STRINGIFY(sve/file) +#elif defined(SONIC_HAVE_NEON) +#define SONIC_USING_ARCH_FUNC(func) using neon::func +#define INCLUDE_ARCH_FILE(file) SONIC_STRINGIFY(neon/file) +#endif +// clang-format on + +#endif diff --git a/include/sonic/internal/arch/simd_func.h b/include/sonic/internal/arch/simd_func.h new file mode 100644 index 00000000..e69de29b diff --git a/include/sonic/internal/arch/simd_itoa.h b/include/sonic/internal/arch/simd_itoa.h new file mode 100644 index 00000000..7acac89a --- /dev/null +++ b/include/sonic/internal/arch/simd_itoa.h @@ -0,0 +1,16 @@ +#pragma once + +#include "simd_dispatch.h" + +#ifdef SONIC_STATIC_DISPATCH +#include INCLUDE_ARCH_FILE(itoa.h) +#endif + +namespace sonic_json { +namespace internal { + +SONIC_USING_ARCH_FUNC(Utoa_8); +SONIC_USING_ARCH_FUNC(Utoa_16); + +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/arch/simd_quote.h b/include/sonic/internal/arch/simd_quote.h new file mode 100644 
index 00000000..90864650 --- /dev/null +++ b/include/sonic/internal/arch/simd_quote.h @@ -0,0 +1,16 @@ +#pragma once + +#include "simd_dispatch.h" + +#ifdef SONIC_STATIC_DISPATCH +#include INCLUDE_ARCH_FILE(quote.h) +#endif + +namespace sonic_json { +namespace internal { + +SONIC_USING_ARCH_FUNC(parseStringInplace); +SONIC_USING_ARCH_FUNC(Quote); + +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/arch/simd_skip.h b/include/sonic/internal/arch/simd_skip.h new file mode 100644 index 00000000..7e3df8f9 --- /dev/null +++ b/include/sonic/internal/arch/simd_skip.h @@ -0,0 +1,37 @@ +#pragma once + +#include "simd_dispatch.h" + +#ifdef SONIC_STATIC_DISPATCH +#include INCLUDE_ARCH_FILE(skip.h) +#endif + +namespace sonic_json { +namespace internal { + +SONIC_USING_ARCH_FUNC(EqBytes4); +SONIC_USING_ARCH_FUNC(SkipString); +SONIC_USING_ARCH_FUNC(SkipContainer); +SONIC_USING_ARCH_FUNC(SkipArray); +SONIC_USING_ARCH_FUNC(SkipObject); +SONIC_USING_ARCH_FUNC(SkipLiteral); +SONIC_USING_ARCH_FUNC(SkipNumber); +SONIC_USING_ARCH_FUNC(SkipScanner); + +template +ParseResult GetOnDemand(StringView json, + const GenericJsonPointer &path, + StringView &target) { + SkipScanner scan; + size_t pos = 0; + long start = scan.GetOnDemand(json, pos, path); + if (start < 0) { + target = ""; + return ParseResult(SonicError(-start), pos - 1); + } + target = StringView(json.data() + start, pos - start); + return ParseResult(kErrorNone, pos); +} + +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/arch/simd_str2int.h b/include/sonic/internal/arch/simd_str2int.h new file mode 100644 index 00000000..50d0b300 --- /dev/null +++ b/include/sonic/internal/arch/simd_str2int.h @@ -0,0 +1,15 @@ +#pragma once + +#include "simd_dispatch.h" + +#ifdef SONIC_STATIC_DISPATCH +#include INCLUDE_ARCH_FILE(str2int.h) +#endif + +namespace sonic_json { +namespace internal { + +SONIC_USING_ARCH_FUNC(simd_str2int); + +} // namespace internal +} // 
namespace sonic_json diff --git a/include/sonic/internal/arch/sonic_cpu_feature.h b/include/sonic/internal/arch/sonic_cpu_feature.h new file mode 100644 index 00000000..5c1577a2 --- /dev/null +++ b/include/sonic/internal/arch/sonic_cpu_feature.h @@ -0,0 +1,28 @@ +#pragma once + +#if defined(__SSE2__) +#define SONIC_HAVE_SSE +#define SONIC_HAVE_SSE2 +#if defined(__SSE3__) +#define SONIC_HAVE_SSE3 +#endif +#if defined(__SSSE3__) +#define SONIC_HAVE_SSSE3 +#endif +#if defined(__SSE4_1__) +#define SONIC_HAVE_SSE4_1 +#endif +#if defined(__SSE4_2__) +#define SONIC_HAVE_SSE4_2 +#endif +#if defined(__AVX__) +#define SONIC_HAVE_AVX +#endif +#if defined(__AVX2__) +#define SONIC_HAVE_AVX2 +#endif +#elif defined(__ARM_NEON) || defined(__ARM_NEON__) +#define SONIC_HAVE_NEON +#elif defined(__ARM_FEATURE_SVE) +#define SONIC_HAVE_NEON +#endif diff --git a/include/sonic/internal/arch/sse/base.h b/include/sonic/internal/arch/sse/base.h new file mode 100644 index 00000000..1b071480 --- /dev/null +++ b/include/sonic/internal/arch/sse/base.h @@ -0,0 +1,108 @@ +// Copyright 2018-2019 The simdjson authors + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file may have been modified by ByteDance authors. All ByteDance +// Modifications are Copyright 2022 ByteDance Authors. 
+ +#pragma once + +#include + +#include "simd.h" + +namespace sonic_json { +namespace internal { +namespace sse { + +using namespace simd; + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. + +sonic_force_inline int TrailingZeroes(uint64_t input_num) { + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +} + +/* result might be undefined when input_num is zero */ +sonic_force_inline uint64_t ClearLowestBit(uint64_t input_num) { +#if __BMI__ + return _blsr_u64(input_num); +#else + return input_num & (input_num - 1); +#endif +} + +/* result might be undefined when input_num is zero */ +sonic_force_inline int LeadingZeroes(uint64_t input_num) { + return __builtin_clzll(input_num); +} + +sonic_force_inline long long int CountOnes(uint64_t input_num) { + return __builtin_popcountll(input_num); +} + +sonic_force_inline bool AddOverflow(uint64_t value1, uint64_t value2, + uint64_t* result) { + return __builtin_uaddll_overflow( + value1, value2, reinterpret_cast(result)); +} + +sonic_force_inline uint64_t PrefixXor(const uint64_t bitmask) { +#if __PCLMUL__ + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = + _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +#else +#error "PCLMUL instruction set required. Missing option -mpclmul ?" 
+ return 0; +#endif +} + +sonic_force_inline bool IsAscii(const simd8x64& input) { + return input.reduce_or().is_ascii(); +} + +template +sonic_force_inline void Xmemcpy(void* dst_, const void* src_, size_t chunks) { + std::memcpy(dst_, src_, chunks * ChunkSize); +} + +template <> +sonic_force_inline void Xmemcpy<16>(void* dst_, const void* src_, + size_t chunks) { + uint8_t* dst = reinterpret_cast(dst_); + const uint8_t* src = reinterpret_cast(src_); + for (size_t i = 0; i < chunks; i++) { + simd128 s(src); + s.store(dst); + src += 16, dst += 16; + } +} + +template <> +sonic_force_inline void Xmemcpy<32>(void* dst_, const void* src_, + size_t chunks) { + Xmemcpy<16>(dst_, src_, chunks * 2); +} + +} // namespace sse +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/arch/sse/itoa.h b/include/sonic/internal/arch/sse/itoa.h new file mode 100644 index 00000000..bf55c9b8 --- /dev/null +++ b/include/sonic/internal/arch/sse/itoa.h @@ -0,0 +1,31 @@ +/* + * Copyright 2022 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "../common/x86_common/itoa.h" + +namespace sonic_json { +namespace internal { +namespace sse { + +using sonic_json::internal::x86_common::Utoa_16; +using sonic_json::internal::x86_common::Utoa_8; +using sonic_json::internal::x86_common::UtoaSSE; + +} // namespace sse +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/arch/sse/quote.h b/include/sonic/internal/arch/sse/quote.h new file mode 100644 index 00000000..64980631 --- /dev/null +++ b/include/sonic/internal/arch/sse/quote.h @@ -0,0 +1,63 @@ +/* + * Copyright 2022 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include +#include + +#include "../common/quote_common.h" +#include "../common/quote_tables.h" +#include "base.h" +#include "simd.h" +#include "unicode.h" + +#ifndef VEC_FULL_MASK +#define VEC_FULL_MASK 0xFFFF +#endif + +namespace sonic_json { +namespace internal { +namespace x86_common { + +using StringBlock = sse::StringBlock; +using VecType = simd::simd128; + +static sonic_force_inline int CopyAndGetEscapMask(const char *src, char *dst) { + simd::simd128 v(reinterpret_cast(src)); + v.store(reinterpret_cast(dst)); + return ((v < '\x20') | (v == '\\') | (v == '"')).to_bitmask(); +} + +} // namespace x86_common +} // namespace internal +} // namespace sonic_json + +#include "../common/x86_common/quote.h" + +namespace sonic_json { +namespace internal { +namespace sse { + +using sonic_json::internal::x86_common::parseStringInplace; +using sonic_json::internal::x86_common::Quote; + +} // namespace sse +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/arch/sse/simd.h b/include/sonic/internal/arch/sse/simd.h new file mode 100644 index 00000000..03e91f5c --- /dev/null +++ b/include/sonic/internal/arch/sse/simd.h @@ -0,0 +1,387 @@ +// Copyright 2018-2019 The simdjson authors + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file may have been modified by ByteDance authors. All ByteDance +// Modifications are Copyright 2022 ByteDance Authors. 
+ +#pragma once + +#include +#include + +#include + +#if !__SSE__ +#error "SSE instruction set required. Missing option -msse ?" +#endif + +#ifndef VEC_LEN +#define VEC_LEN 16 +#endif + +namespace sonic_json { +namespace internal { +namespace simd { + +#define REPEAT16_ARGS(typ) \ + typ v0, typ v1, typ v2, typ v3, typ v4, typ v5, typ v6, typ v7, typ v8, \ + typ v9, typ v10, typ v11, typ v12, typ v13, typ v14, typ v15 + +template +struct simd128; + +template > +struct base128 { + public: + using Child = simd128; + __m128i value; + sonic_force_inline base128() : value{__m128i()} {} + sonic_force_inline base128(const __m128i _value) : value(_value) {} + sonic_force_inline base128(const T _value) : value(splat(_value)) {} + sonic_force_inline base128(const T values[16]) : value(load(values)) {} + sonic_force_inline base128(REPEAT16_ARGS(T)) + : value(_mm_setr_epi8(REPEAT16_ARGS())) {} + + // Conversion to SIMD register + sonic_force_inline operator const __m128i&() const { return this->value; } + sonic_force_inline operator __m128i&() { return this->value; } + + // Bit operations + sonic_force_inline Child operator|(const Child other) const { + return _mm_or_si128(*this, other); + } + sonic_force_inline Child operator&(const Child other) const { + return _mm_and_si128(*this, other); + } + sonic_force_inline Child operator^(const Child other) const { + return _mm_xor_si128(*this, other); + } + sonic_force_inline Child bit_andnot(const Child other) const { + return _mm_andnot_si128(other, *this); + } + sonic_force_inline Child operator~() const { + return *this ^ _mm_set1_epi8(uint8_t(0xFFu)); + } + sonic_force_inline Child& operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } + sonic_force_inline Child& operator&=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast & other; + return *this_cast; + } + sonic_force_inline Child& operator^=(const Child other) { + 
auto this_cast = static_cast(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } + + // Compare operations + friend sonic_force_inline Mask operator==(const Child lhs, const Child rhs) { + return _mm_cmpeq_epi8(lhs, rhs); + } + + // Memory Operations + sonic_force_inline void store(T dst[16]) const { + return _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), *this); + } + static sonic_force_inline Child load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + static sonic_force_inline Child splat(T _value) { + return Child(_mm_set1_epi8(_value)); + } + static sonic_force_inline Child repeat_16(REPEAT16_ARGS(T)) { + return Child(REPEAT16_ARGS()); + } + template + sonic_force_inline Child prev(const Child prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> +struct simd128 : base128 { + sonic_force_inline simd128() : base128() {} + sonic_force_inline simd128(const __m128i _value) + : base128(_value) {} + // Splat constructor + sonic_force_inline simd128(bool _value) + : base128(splat(_value)) {} + sonic_force_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + // Override splat bool + static sonic_force_inline simd128 splat(bool _value) { + return _mm_set1_epi8(uint8_t(-(!!_value))); + } +}; + +template +struct num128 : base128 { + using Base = base128; + using Base::Base; + static sonic_force_inline simd128 zero() { return _mm_setzero_si128(); } + + // Addition/subtraction are the same for signed and unsigned + sonic_force_inline simd128 operator+(const simd128 other) const { + return _mm_add_epi8(*this, other); + } + sonic_force_inline simd128 operator-(const simd128 other) const { + return _mm_sub_epi8(*this, other); + } + sonic_force_inline simd128& operator+=(const simd128 other) { + *this = *this + other; + return *static_cast*>(this); + } + sonic_force_inline simd128& operator-=(const simd128 other) 
{ + *this = *this - other; + return *static_cast*>(this); + } +}; + +// Signed bytes +template <> +struct simd128 : num128 { + using Base = num128; + using Base::Base; + // Order-sensitive comparisons + sonic_force_inline simd128 max_val( + const simd128 other) const { + return _mm_max_epi8(*this, other); + } + sonic_force_inline simd128 min_val( + const simd128 other) const { + return _mm_min_epi8(*this, other); + } + sonic_force_inline simd128 operator>( + const simd128 other) const { + return _mm_cmpgt_epi8(*this, other); + } + sonic_force_inline simd128 operator<( + const simd128 other) const { + return _mm_cmpgt_epi8(other, *this); + } +}; + +// Unsigned bytes +template <> +struct simd128 : num128 { + using Base = num128; + using Base::Base; + + // Saturated math + sonic_force_inline simd128 saturating_add( + const simd128 other) const { + return _mm_adds_epu8(*this, other); + } + sonic_force_inline simd128 saturating_sub( + const simd128 other) const { + return _mm_subs_epu8(*this, other); + } + + // Order-specific operations + sonic_force_inline simd128 max_val( + const simd128 other) const { + return _mm_max_epu8(*this, other); + } + sonic_force_inline simd128 min_val( + const simd128 other) const { + return _mm_min_epu8(other, *this); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + sonic_force_inline simd128 gt_bits( + const simd128 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + sonic_force_inline simd128 lt_bits( + const simd128 other) const { + return other.saturating_sub(*this); + } + sonic_force_inline simd128 operator<=( + const simd128 other) const { + return other.max_val(*this) == other; + } + sonic_force_inline simd128 operator>=( + const simd128 other) const { + return other.min_val(*this) == other; + } + sonic_force_inline simd128 operator>( + const simd128 other) const { + return this->gt_bits(other).any_bits_set(); + } + 
sonic_force_inline simd128 operator<( + const simd128 other) const { + return this->lt_bits(other).any_bits_set(); + } + + // Bit-specific operations + sonic_force_inline simd128 bits_not_set() const { + return *this == uint8_t(0); + } + sonic_force_inline simd128 bits_not_set(simd128 bits) const { + return (*this & bits).bits_not_set(); + } + sonic_force_inline simd128 any_bits_set() const { + return ~this->bits_not_set(); + } + sonic_force_inline simd128 any_bits_set(simd128 bits) const { + return ~this->bits_not_set(bits); + } + sonic_force_inline bool is_ascii() const { + return _mm_movemask_epi8(*this) == 0; + } + sonic_force_inline bool bits_not_set_anywhere() const { + return _mm_testz_si128(*this, *this); + } + sonic_force_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + sonic_force_inline bool bits_not_set_anywhere(simd128 bits) const { + return _mm_testz_si128(*this, bits); + } + sonic_force_inline bool any_bits_set_anywhere(simd128 bits) const { + return !bits_not_set_anywhere(bits); + } + template + sonic_force_inline simd128 shr() const { + return simd128(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); + } + template + sonic_force_inline simd128 shl() const { + return simd128(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); + } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + sonic_force_inline int get_bit() const { + return _mm_movemask_epi8(_mm_slli_epi16(*this, 7 - N)); + } +}; + +template +struct simd8x16 { + static constexpr int NUM_CHUNKS = 16 / sizeof(simd128); + static_assert( + NUM_CHUNKS == 1, + "Haswell kernel should use one sse registers per 16-byte block."); + const simd128 chunks[NUM_CHUNKS]; + + simd8x16(const simd8x16& o) = delete; // no copy allowed + simd8x16& operator=(const simd128& other) = + delete; // no assignment allowed + simd8x16() = delete; // no default constructor allowed + + sonic_force_inline simd8x16(const simd128 chunk0) : chunks{chunk0} {} + sonic_force_inline simd8x16(const T ptr[16]) + : chunks{simd128::load(ptr)} {} + + sonic_force_inline void store(T ptr[16]) const { + this->chunks[0].store(ptr + sizeof(simd128) * 0); + } + + sonic_force_inline uint64_t to_bitmask() const { + return this->chunks[0].to_bitmask(); + } + + sonic_force_inline simd128 reduce_or() const { return this->chunks[0]; } + + sonic_force_inline simd8x16 bit_or(const T m) const { + const simd128 mask = simd128::splat(m); + return simd8x16(this->chunks[0] | mask); + } + + sonic_force_inline uint64_t eq(const T m) const { + const simd128 mask = simd128::splat(m); + return simd8x16(this->chunks[0] == mask).to_bitmask(); + } + + sonic_force_inline uint64_t eq(const simd8x16& other) const { + return simd8x16(this->chunks[0] == other.chunks[0]).to_bitmask(); + } + + sonic_force_inline uint64_t lteq(const T m) const { + const simd128 mask = simd128::splat(m); + return simd8x16(this->chunks[0] <= mask).to_bitmask(); + } +}; // struct simd8x16 + +template +struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd128); + static_assert(NUM_CHUNKS == 4, + "Westmere kernel should use four registers per 64-byte block."); + const simd128 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd128& other) = + delete; // no 
assignment allowed + simd8x64() = delete; // no default constructor allowed + + sonic_force_inline simd8x64(const simd128 chunk0, const simd128 chunk1, + const simd128 chunk2, const simd128 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + sonic_force_inline simd8x64(const T ptr[64]) + : chunks{simd128::load(ptr), simd128::load(ptr + 16), + simd128::load(ptr + 32), simd128::load(ptr + 48)} {} + + sonic_force_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr + sizeof(simd128) * 0); + this->chunks[1].store(ptr + sizeof(simd128) * 1); + this->chunks[2].store(ptr + sizeof(simd128) * 2); + this->chunks[3].store(ptr + sizeof(simd128) * 3); + } + + sonic_force_inline simd128 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + sonic_force_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + sonic_force_inline uint64_t eq(const T m) const { + const simd128 mask = simd128::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + sonic_force_inline uint64_t eq(const simd8x64& other) const { + return simd8x64(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3]) + .to_bitmask(); + } + + sonic_force_inline uint64_t lteq(const T m) const { + const simd128 mask = simd128::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } +}; // struct simd8x64 + +#undef REPEAT16_ARGS +} // namespace simd +} // namespace internal +} // namespace sonic_json diff --git 
a/include/sonic/internal/arch/sse/skip.h b/include/sonic/internal/arch/sse/skip.h new file mode 100644 index 00000000..3860bc2f --- /dev/null +++ b/include/sonic/internal/arch/sse/skip.h @@ -0,0 +1,62 @@ +/* + * Copyright 2022 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "base.h" +#include "quote.h" +#include "simd.h" +#include "unicode.h" + +namespace sonic_json { +namespace internal { +namespace x86_common { + +using sse::GetEscapedBranchless; +using sse::GetNonSpaceBits; +using VecUint8Type = simd::simd128; +using VecBoolType = simd::simd128; + +} // namespace x86_common +} // namespace internal +} // namespace sonic_json + +#include "../common/x86_common/skip.h" + +namespace sonic_json { +namespace internal { +namespace sse { + +using sonic_json::internal::common::EqBytes4; +using sonic_json::internal::common::SkipLiteral; +using sonic_json::internal::x86_common::GetNextToken; +using sonic_json::internal::x86_common::GetStringBits; +using sonic_json::internal::x86_common::SkipArray; +using sonic_json::internal::x86_common::SkipContainer; +using sonic_json::internal::x86_common::SkipNumber; +using sonic_json::internal::x86_common::SkipObject; +using sonic_json::internal::x86_common::SkipScanner; +using sonic_json::internal::x86_common::SkipString; + +} // namespace sse +} // namespace internal +} // namespace sonic_json diff --git 
a/include/sonic/internal/arch/sse/str2int.h b/include/sonic/internal/arch/sse/str2int.h new file mode 100644 index 00000000..a65b51d1 --- /dev/null +++ b/include/sonic/internal/arch/sse/str2int.h @@ -0,0 +1,36 @@ +/* + * Copyright 2022 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +namespace sonic_json { + +namespace internal { + +namespace sse { + +uint64_t simd_str2int(const char* c, int& mand_nd); + +} // namespace sse +} // namespace internal +} // namespace sonic_json + +#define SIMD_INLINE sonic_force_inline +#include "str2int_impl.h" diff --git a/include/sonic/internal/simd_str2int.h b/include/sonic/internal/arch/sse/str2int_impl.h similarity index 92% rename from include/sonic/internal/simd_str2int.h rename to include/sonic/internal/arch/sse/str2int_impl.h index 522cde39..a5ce7123 100644 --- a/include/sonic/internal/simd_str2int.h +++ b/include/sonic/internal/arch/sse/str2int_impl.h @@ -18,22 +18,35 @@ #include +#include + namespace sonic_json { namespace internal { +namespace sse { + +#ifdef DATA_MADDUBS +#undef DATA_MADDUBS +#endif #define DATA_MADDUBS() \ { \ __m128i q = _mm_set1_epi64x(0x010A010A010A010A); \ data = _mm_maddubs_epi16(data, q); \ } +#ifdef DATA_MADD +#undef DATA_MADD +#endif #define DATA_MADD() \ { \ __m128i q = _mm_set1_epi64x(0x0001006400010064); \ data = _mm_madd_epi16(data, q); \ } +#ifdef DATA_PACK_AND_MADD +#undef DATA_PACK_AND_MADD +#endif #define 
DATA_PACK_AND_MADD() \ { \ data = _mm_packus_epi32(data, data); \ @@ -41,7 +54,8 @@ namespace internal { data = _mm_madd_epi16(data, q); \ } -sonic_force_inline uint64_t simd_str2int_sse(const char* c, int& man_nd) { +SIMD_INLINE uint64_t simd_str2int(const char* c, int& man_nd) { + // uint64_t simd_str2int(const char* c, int& man_nd) { __m128i data = _mm_loadu_si128((const __m128i*)c); __m128i zero = _mm_setzero_si128(); __m128i nine = _mm_set1_epi8(9); @@ -128,5 +142,6 @@ sonic_force_inline uint64_t simd_str2int_sse(const char* c, int& man_nd) { _mm_extract_epi32(data, 1); } +} // namespace sse } // namespace internal } // namespace sonic_json diff --git a/include/sonic/internal/arch/sse/unicode.h b/include/sonic/internal/arch/sse/unicode.h new file mode 100644 index 00000000..6544c1ac --- /dev/null +++ b/include/sonic/internal/arch/sse/unicode.h @@ -0,0 +1,98 @@ +// Copyright 2018-2019 The simdjson authors + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file may have been modified by ByteDance authors. All ByteDance +// Modifications are Copyright 2022 ByteDance Authors. 
+ +#pragma once + +#include + +#include +#include + +#include "../common/unicode_common.h" +#include "base.h" +#include "simd.h" + +namespace sonic_json { +namespace internal { +namespace sse { + +using namespace simd; +using sonic_json::internal::common::handle_unicode_codepoint; + +struct StringBlock { + public: + sonic_force_inline static StringBlock Find(const uint8_t *src); + sonic_force_inline bool HasQuoteFirst() { + return (((bs_bits - 1) & quote_bits) != 0) && !HasUnescaped(); + } + sonic_force_inline bool HasBackslash() { + return ((quote_bits - 1) & bs_bits) != 0; + } + sonic_force_inline bool HasUnescaped() { + return ((quote_bits - 1) & unescaped_bits) != 0; + } + sonic_force_inline int QuoteIndex() { return TrailingZeroes(quote_bits); } + sonic_force_inline int BsIndex() { return TrailingZeroes(bs_bits); } + sonic_force_inline int UnescapedIndex() { + return TrailingZeroes(unescaped_bits); + } + + uint32_t bs_bits; + uint32_t quote_bits; + uint32_t unescaped_bits; +}; + +sonic_force_inline StringBlock StringBlock::Find(const uint8_t *src) { + simd128 v(src); + return { + static_cast((v == '\\').to_bitmask()), + static_cast((v == '"').to_bitmask()), + static_cast((v <= '\x1f').to_bitmask()), + }; +} + +sonic_force_inline uint64_t GetNonSpaceBits(const uint8_t *data) { + const simd::simd8x64 v(data); + const auto whitespace_table = + simd128::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, + '\t', '\n', 112, 100, '\r', 100, 100); + + uint64_t space = v.eq({ + _mm_shuffle_epi8(whitespace_table, v.chunks[0]), + _mm_shuffle_epi8(whitespace_table, v.chunks[1]), + _mm_shuffle_epi8(whitespace_table, v.chunks[2]), + _mm_shuffle_epi8(whitespace_table, v.chunks[3]), + }); + return ~space; +} + +sonic_force_inline uint64_t GetEscapedBranchless(uint64_t &prev_escaped, + uint64_t backslash) { + backslash &= ~prev_escaped; + uint64_t follows_escape = backslash << 1 | prev_escaped; + const uint64_t even_bits = 0x5555555555555555ULL; + uint64_t odd_sequence_starts 
= backslash & ~even_bits & ~follows_escape; + uint64_t sequences_starting_on_even_bits; + prev_escaped = AddOverflow(odd_sequence_starts, backslash, + &sequences_starting_on_even_bits); + uint64_t invert_mask = sequences_starting_on_even_bits << 1; + return (even_bits ^ invert_mask) & follows_escape; +} + +} // namespace sse +} // namespace internal +} // namespace sonic_json diff --git a/include/sonic/internal/atof_native.h b/include/sonic/internal/atof_native.h index 34f8889b..de8a04f2 100644 --- a/include/sonic/internal/atof_native.h +++ b/include/sonic/internal/atof_native.h @@ -9,7 +9,7 @@ #include -#include "sonic/internal/haswell.h" +#include "sonic/internal/arch/simd_base.h" namespace sonic_json { @@ -803,7 +803,7 @@ static sonic_force_inline bool AtofEiselLemire64(uint64_t mant, int exp10, } /* Calculate the 2-base exponent of float */ - uint32_t clz = haswell::leading_zeroes(mant); + uint32_t clz = LeadingZeroes(mant); mant <<= clz; /* lg10/lg2 ≈ 217706>>16 */ uint64_t ret_exp2 = diff --git a/include/sonic/internal/ftoa.h b/include/sonic/internal/ftoa.h index 1e905873..8edf397c 100644 --- a/include/sonic/internal/ftoa.h +++ b/include/sonic/internal/ftoa.h @@ -1065,4 +1065,4 @@ sonic_static_noinline int F64toa(char* out, double fp) { } // namespace internal -} // namespace sonic_json \ No newline at end of file +} // namespace sonic_json diff --git a/include/sonic/internal/itoa.h b/include/sonic/internal/itoa.h index 646add36..15c7af86 100644 --- a/include/sonic/internal/itoa.h +++ b/include/sonic/internal/itoa.h @@ -16,21 +16,16 @@ #pragma once -#include -#include +#include +#include +#include "sonic/internal/arch/simd_itoa.h" #include "sonic/macro.h" namespace sonic_json { namespace internal { -#define as_m128p(v) ((__m128i *)(v)) -#define as_m128c(v) ((const __m128i *)(v)) -#define as_m256c(v) ((const __m256i *)(v)) -#define as_m128v(v) (*(const __m128i *)(v)) -#define as_uint64v(p) (*(uint64_t *)(p)) - static const char kDigits[202] sonic_align(2) = 
"00010203040506070809" "10111213141516171819" @@ -43,68 +38,6 @@ static const char kDigits[202] sonic_align(2) = "80818283848586878889" "90919293949596979899"; -static const char kVec16xAsc0[16] sonic_align(16) = { - '0', '0', '0', '0', '0', '0', '0', '0', - '0', '0', '0', '0', '0', '0', '0', '0', -}; - -static const uint16_t kVec8x10[8] sonic_align(16) = { - 10, 10, 10, 10, 10, 10, 10, 10, -}; - -static const uint32_t kVec4x10k[4] sonic_align(16) = { - 10000, - 10000, - 10000, - 10000, -}; - -static const uint32_t kVec4xDiv10k[4] sonic_align(16) = { - 0xd1b71759, - 0xd1b71759, - 0xd1b71759, - 0xd1b71759, -}; - -static const uint16_t kVecDivPowers[8] sonic_align(16) = { - 0x20c5, 0x147b, 0x3334, 0x8000, 0x20c5, 0x147b, 0x3334, 0x8000, -}; - -static const uint16_t kVecShiftPowers[8] sonic_align(16) = { - 0x0080, 0x0800, 0x2000, 0x8000, 0x0080, 0x0800, 0x2000, 0x8000, -}; - -// Convert num's each digit as packed 16-bit in a vector. -// num's digits as abcdefgh (high bits is 0 if not enough) -// The converted vector is { a, b, c, d, e, f, g, h } -sonic_force_inline __m128i UtoaSSE(uint32_t num) { - // num(abcdefgh) -> v04 = vector{abcd, efgh, 0, 0, 0, 0, 0, 0} - __m128i v00 = _mm_cvtsi32_si128(num); - __m128i v01 = _mm_mul_epu32(v00, as_m128v(kVec4xDiv10k)); - __m128i v02 = _mm_srli_epi64(v01, 45); - __m128i v03 = _mm_mul_epu32(v02, as_m128v(kVec4x10k)); - __m128i v04 = _mm_sub_epi32(v00, v03); - __m128i v05 = _mm_unpacklo_epi16(v02, v04); - - // v08 = vector{abcd * 4, abcd * 4, abcd * 4, abcd * 4, efgh * 4, efgh * 4, - // efgh * 4, efgh * 4} - __m128i v06 = _mm_slli_epi64(v05, 2); - __m128i v07 = _mm_unpacklo_epi16(v06, v06); - __m128i v08 = _mm_unpacklo_epi32(v07, v07); - - // v10 = { a, ab, abc, abcd, e, ef, efg, efgh } - __m128i v09 = _mm_mulhi_epu16(v08, as_m128v(kVecDivPowers)); - __m128i v10 = _mm_mulhi_epu16(v09, as_m128v(kVecShiftPowers)); - - // v12 = { 0, a0, ab0, abc0, 0, e0, ef0, efg0 } - __m128i v11 = _mm_mullo_epi16(v10, as_m128v(kVec8x10)); - __m128i 
v12 = _mm_slli_epi64(v11, 16); - - // v13 = { a, b, c, d, e, f, g, h } - __m128i v13 = _mm_sub_epi16(v10, v12); - return v13; -} - sonic_force_inline void Copy2Digs(char *dst, const char *src) { *(dst) = *(src); *(dst + 1) = *(src + 1); @@ -154,32 +87,6 @@ sonic_force_inline char *Utoa_1_8(char *out, uint32_t val) { } } -static sonic_force_inline char *Utoa_8(uint32_t val, char *out) { - /* convert to digits */ - __m128i v0 = UtoaSSE(val); - __m128i v1 = _mm_setzero_si128(); - - /* convert to bytes, add '0' */ - __m128i v2 = _mm_packus_epi16(v0, v1); - __m128i v3 = _mm_add_epi8(v2, as_m128v(kVec16xAsc0)); - - /* store high 64 bits */ - _mm_storeu_si128(as_m128p(out), v3); - return out + 8; -} - -static sonic_force_inline char *Utoa_16(uint64_t val, char *out) { - /* remaining digits */ - __m128i v0 = UtoaSSE((uint32_t)(val / 100000000)); - __m128i v1 = UtoaSSE((uint32_t)(val % 100000000)); - __m128i v2 = _mm_packus_epi16(v0, v1); - __m128i v3 = _mm_add_epi8(v2, as_m128v(kVec16xAsc0)); - - /* convert to bytes, add '0' */ - _mm_storeu_si128(as_m128p(out), v3); - return out + 16; -} - sonic_force_inline char *U64toa_17_20(char *out, uint64_t val) { uint64_t lo = val % 10000000000000000; uint32_t hi = (uint32_t)(val / 10000000000000000); @@ -222,4 +129,4 @@ sonic_force_inline char *I64toa(char *buf, int64_t val) { } // namespace internal -} // namespace sonic_json \ No newline at end of file +} // namespace sonic_json diff --git a/include/sonic/internal/parse_number_normal_fast.h b/include/sonic/internal/parse_number_normal_fast.h index 9d82bd48..4c91250c 100644 --- a/include/sonic/internal/parse_number_normal_fast.h +++ b/include/sonic/internal/parse_number_normal_fast.h @@ -19,6 +19,8 @@ #include +#include "sonic/internal/arch/simd_base.h" + namespace sonic_json { namespace internal { inline bool ParseFloatingNormalFast(uint64_t& d_raw, int exp10, uint64_t man, @@ -36,7 +38,7 @@ inline bool ParseFloatingNormalFast(uint64_t& d_raw, int exp10, uint64_t man, sig2 = 
internal::kPow10M128Tab[idx][1]; // TODO: } - lz = internal::haswell::leading_zeroes(man); + lz = internal::LeadingZeroes(man); sig1 = man << lz; exp2 = ((217706 * exp10 - 4128768) >> 16) - lz; diff --git a/include/sonic/internal/quote.h b/include/sonic/internal/quote.h deleted file mode 100644 index 6d317d82..00000000 --- a/include/sonic/internal/quote.h +++ /dev/null @@ -1,404 +0,0 @@ -/* - * Copyright 2022 ByteDance Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -#include "sonic/error.h" -#include "sonic/internal/haswell.h" -#include "sonic/internal/simd.h" -#include "sonic/internal/unicode.h" -#include "sonic/macro.h" - -#ifndef PAGE_SIZE -#define PAGE_SIZE 4096 -#endif - -namespace sonic_json { -namespace internal { - -using namespace simd; - -// kEscapedMap maps the escaped char into origin char, as follows: -// ['/' ] = '/', -// ['"' ] = '"', -// ['b' ] = '\b', -// ['f' ] = '\f', -// ['n' ] = '\n', -// ['r' ] = '\r', -// ['t' ] = '\t', -// ['u' ] = -1, -// ['\\'] = '\\', -static const uint8_t kEscapedMap[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, '"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '/', - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '\\', 0, 0, 0, - 0, 0, '\b', 0, 0, 0, '\f', 0, 0, 0, 0, 0, 0, 0, '\n', 0, - 0, 0, '\r', 0, '\t', 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -sonic_force_inline size_t parseStringInplace(uint8_t *&src, SonicError &err) { -#define SONIC_REPEAT8(v) {v v v v v v v v} - - uint8_t *dst = src; - uint8_t *sdst = src; - while (1) { - find: - auto block = StringBlock::Find(src); - if (block.HasQuoteFirst()) { - int idx = block.QuoteIndex(); - src += idx; - *src++ = '\0'; - return src - sdst - 1; - } - if (block.HasUnescaped()) { - err = kParseErrorUnEscaped; - return 0; - } - if (!block.HasBackslash()) { - src += 32; - goto find; - } - - /* find out where the backspace is */ - auto bs_dist = block.BsIndex(); - src += bs_dist; - dst = src; - cont: - uint8_t escape_char = src[1]; - if (sonic_unlikely(escape_char == 'u')) { - if (!handle_unicode_codepoint(const_cast(&src), &dst)) { - err = kParseErrorEscapedUnicode; - return 0; - } - } else { - *dst = kEscapedMap[escape_char]; - if (sonic_unlikely(*dst == 0u)) { - err = kParseErrorEscapedFormat; - return 0; - } - src += 2; - dst += 1; - } - // fast path for continous escaped chars - if (*src == '\\') { - bs_dist = 0; - goto cont; - } - - find_and_move: - // Copy the next n bytes, and find the backslash and quote in them. - simd256 v(src); - block = StringBlock{ - static_cast((v == '\\').to_bitmask()), // bs_bits - static_cast((v == '"').to_bitmask()), // quote_bits - static_cast((v <= '\x1f').to_bitmask()), - }; - // If the next thing is the end quote, copy and return - if (block.HasQuoteFirst()) { - // we encountered quotes first. 
Move dst to point to quotes and exit - while (1) { - SONIC_REPEAT8(if (sonic_unlikely(*src == '"')) break; - else { *dst++ = *src++; }); - } - *dst = '\0'; - src++; - return dst - sdst; - } - if (block.HasUnescaped()) { - err = kParseErrorUnEscaped; - return 0; - } - if (!block.HasBackslash()) { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - v.store(dst); - src += 32; - dst += 32; - goto find_and_move; - } - while (1) { - SONIC_REPEAT8(if (sonic_unlikely(*src == '\\')) break; - else { *dst++ = *src++; }); - } - goto cont; - } - sonic_assert(false); -#undef SONIC_REPEAT8 -} - -// GCC didn't support non-trivial designated initializers C99 extension -struct QuotedChar { - long n; - const char *s; -}; - -static const struct QuotedChar kQuoteTab[256] = { - // 0x00 ~ 0x1f - {.n = 6, .s = "\\u0000\0\0"}, - {.n = 6, .s = "\\u0001\0\0"}, - {.n = 6, .s = "\\u0002\0\0"}, - {.n = 6, .s = "\\u0003\0\0"}, - {.n = 6, .s = "\\u0004\0\0"}, - {.n = 6, .s = "\\u0005\0\0"}, - {.n = 6, .s = "\\u0006\0\0"}, - {.n = 6, .s = "\\u0007\0\0"}, - {.n = 2, .s = "\\b\0\0\0\0\0\0"}, - {.n = 2, .s = "\\t\0\0\0\0\0\0"}, - {.n = 2, .s = "\\n\0\0\0\0\0\0"}, - {.n = 6, .s = "\\u000b\0\0"}, - {.n = 2, .s = "\\f\0\0\0\0\0\0"}, - {.n = 2, .s = "\\r\0\0\0\0\0\0"}, - {.n = 6, .s = "\\u000e\0\0"}, - {.n = 6, .s = "\\u000f\0\0"}, - {.n = 6, .s = "\\u0010\0\0"}, - {.n = 6, .s = "\\u0011\0\0"}, - {.n = 6, .s = "\\u0012\0\0"}, - {.n = 6, .s = "\\u0013\0\0"}, - {.n = 6, .s = "\\u0014\0\0"}, - {.n = 6, .s = "\\u0015\0\0"}, - {.n = 6, .s = "\\u0016\0\0"}, - {.n = 6, .s = "\\u0017\0\0"}, - {.n = 6, .s = "\\u0018\0\0"}, - {.n = 6, .s = "\\u0019\0\0"}, - {.n = 6, .s = "\\u001a\0\0"}, - {.n = 6, .s = "\\u001b\0\0"}, - {.n = 6, .s = "\\u001c\0\0"}, - {.n = 6, .s = "\\u001d\0\0"}, - {.n = 6, .s = "\\u001e\0\0"}, - {.n = 6, .s = "\\u001f\0\0"}, - // 0x20 ~ 0x2f - {0, 0}, - {0, 0}, - {.n = 2, .s = "\\\"\0\0\0\0\0\0"}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 
0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - // 0x30 ~ 0x4f - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - // 0x50 ~ 0x5f - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {.n = 2, .s = "\\\\\0\0\0\0\0\0"}, - {0, 0}, - {0, 0}, - {0, 0}, - // 0x60 ~ 0xff -}; - -static const bool kNeedEscaped[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static sonic_force_inline uint8_t GetEscapeMask4(const char *src) { - return kNeedEscaped[*(uint8_t *)(src)] | - (kNeedEscaped[*(uint8_t *)(src + 1)] << 1) | - (kNeedEscaped[*(uint8_t *)(src + 2)] << 2) | - (kNeedEscaped[*(uint8_t *)(src + 3)] << 3); -} - -static sonic_force_inline int CopyAndGetEscapMask128(const char *src, - char *dst) { - simd128 v(reinterpret_cast(src)); - v.store(reinterpret_cast(dst)); - return ((v < '\x20') | (v == '\\') | (v == '"')).to_bitmask(); -} - -static sonic_force_inline int 
CopyAndGetEscapMask256(const char *src, - char *dst) { - simd256 v(reinterpret_cast(src)); - v.store(reinterpret_cast(dst)); - return ((v < '\x20') | (v == '\\') | (v == '"')).to_bitmask(); -} - -sonic_static_inline void DoEscape(const char *&src, char *&dst, size_t &nb) { - /* get the escape entry, handle consecutive quotes */ - do { - uint8_t ch = *(uint8_t *)src; - int nc = kQuoteTab[ch].n; - std::memcpy(dst, kQuoteTab[ch].s, 8); - src++; - nb--; - dst += nc; - if (nb <= 0) return; - /* copy and find escape chars */ - if (kNeedEscaped[*(uint8_t *)(src)] == 0) { - return; - } - } while (true); -} - -// Not check the buffer size of dst, src must be a valid UTF-8 string with -// null-terminator. -#define MOVE_N_CHARS(src, N) \ - { \ - (src) += (N); \ - nb -= (N); \ - dst += (N); \ - } - -#ifdef __GNUC__ -#if defined(__SANITIZE_THREAD__) || defined(__SANITIZE_ADDRESS__) || \ - defined(__SANITIZE_LEAK__) || defined(__SANITIZE_UNDEFINED__) -#ifndef SONIC_USE_SANITIZE -#define SONIC_USE_SANITIZE -#endif -#endif -#endif - -#if defined(__clang__) -#if defined(__has_feature) -#if __has_feature(address_sanitizer) || __has_feature(thread_sanitizer) || \ - __has_feature(memory_sanitizer) || \ - __has_feature(undefined_behavior_sanitizer) || \ - __has_feature(leak_sanitizer) -#ifndef SONIC_USE_SANITIZE -#define SONIC_USE_SANITIZE -#endif -#endif -#endif -#endif - -sonic_static_inline char *Quote(const char *src, size_t nb, char *dst) { - *dst++ = '"'; - sonic_assert(nb < (1ULL << 32)); - uint32_t mm; - int cn; - - /* 32-byte loop */ - while (nb >= 32) { - /* check for matches */ - // TODO: optimize: exploit the simd bitmask in the escape block. 
- if ((mm = CopyAndGetEscapMask256(src, dst)) != 0) { - cn = __builtin_ctz(mm); - MOVE_N_CHARS(src, cn); - DoEscape(src, dst, nb); - } else { - /* move to next block */ - MOVE_N_CHARS(src, 32); - } - } - - if (nb > 0) { - char tmp_src[64]; - const char *src_r; -#ifdef SONIC_USE_SANITIZE - if (0) { -#else - /* This code would cause address sanitizer report heap-buffer-overflow. */ - if (((size_t)(src) & (PAGE_SIZE - 1)) <= (PAGE_SIZE - 64)) { - src_r = src; -#endif - } else { - std::memcpy(tmp_src, src, nb); - src_r = tmp_src; - } - while (nb > 0) { - mm = CopyAndGetEscapMask256(src_r, dst) & (0xFFFFFFFF >> (32 - nb)); - if (mm) { - cn = __builtin_ctz(mm); - MOVE_N_CHARS(src_r, cn); - DoEscape(src_r, dst, nb); - } else { - dst += nb; - nb = 0; - } - } - } - - *dst++ = '"'; - return dst; -} - -} // namespace internal -} // namespace sonic_json diff --git a/include/sonic/internal/utils.h b/include/sonic/internal/utils.h new file mode 100644 index 00000000..c88f1712 --- /dev/null +++ b/include/sonic/internal/utils.h @@ -0,0 +1,29 @@ +/* + * Copyright 2022 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +namespace sonic_json { +namespace internal { + +static sonic_force_inline bool IsSpace(uint8_t ch) { + return ch == ' ' || ch == '\r' || ch == '\n' || ch == '\t'; +} + +} // namespace internal +} // namespace sonic_json diff --git a/tests/quote_test.cpp b/tests/quote_test.cpp index bfe75379..2c149d38 100644 --- a/tests/quote_test.cpp +++ b/tests/quote_test.cpp @@ -14,9 +14,8 @@ * limitations under the License. */ -#include "sonic/internal/quote.h" - #include +#include #include #include