Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ option(BUILD_UNITTEST "Build unittest." ON)
option(BUILD_FUZZ "Build fuzz." OFF)
option(BUILD_BENCH "Build benchmark." OFF)
option(ENABLE_SVE2_128 "Build for Arm SVE2 with 128 bit vector size" OFF)
option(ENABLE_SVE_256 "Build for Arm SVE vector size" OFF)

set(CMAKE_CXX_EXTENSIONS OFF)
if(BUILD_UNITTEST)
Expand Down
2 changes: 2 additions & 0 deletions cmake/set_arch_flags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ function(set_arch_flags target arch)
elseif(arch MATCHES "aarch64")
if(ENABLE_SVE2_128)
target_compile_options(${target} PRIVATE -march=armv8-a+sve2 -msve-vector-bits=128)
elseif(ENABLE_SVE_256)
target_compile_options(${target} PRIVATE -march=armv8-a+sve -msve-vector-bits=256)
else()
target_compile_options(${target} PRIVATE -march=armv8-a)
endif()
Expand Down
3 changes: 3 additions & 0 deletions include/sonic/internal/arch/simd_dispatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@
#if defined(SONIC_HAVE_SVE2_128)
#define SONIC_USING_ARCH_FUNC(func) using sve2_128::func
#define INCLUDE_ARCH_FILE(file) SONIC_STRINGIFY(sve2-128/file)
#elif defined(SONIC_HAVE_SVE_256)
#define SONIC_USING_ARCH_FUNC(func) using sve_256::func
#define INCLUDE_ARCH_FILE(file) SONIC_STRINGIFY(sve-256/file)
#elif defined(SONIC_HAVE_NEON)
#define SONIC_USING_ARCH_FUNC(func) using neon::func
#define INCLUDE_ARCH_FILE(file) SONIC_STRINGIFY(neon/file)
Expand Down
3 changes: 3 additions & 0 deletions include/sonic/internal/arch/sonic_cpu_feature.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,7 @@
#if defined(__ARM_FEATURE_SVE2) && (__ARM_FEATURE_SVE_BITS == 128)
#define SONIC_HAVE_SVE2_128
#endif
#if defined(__ARM_FEATURE_SVE) && (__ARM_FEATURE_SVE_BITS == 256)
#define SONIC_HAVE_SVE_256
#endif
#endif
157 changes: 157 additions & 0 deletions include/sonic/internal/arch/sve-256/base.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
#pragma once

#include "../common/arm_common/base.h"
#include <arm_sve.h>

namespace sonic_json {
namespace internal {
namespace sve_256 {

using sonic_json::internal::arm_common::ClearLowestBit;
using sonic_json::internal::arm_common::CountOnes;
using sonic_json::internal::arm_common::InlinedMemcmp;
using sonic_json::internal::arm_common::LeadingZeroes;
using sonic_json::internal::arm_common::PrefixXor;
using sonic_json::internal::arm_common::TrailingZeroes;

static inline bool is_eq_lt_32(const void* a, const void* b, size_t s) {
auto lhs = static_cast<const uint8_t*>(a);
auto rhs = static_cast<const uint8_t*>(b);
svbool_t pg = svwhilelt_b8((size_t)0, s);
svbool_t ptrue = svptrue_b8();
svuint8_t va = svld1(pg, lhs);
svuint8_t vb = svld1(pg, rhs);
svbool_t neq_mask = svcmpne(ptrue, va, vb);
return svptest_any(pg, neq_mask) == 0;
}

sonic_force_inline bool InlinedMemcmpEq(const void* _a, const void* _b, size_t s) {
const uint8_t* a = static_cast<const uint8_t*>(_a);
const uint8_t* b = static_cast<const uint8_t*>(_b);

if (s == 0) return true;
if (s < 32) return is_eq_lt_32(a, b, s);

svbool_t ptrue = svptrue_b8();
svbool_t pg_head = svwhilelt_b8(0, 32);
svuint8_t head_a = svld1(pg_head, a);
svuint8_t head_b = svld1(pg_head, b);

svbool_t cmp_head = svcmpne(pg_head, head_a, head_b);
if (svptest_any(pg_head, cmp_head)) {
return false;
}

if (s > 32) {
size_t tail_offset = s - 32;
svuint8_t tail_a = svld1(pg_head, a + tail_offset);
svuint8_t tail_b = svld1(pg_head, b + tail_offset);
svbool_t cmp_tail = svcmpne(pg_head, tail_a, tail_b);
if (svptest_any(pg_head, cmp_tail)) {
return false;
}
}

if (s > 64) {
for (size_t offset = 32; offset < s - 32; offset += 32) {
svuint8_t va = svld1(ptrue, a + offset);
svuint8_t vb = svld1(ptrue, b + offset);
svbool_t neq_mask = svcmpne(ptrue, va, vb);
if (svptest_any(ptrue, neq_mask)) {
return false;
}
}
}
return true;
}

template <size_t ChunkSize>
sonic_force_inline void Xmemcpy(void* dst_, const void* src_, size_t chunks) {
std::memcpy(dst_, src_, chunks * ChunkSize);
}

template <>
sonic_force_inline void Xmemcpy<32>(void* dst_, const void* src_, size_t chunks) {
uint8_t* dst = reinterpret_cast<uint8_t*>(dst_);
const uint8_t* src = reinterpret_cast<const uint8_t*>(src_);
svbool_t pg = svptrue_b8();
size_t blocks = chunks / 4;
for (size_t i = 0; i < blocks; i++) {
for (size_t j = 0; j < 4; j++) {
svuint8_t vsrc = svld1_u8(pg, src);
svst1_u8(pg, dst, vsrc);
src += 32;
dst += 32;
}
}

switch (chunks & 3) {
case 3: {
svuint8_t vsrc = svld1_u8(pg, src);
svst1_u8(pg, dst, vsrc);
src += 32;
dst += 32;
}
/* fall through */
case 2: {
svuint8_t vsrc = svld1_u8(pg, src);
svst1_u8(pg, dst, vsrc);
src += 32;
dst += 32;
}
/* fall through */
case 1: {
svuint8_t vsrc = svld1_u8(pg, src);
svst1_u8(pg, dst, vsrc);
}
}
}

template <>
sonic_force_inline void Xmemcpy<16>(void* dst_, const void* src_, size_t chunks) {
uint8_t* dst = reinterpret_cast<uint8_t*>(dst_);
const uint8_t* src = reinterpret_cast<const uint8_t*>(src_);
svbool_t pg = svptrue_b8();
size_t blocks = chunks / 8;
for (size_t i = 0; i < blocks; i++) {
for (size_t j = 0; j < 4; j++) {
svuint8_t vsrc = svld1_u8(pg, src);
svst1_u8(pg, dst, vsrc);
src += 32;
dst += 32;
}
}

switch ((chunks / 2) & 3) {
case 3: {
svuint8_t vsrc = svld1_u8(pg, src);
svst1_u8(pg, dst, vsrc);
src += 32;
dst += 32;
}
/* fall through */
case 2: {
svuint8_t vsrc = svld1_u8(pg, src);
svst1_u8(pg, dst, vsrc);
src += 32;
dst += 32;
}
/* fall through */
case 1: {
svuint8_t vsrc = svld1_u8(pg, src);
svst1_u8(pg, dst, vsrc);
src += 32;
dst += 32;
}
}

if (chunks & 1) {
svbool_t pg = svwhilelt_b8(0, 16);
svuint8_t vsrc = svld1_u8(pg, src);
svst1_u8(pg, dst, vsrc);
}
}

} // namespace sve_256
} // namespace internal
} // namespace sonic_json
14 changes: 14 additions & 0 deletions include/sonic/internal/arch/sve-256/itoa.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#pragma once

#include "../common/arm_common/itoa.h"

namespace sonic_json {
namespace internal {
namespace sve_256 {

using sonic_json::internal::arm_common::Utoa_16;
using sonic_json::internal::arm_common::Utoa_8;

} // namespace sve_256
} // namespace internal
} // namespace sonic_json
187 changes: 187 additions & 0 deletions include/sonic/internal/arch/sve-256/quote.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
#pragma once

#define VEC_LEN 16

#include "../common/arm_common/quote.h"
#include "unicode.h"

#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
#endif

#ifdef __GNUC__
#if defined(__SANITIZE_THREAD__) || defined(__SANITIZE_ADDRESS__) || defined(__SANITIZE_LEAK__) || \
defined(__SANITIZE_UNDEFINED__)
#ifndef SONIC_USE_SANITIZE
#define SONIC_USE_SANITIZE
#endif
#endif
#endif

#if defined(__clang__)
#if defined(__has_feature)
#if __has_feature(address_sanitizer) || __has_feature(thread_sanitizer) || __has_feature(memory_sanitizer) || \
__has_feature(undefined_behavior_sanitizer) || __has_feature(leak_sanitizer)
#ifndef SONIC_USE_SANITIZE
#define SONIC_USE_SANITIZE
#endif
#endif
#endif
#endif

#ifndef VEC_LEN
#error "You should define VEC_LEN before including quote.h!"
#endif

#define MOVE_N_CHARS(src, N) \
do { \
(src) += (N); \
nb -= (N); \
dst += (N); \
} while (0)

namespace sonic_json {
namespace internal {
namespace sve_256 {

sonic_force_inline svbool_t copy_get_escaped_mask_predicate(svbool_t pg, const char *src, char *dst)
{
svuint8_t v = svld1_u8(pg, reinterpret_cast<const uint8_t *>(src));
svst1_u8(pg, reinterpret_cast<uint8_t *>(dst), v);
svbool_t m1 = svcmpeq_n_u8(pg, v, '\\');
svbool_t m2 = svcmpeq_n_u8(pg, v, '"');
svbool_t m3 = svcmplt_n_u8(pg, v, '\x20');
svbool_t m4 = svorr_b_z(pg, m1, m2);
svbool_t m5 = svorr_b_z(pg, m3, m4);
return m5;
}

// The function returns the index of first (to the rigth) active elem
sonic_force_inline int get_first_active_index(svbool_t input)
{
return svlastb_u8(svbrka_b_z(input, input), svindex_u8(0, 1));
}


sonic_force_inline size_t parseStringInplace(uint8_t *&src, SonicError &err) {
#define SONIC_REPEAT8(v) {v v v v v v v v}

uint8_t *dst = src;
uint8_t *sdst = src;
while (1) {
find:
auto block = StringBlock::Find(src);
if (block.HasQuoteFirst()) {
int idx = block.QuoteIndex();
src += idx;
*src++ = '\0';
return src - sdst - 1;
}
if (block.HasUnescaped()) {
err = kParseErrorUnEscaped;
return 0;
}
if (!block.HasBackslash()) {
src += VEC_LEN;
goto find;
}

/* find out where the backspace is */
auto bs_dist = block.BsIndex();
src += bs_dist;
dst = src;
cont:
uint8_t escape_char = src[1];
if (sonic_unlikely(escape_char == 'u')) {
if (!handle_unicode_codepoint(const_cast<const uint8_t **>(&src), &dst)) {
err = kParseErrorEscapedUnicode;
return 0;
}
} else {
*dst = kEscapedMap[escape_char];
if (sonic_unlikely(*dst == 0u)) {
err = kParseErrorEscapedFormat;
return 0;
}
src += 2;
dst += 1;
}
// fast path for continous escaped chars
if (*src == '\\') {
bs_dist = 0;
goto cont;
}

find_and_move:
// Copy the next n bytes, and find the backslash and quote in them.
uint8x16_t v = vld1q_u8(src);
block = StringBlock::Find(v);
// If the next thing is the end quote, copy and return
if (block.HasQuoteFirst()) {
// we encountered quotes first. Move dst to point to quotes and exit
while (1) {
SONIC_REPEAT8(if (sonic_unlikely(*src == '"')) break;
else { *dst++ = *src++; });
}
*dst = '\0';
src++;
return dst - sdst;
}
if (block.HasUnescaped()) {
err = kParseErrorUnEscaped;
return 0;
}
if (!block.HasBackslash()) {
/* they are the same. Since they can't co-occur, it means we
* encountered neither. */
vst1q_u8(dst, v);
src += VEC_LEN;
dst += VEC_LEN;
goto find_and_move;
}
while (1) {
SONIC_REPEAT8(if (sonic_unlikely(*src == '\\')) break;
else { *dst++ = *src++; });
}
goto cont;
}
sonic_assert(false);
#undef SONIC_REPEAT8
}

sonic_force_inline char *Quote(const char *src, size_t nb, char *dst)
{
*dst++ = '"';
sonic_assert(nb < (1LL << 0x20));
auto svelen = svcntb();
svbool_t ptrue = svptrue_b8();
while (nb > svelen) {
svbool_t mask = copy_get_escaped_mask_predicate(ptrue, src, dst);
if (svptest_any(ptrue, mask)) {
auto cn = get_first_active_index(mask);
MOVE_N_CHARS(src, cn);
DoEscape(src, dst, nb);
} else {
MOVE_N_CHARS(src, svelen);
}
}
while (nb > 0) {
svbool_t predicate = svwhilelt_b8_u64(0, nb);
svbool_t mask = copy_get_escaped_mask_predicate(predicate, src, dst);
if (svptest_any(predicate, mask)) {
auto cn = get_first_active_index(mask);
MOVE_N_CHARS(src, cn);
DoEscape(src, dst, nb);
} else {
auto active_elems = svcntp_b8(predicate, predicate);
MOVE_N_CHARS(src, active_elems);
}
}
*dst++ = '"';
return dst;
}
} // namespace sve_256
} // namespace internal
} // namespace sonic_json

#undef VEC_LEN
Loading