Skip to content

Commit

Permalink
Merge pull request #27 from eddelbuettel/feature/simdjson_0.4.1
Browse files Browse the repository at this point in the history
simdjson 0.4.1 (closes #26) (closes #14)
  • Loading branch information
eddelbuettel committed Jun 27, 2020
2 parents 20338d3 + db24b82 commit 5b699ea
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 34 deletions.
53 changes: 45 additions & 8 deletions inst/include/simdjson.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* auto-generated on Tue 23 Jun 2020 20:51:12 EDT. Do not edit! */
/* auto-generated on Fri 26 Jun 2020 20:03:28 EDT. Do not edit! */
/* begin file src/simdjson.cpp */
#include "simdjson.h"

Expand Down Expand Up @@ -213,6 +213,10 @@ static inline uint32_t detect_supported_architectures() {
/* begin file src/simdprune_tables.h */
#ifndef SIMDJSON_SIMDPRUNE_TABLES_H
#define SIMDJSON_SIMDPRUNE_TABLES_H


#if SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE

#include <cstdint>

namespace simdjson { // table modified and copied from
Expand Down Expand Up @@ -340,6 +344,8 @@ static const uint64_t thintable_epi8[256] = {

} // namespace simdjson


#endif // SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE
#endif // SIMDJSON_SIMDPRUNE_TABLES_H
/* end file src/simdprune_tables.h */

Expand Down Expand Up @@ -934,9 +940,8 @@ struct value128 {
uint64_t high;
};

#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) && \
!defined(_M_X64) && !defined(_M_ARM64)// _umul128 for x86, arm
// this is a slow emulation routine for 32-bit Windows
#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm
// this is a slow emulation routine for 32-bit
//
static inline uint64_t __emulu(uint32_t x, uint32_t y) {
return x * (uint64_t)y;
Expand All @@ -955,15 +960,15 @@ static inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {

really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) {
value128 answer;
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS)
#ifdef _M_ARM64
// ARM64 has native support for 64-bit multiplications, no need to emulate
answer.high = __umulh(value1, value2);
answer.low = value1 * value2;
#else
answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64
#endif // _M_ARM64
#else // SIMDJSON_REGULAR_VISUAL_STUDIO
#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS)
__uint128_t r = ((__uint128_t)value1) * value2;
answer.low = uint64_t(r);
answer.high = uint64_t(r >> 64);
Expand Down Expand Up @@ -4038,7 +4043,15 @@ really_inline double compute_float_64(int64_t power, uint64_t i, bool negative,
// It was described in
// Clinger WD. How to read floating point numbers accurately.
// ACM SIGPLAN Notices. 1990
#ifndef FLT_EVAL_METHOD
#error "FLT_EVAL_METHOD should be defined, please include cfloat."
#endif
#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
// We cannot be certain that x/y is rounded to nearest.
if (0 <= power && power <= 22 && i <= 9007199254740991) {
#else
if (-22 <= power && power <= 22 && i <= 9007199254740991) {
#endif
// convert the integer into a double. This is lossless since
// 0 <= i <= 2^53 - 1.
double d = double(i);
Expand Down Expand Up @@ -6195,7 +6208,15 @@ really_inline double compute_float_64(int64_t power, uint64_t i, bool negative,
// It was described in
// Clinger WD. How to read floating point numbers accurately.
// ACM SIGPLAN Notices. 1990
#ifndef FLT_EVAL_METHOD
#error "FLT_EVAL_METHOD should be defined, please include cfloat."
#endif
#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
// We cannot be certain that x/y is rounded to nearest.
if (0 <= power && power <= 22 && i <= 9007199254740991) {
#else
if (-22 <= power && power <= 22 && i <= 9007199254740991) {
#endif
// convert the integer into a double. This is lossless since
// 0 <= i <= 2^53 - 1.
double d = double(i);
Expand Down Expand Up @@ -7826,7 +7847,7 @@ really_inline bool add_overflow(uint64_t value1, uint64_t value2,
#endif
}

#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS)
#pragma intrinsic(_umul128)
#endif
really_inline bool mul_overflow(uint64_t value1, uint64_t value2,
Expand Down Expand Up @@ -9499,7 +9520,15 @@ really_inline double compute_float_64(int64_t power, uint64_t i, bool negative,
// It was described in
// Clinger WD. How to read floating point numbers accurately.
// ACM SIGPLAN Notices. 1990
#ifndef FLT_EVAL_METHOD
#error "FLT_EVAL_METHOD should be defined, please include cfloat."
#endif
#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
// We cannot be certain that x/y is rounded to nearest.
if (0 <= power && power <= 22 && i <= 9007199254740991) {
#else
if (-22 <= power && power <= 22 && i <= 9007199254740991) {
#endif
// convert the integer into a double. This is lossless since
// 0 <= i <= 2^53 - 1.
double d = double(i);
Expand Down Expand Up @@ -11114,7 +11143,7 @@ really_inline bool add_overflow(uint64_t value1, uint64_t value2,
#endif
}

#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS)
#pragma intrinsic(_umul128)
#endif
really_inline bool mul_overflow(uint64_t value1, uint64_t value2,
Expand Down Expand Up @@ -12777,7 +12806,15 @@ really_inline double compute_float_64(int64_t power, uint64_t i, bool negative,
// It was described in
// Clinger WD. How to read floating point numbers accurately.
// ACM SIGPLAN Notices. 1990
#ifndef FLT_EVAL_METHOD
#error "FLT_EVAL_METHOD should be defined, please include cfloat."
#endif
#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
// We cannot be certain that x/y is rounded to nearest.
if (0 <= power && power <= 22 && i <= 9007199254740991) {
#else
if (-22 <= power && power <= 22 && i <= 9007199254740991) {
#endif
// convert the integer into a double. This is lossless since
// 0 <= i <= 2^53 - 1.
double d = double(i);
Expand Down
102 changes: 76 additions & 26 deletions inst/include/simdjson.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* auto-generated on Tue 23 Jun 2020 20:51:12 EDT. Do not edit! */
/* auto-generated on Fri 26 Jun 2020 20:03:28 EDT. Do not edit! */
/* begin file include/simdjson.h */
#ifndef SIMDJSON_H
#define SIMDJSON_H
Expand Down Expand Up @@ -58,6 +58,7 @@
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cfloat>


#ifdef _MSC_VER
Expand Down Expand Up @@ -91,24 +92,28 @@

#if defined(__x86_64__) || defined(_M_AMD64)
#define SIMDJSON_IS_X86_64 1
#endif
#if defined(__aarch64__) || defined(_M_ARM64)
#elif defined(__aarch64__) || defined(_M_ARM64)
#define SIMDJSON_IS_ARM64 1
#else
#define SIMDJSON_IS_32BITS 1

// We do not support 32-bit platforms, but it can be
// handy to identify them.
#if defined(_M_IX86) || defined(__i386__)
#define SIMDJSON_IS_X86_32BITS 1
#elif defined(__arm__) || defined(_M_ARM)
#define SIMDJSON_IS_ARM_32BITS 1
#endif

#if (!defined(SIMDJSON_IS_X86_64)) && (!defined(SIMDJSON_IS_ARM64))
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
#pragma message("The simdjson library is designed\
for 64-bit processors and it seems that you are not \
#endif // defined(__x86_64__) || defined(_M_AMD64)

#ifdef SIMDJSON_IS_32BITS
#pragma message("The simdjson library is designed \
for 64-bit processors and it seems that you are not \
compiling for a known 64-bit platform. All fast kernels \
will be disabled and performance may be poor. Please \
use a 64-bit target such as x64 or 64-bit ARM.")
#else
#error "The simdjson library is designed\
for 64-bit processors. It seems that you are not \
compiling for a known 64-bit platform."
#endif
#endif // (!defined(SIMDJSON_IS_X86_64)) && (!defined(SIMDJSON_IS_ARM64))
#endif // SIMDJSON_IS_32BITS
// this is almost standard?
#undef STRINGIFY_IMPLEMENTATION_
Expand All @@ -128,6 +133,15 @@ compiling for a known 64-bit platform."
#define SIMDJSON_IMPLEMENTATION_WESTMERE 0
#endif // SIMDJSON_IS_ARM64
// Our fast kernels require 64-bit systems.
//
// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions.
// Furthermore, the number of SIMD registers is reduced.
//
// On 32-bit ARM, we would have smaller registers.
//
// The simdjson users should still have the fallback kernel. It is
// slower, but it should run everywhere.
#if SIMDJSON_IS_X86_64
#ifndef SIMDJSON_IMPLEMENTATION_HASWELL
#define SIMDJSON_IMPLEMENTATION_HASWELL 1
Expand All @@ -138,7 +152,7 @@ compiling for a known 64-bit platform."
#define SIMDJSON_IMPLEMENTATION_ARM64 0
#endif // SIMDJSON_IS_X86_64
// we are going to use runtime dispatch
// We are going to use runtime dispatch.
#ifdef SIMDJSON_IS_X86_64
#ifdef __clang__
// clang does not have GCC push pop
Expand Down Expand Up @@ -2018,7 +2032,7 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS
#define SIMDJSON_SIMDJSON_VERSION_H
/** The version of simdjson being used (major.minor.revision) */
#define SIMDJSON_VERSION 0.4.0
#define SIMDJSON_VERSION 0.4.1
namespace simdjson {
enum {
Expand All @@ -2033,7 +2047,7 @@ enum {
/**
* The revision (major.minor.REVISION) of simdjson being used.
*/
SIMDJSON_VERSION_REVISION = 0
SIMDJSON_VERSION_REVISION = 1
};
} // namespace simdjson
Expand Down Expand Up @@ -2632,7 +2646,6 @@ inline error_code dom_parser_implementation::allocate(size_t capacity, size_t ma
#endif // SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H
/* end file include/simdjson/internal/dom_parser_implementation.h */
#include <optional>
#include <string>
#include <atomic>
#include <vector>
Expand Down Expand Up @@ -4008,21 +4021,42 @@ class element {
*/
inline simdjson_result<object> get_object() const noexcept;
/**
* Cast this element to a string.
* Cast this element to a null-terminated C string.
*
* The string is guaranteed to be valid UTF-8.
*
* The get_c_str() function is equivalent to get<const char *>().
*
* The length of the string is given by get_string_length(). Because JSON strings
* may contain null characters, it may be incorrect to use strlen to determine the
* string length.
*
* Equivalent to get<const char *>().
* It is possible to get a single string_view instance which represents both the string
* content and its length: see get_string().
*
* @returns An pointer to a null-terminated string. This string is stored in the parser and will
* @returns A pointer to a null-terminated UTF-8 string. This string is stored in the parser and will
* be invalidated the next time it parses a document or when it is destroyed.
* Returns INCORRECT_TYPE if the JSON element is not a string.
*/
inline simdjson_result<const char *> get_c_str() const noexcept;
/**
* Cast this element to a string.
* Gives the length in bytes of the string.
*
* It is possible to get a single string_view instance which represents both the string
* content and its length: see get_string().
*
* @returns A string length in bytes.
* Returns INCORRECT_TYPE if the JSON element is not a string.
*/
inline simdjson_result<size_t> get_string_length() const noexcept;
/**
* Cast this element to a string.
*
* The string is guaranteed to be valid UTF-8.
*
* Equivalent to get<std::string_view>().
*
* @returns A string. The string is stored in the parser and will be invalidated the next time it
* @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next time it
* parses a document or when it is destroyed.
* Returns INCORRECT_TYPE if the JSON element is not a string.
*/
Expand Down Expand Up @@ -4199,7 +4233,9 @@ class element {
inline operator bool() const noexcept(false);

/**
* Read this element as a null-terminated string.
* Read this element as a null-terminated UTF-8 string.
*
* Be mindful that JSON allows strings to contain null characters.
*
* Does *not* convert other types to a string; requires that the JSON type of the element was
* an actual string.
Expand All @@ -4210,7 +4246,7 @@ class element {
inline explicit operator const char*() const noexcept(false);

/**
* Read this element as a null-terminated string.
* Read this element as a null-terminated UTF-8 string.
*
* Does *not* convert other types to a string; requires that the JSON type of the element was
* an actual string.
Expand Down Expand Up @@ -4410,6 +4446,7 @@ struct simdjson_result<dom::element> : public internal::simdjson_result_base<dom
really_inline simdjson_result<dom::array> get_array() const noexcept;
really_inline simdjson_result<dom::object> get_object() const noexcept;
really_inline simdjson_result<const char *> get_c_str() const noexcept;
really_inline simdjson_result<size_t> get_string_length() const noexcept;
really_inline simdjson_result<std::string_view> get_string() const noexcept;
really_inline simdjson_result<int64_t> get_int64() const noexcept;
really_inline simdjson_result<uint64_t> get_uint64() const noexcept;
Expand Down Expand Up @@ -5820,6 +5857,10 @@ really_inline simdjson_result<const char *> simdjson_result<dom::element>::get_c
if (error()) { return error(); }
return first.get_c_str();
}
// Forwards get_string_length() to the wrapped element. When this result
// already holds an error, that error is returned and the element is not
// consulted.
really_inline simdjson_result<size_t> simdjson_result<dom::element>::get_string_length() const noexcept {
  if (!error()) {
    return first.get_string_length();
  }
  return error();
}
really_inline simdjson_result<std::string_view> simdjson_result<dom::element>::get_string() const noexcept {
if (error()) { return error(); }
return first.get_string();
Expand Down Expand Up @@ -5960,6 +6001,15 @@ inline simdjson_result<const char *> element::get_c_str() const noexcept {
return INCORRECT_TYPE;
}
}
// Gives the length of this element's string as reported by
// tape.get_string_length(). Any element whose tape type is not STRING
// yields INCORRECT_TYPE.
inline simdjson_result<size_t> element::get_string_length() const noexcept {
  if (tape.tape_ref_type() != internal::tape_type::STRING) {
    return INCORRECT_TYPE;
  }
  return tape.get_string_length();
}
inline simdjson_result<std::string_view> element::get_string() const noexcept {
switch (tape.tape_ref_type()) {
case internal::tape_type::STRING:
Expand Down Expand Up @@ -7610,14 +7660,14 @@ really_inline T tape_ref::next_tape_value() const noexcept {
}

// Returns the length of the string this tape entry refers to.
// tape_value() is used as an offset into doc->string_buf; the first
// sizeof(uint32_t) bytes at that offset are copied out and returned as the length.
// NOTE(review): the two declarations of string_buf_index below look like the
// before/after lines of this commit's diff (uint64_t replaced by size_t) —
// confirm against the actual header.
really_inline uint32_t internal::tape_ref::get_string_length() const noexcept {
uint64_t string_buf_index = size_t(tape_value());
size_t string_buf_index = size_t(tape_value());
uint32_t len;
// Copy the length bytes out with memcpy instead of dereferencing a casted
// pointer — presumably because the bytes may not be aligned for uint32_t (TODO confirm).
memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len));
return len;
}

// Returns a pointer to the character data of the string this tape entry refers to.
// tape_value() is used as an offset into doc->string_buf; the returned pointer
// skips sizeof(uint32_t) bytes past that offset (the bytes that
// get_string_length() reads as the length).
// NOTE(review): the two declarations of string_buf_index below look like the
// before/after lines of this commit's diff (uint64_t replaced by size_t) —
// confirm against the actual header.
really_inline const char * internal::tape_ref::get_c_str() const noexcept {
uint64_t string_buf_index = size_t(tape_value());
size_t string_buf_index = size_t(tape_value());
return reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]);
}

Expand Down

0 comments on commit 5b699ea

Please sign in to comment.