diff --git a/src/yyjson.c b/src/yyjson.c index 9546e29..92d5b71 100644 --- a/src/yyjson.c +++ b/src/yyjson.c @@ -2849,6 +2849,7 @@ bool unsafe_yyjson_mut_equals(yyjson_mut_val *lhs, yyjson_mut_val *rhs) { static_inline bool read_flag_eq(yyjson_read_flag flg, yyjson_read_flag chk) { #if YYJSON_DISABLE_NON_STANDARD if (chk == YYJSON_READ_ALLOW_INF_AND_NAN || + chk == YYJSON_READ_INF_AND_NAN_AS_NULL || chk == YYJSON_READ_ALLOW_COMMENTS || chk == YYJSON_READ_ALLOW_TRAILING_COMMAS || chk == YYJSON_READ_ALLOW_INVALID_UNICODE) @@ -2940,6 +2941,45 @@ static_inline bool read_hex_u16(const u8 *cur, u16 *val) { * These functions are used by JSON reader to read literals and comments. *============================================================================*/ +/** Returns true if `cur` starts with `str` (lowercase if `ignore_case`). + `cur` should be the padded json string, `str` should be a string literal. */ +static_inline bool is_str_matched(u8 *cur, const char *str, bool ignore_case) { + usize i, len = strlen(str); + const u8 *cmp = (const u8 *)str; + if (ignore_case) { + u8 dif = 'a' - 'A'; /* lowercase-to-uppercase offset */ + for (i = 0; i < len; i++) { + if (cur[i] != cmp[i] && cur[i] != cmp[i] - dif) return false; + } + } else { + for (i = 0; i < len; i++) { + if (cur[i] != cmp[i]) return false; + } + } + return true; +} + +/** Returns true if `[cur, end)` is a non-empty proper prefix of `str`. + `cur` should be the padded json string, `str` should be a string literal. */ +static_inline bool is_str_truncated(u8 *cur, u8 *end, + const char *str, bool ignore_case) { + usize len = strlen(str); + const u8 *cmp = (const u8 *)str; + if (cur >= end || cur + len <= end || len == 0) return false; + + if (ignore_case) { + u8 dif = 'a' - 'A'; /* lowercase-to-uppercase offset */ + for (; cur < end; cur++, cmp++) { + if (*cur != *cmp && *cur != *cmp - dif) return false; + } + } else { + for (; cur < end; cur++, cmp++) { + if (*cur != *cmp) return false; + } + } + return true; +} + /** Read 'true' literal, '*cur' should be 't'. 
*/ static_inline bool read_true(u8 **ptr, yyjson_val *val) { u8 *cur = *ptr; @@ -2976,70 +3016,40 @@ static_inline bool read_null(u8 **ptr, yyjson_val *val) { return false; } -/** Read 'Inf' or 'Infinity' literal (ignoring case). */ -static_inline bool read_inf(bool sign, u8 **ptr, u8 **pre, yyjson_val *val) { +/** Read 'Inf', 'Infinity' or 'NaN' literal (ignoring case). */ +static_noinline bool read_inf_or_nan(bool sign, u8 **ptr, u8 **pre, + yyjson_read_flag flg, + yyjson_val *val) { u8 *hdr = *ptr - sign; u8 *cur = *ptr; u8 **end = ptr; - if ((cur[0] == 'I' || cur[0] == 'i') && - (cur[1] == 'N' || cur[1] == 'n') && - (cur[2] == 'F' || cur[2] == 'f')) { - if ((cur[3] == 'I' || cur[3] == 'i') && - (cur[4] == 'N' || cur[4] == 'n') && - (cur[5] == 'I' || cur[5] == 'i') && - (cur[6] == 'T' || cur[6] == 't') && - (cur[7] == 'Y' || cur[7] == 'y')) { - cur += 8; - } else { - cur += 3; - } + bool is_inf = false; + + if (is_str_matched(cur, "inf", true)) { + cur += 3; + if (is_str_matched(cur, "inity", true)) cur += 5; *end = cur; - if (pre) { - /* add null-terminator for previous raw string */ - if (*pre) **pre = '\0'; - *pre = cur; - val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; - val->uni.str = (const char *)hdr; - } else { - val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; - val->uni.u64 = f64_get_inf_bin(sign); - } - return true; - } - return false; -} - -/** Read 'NaN' literal (ignoring case). 
*/ -static_inline bool read_nan(bool sign, u8 **ptr, u8 **pre, yyjson_val *val) { - u8 *hdr = *ptr - sign; - u8 *cur = *ptr; - u8 **end = ptr; - if ((cur[0] == 'N' || cur[0] == 'n') && - (cur[1] == 'A' || cur[1] == 'a') && - (cur[2] == 'N' || cur[2] == 'n')) { + is_inf = true; + } else if (is_str_matched(cur, "nan", true)) { cur += 3; *end = cur; - if (pre) { - /* add null-terminator for previous raw string */ - if (*pre) **pre = '\0'; - *pre = cur; - val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; - val->uni.str = (const char *)hdr; - } else { - val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; - val->uni.u64 = f64_get_nan_bin(sign); - } - return true; + } else { + return false; } - return false; -} - -/** Read 'Inf', 'Infinity' or 'NaN' literal (ignoring case). */ -static_inline bool read_inf_or_nan(bool sign, u8 **ptr, u8 **pre, - yyjson_val *val) { - if (read_inf(sign, ptr, pre, val)) return true; - if (read_nan(sign, ptr, pre, val)) return true; - return false; + + if (pre && has_flag(NUMBER_AS_RAW)) { + if (*pre) **pre = '\0'; /* end the previous raw string */ + *pre = cur; /* set current raw string */ + val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; + val->uni.str = (const char *)hdr; + } else if (has_flag(INF_AND_NAN_AS_NULL)) { + val->tag = YYJSON_TYPE_NULL; + val->uni.u64 = 0; + } else { + val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; + val->uni.u64 = is_inf ? f64_get_inf_bin(sign) : f64_get_nan_bin(sign); + } + return true; } /** Read a JSON number as raw string. 
*/ @@ -3073,8 +3083,8 @@ static_noinline bool read_number_raw(u8 **ptr, /* read first digit, check leading zero */ if (unlikely(!digi_is_digit(*cur))) { - if (has_flag(ALLOW_INF_AND_NAN)) { - if (read_inf_or_nan(*hdr == '-', &cur, pre, val)) return_raw(); + if (has_flag(ALLOW_INF_AND_NAN) || has_flag(INF_AND_NAN_AS_NULL)) { + if (read_inf_or_nan(*hdr == '-', &cur, pre, flg, val)) return_raw(); } return_err(cur, "no digit after minus sign"); } @@ -3160,38 +3170,16 @@ static_noinline bool skip_spaces_and_comments(u8 **ptr) { return hdr != cur; } -/** - Check truncated string. - Returns true if `cur` match `str` but is truncated. - */ -static_inline bool is_truncated_str(u8 *cur, u8 *end, - const char *str, - bool case_sensitive) { - usize len = strlen(str); - if (cur + len <= end || end <= cur) return false; - if (case_sensitive) { - return memcmp(cur, str, (usize)(end - cur)) == 0; - } - for (; cur < end; cur++, str++) { - if ((*cur != (u8)*str) && (*cur != (u8)*str - 'a' + 'A')) { - return false; - } - } - return true; -} - -/** - Check truncated JSON on parsing errors. - Returns true if the input is valid but truncated. - */ +/** Check truncated JSON on parsing errors. + Returns true if the input is valid but truncated. 
*/ static_noinline bool is_truncated_end(u8 *hdr, u8 *cur, u8 *end, yyjson_read_code code, yyjson_read_flag flg) { if (cur >= end) return true; if (code == YYJSON_READ_ERROR_LITERAL) { - if (is_truncated_str(cur, end, "true", true) || - is_truncated_str(cur, end, "false", true) || - is_truncated_str(cur, end, "null", true)) { + if (is_str_truncated(cur, end, "true", false) || + is_str_truncated(cur, end, "false", false) || + is_str_truncated(cur, end, "null", false)) { return true; } } @@ -3200,8 +3188,8 @@ static_noinline bool is_truncated_end(u8 *hdr, u8 *cur, u8 *end, code == YYJSON_READ_ERROR_LITERAL) { - if (has_flag(ALLOW_INF_AND_NAN)) { + if (has_flag(ALLOW_INF_AND_NAN) || has_flag(INF_AND_NAN_AS_NULL)) { if (*cur == '-') cur++; - if (is_truncated_str(cur, end, "infinity", false) || - is_truncated_str(cur, end, "nan", false)) { + if (is_str_truncated(cur, end, "infinity", true) || + is_str_truncated(cur, end, "nan", true)) { return true; } } @@ -3209,7 +3197,7 @@ static_noinline bool is_truncated_end(u8 *hdr, u8 *cur, u8 *end, if (code == YYJSON_READ_ERROR_UNEXPECTED_CONTENT) { - if (has_flag(ALLOW_INF_AND_NAN)) { + if (has_flag(ALLOW_INF_AND_NAN) || has_flag(INF_AND_NAN_AS_NULL)) { if (hdr + 3 <= cur && - is_truncated_str(cur - 3, end, "infinity", false)) { + is_str_truncated(cur - 3, end, "infinity", true)) { return true; /* e.g. 
infin would be read as inf + in */ } } @@ -3553,6 +3541,11 @@ static_inline bool read_number(u8 **ptr, return false; \ } while (false) +#define return_null() do { \ + val->tag = YYJSON_TYPE_NULL; \ + *end = cur; return true; \ +} while (false) + #define return_0() do { \ val->tag = YYJSON_TYPE_NUM | (u8)((u8)sign << 3); \ val->uni.u64 = 0; \ @@ -3579,6 +3572,7 @@ static_inline bool read_number(u8 **ptr, #define return_inf() do { \ if (has_flag(BIGNUM_AS_RAW)) return_raw(); \ + if (has_flag(INF_AND_NAN_AS_NULL)) return_null(); \ if (has_flag(ALLOW_INF_AND_NAN)) return_f64_bin(F64_INF_BIN); \ else return_err(hdr, "number is infinity when parsed as double"); \ } while (false) @@ -3619,8 +3613,8 @@ static_inline bool read_number(u8 **ptr, /* begin with a leading zero or non-digit */ if (unlikely(!digi_is_nonzero(*cur))) { /* 0 or non-digit char */ if (unlikely(*cur != '0')) { /* non-digit char */ - if (has_flag(ALLOW_INF_AND_NAN)) { - if (read_inf_or_nan(sign, &cur, pre, val)) { + if (has_flag(ALLOW_INF_AND_NAN) || has_flag(INF_AND_NAN_AS_NULL)) { + if (read_inf_or_nan(sign, &cur, pre, flg, val)) { *end = cur; return true; } @@ -4165,6 +4159,11 @@ static_inline bool read_number(u8 **ptr, return false; \ } while (false) +#define return_null() do { \ + val->tag = YYJSON_TYPE_NULL; \ + *end = cur; return true; \ +} while (false) + #define return_0() do { \ val->tag = YYJSON_TYPE_NUM | (u64)((u8)sign << 3); \ val->uni.u64 = 0; \ @@ -4191,6 +4190,7 @@ static_inline bool read_number(u8 **ptr, #define return_inf() do { \ if (has_flag(BIGNUM_AS_RAW)) return_raw(); \ + if (has_flag(INF_AND_NAN_AS_NULL)) return_null(); \ if (has_flag(ALLOW_INF_AND_NAN)) return_f64_bin(F64_INF_BIN); \ else return_err(hdr, "number is infinity when parsed as double"); \ } while (false) @@ -4221,8 +4221,8 @@ static_inline bool read_number(u8 **ptr, /* read first digit, check leading zero */ if (unlikely(!digi_is_digit(*cur))) { - if (has_flag(ALLOW_INF_AND_NAN)) { - if (read_inf_or_nan(sign, &cur, 
pre, val)) { + if (has_flag(ALLOW_INF_AND_NAN) || has_flag(INF_AND_NAN_AS_NULL)) { + if (read_inf_or_nan(sign, &cur, pre, flg, val)) { *end = cur; return true; } @@ -4763,7 +4763,7 @@ static_noinline yyjson_doc *read_root_single(u8 *hdr, yyjson_doc *doc; /* the JSON document, equals to val_hdr */ const char *msg; /* error message */ - bool raw; /* read number as raw */ + bool raw; /* read number/bignum as raw */ bool inv; /* allow invalid unicode */ u8 *raw_end; /* raw end for null-terminator */ u8 **pre; /* previous raw end pointer */ @@ -4798,13 +4798,13 @@ static_noinline yyjson_doc *read_root_single(u8 *hdr, } if (*cur == 'n') { if (likely(read_null(&cur, val))) goto doc_end; - if (has_flag(ALLOW_INF_AND_NAN)) { - if (read_nan(false, &cur, pre, val)) goto doc_end; + if (has_flag(ALLOW_INF_AND_NAN) || has_flag(INF_AND_NAN_AS_NULL)) { + if (read_inf_or_nan(false, &cur, pre, flg, val)) goto doc_end; } goto fail_literal; } - if (has_flag(ALLOW_INF_AND_NAN)) { - if (read_inf_or_nan(false, &cur, pre, val)) goto doc_end; + if (has_flag(ALLOW_INF_AND_NAN) || has_flag(INF_AND_NAN_AS_NULL)) { + if (read_inf_or_nan(false, &cur, pre, flg, val)) goto doc_end; } goto fail_character; @@ -4901,7 +4901,7 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, yyjson_doc *doc; /* the JSON document, equals to val_hdr */ const char *msg; /* error message */ - bool raw; /* read number as raw */ + bool raw; /* read number/bignum as raw */ bool inv; /* allow invalid unicode */ u8 *raw_end; /* raw end for null-terminator */ u8 **pre; /* previous raw end pointer */ @@ -4985,8 +4985,8 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, val_incr(); ctn_len++; if (likely(read_null(&cur, val))) goto arr_val_end; - if (has_flag(ALLOW_INF_AND_NAN)) { - if (read_nan(false, &cur, pre, val)) goto arr_val_end; + if (has_flag(ALLOW_INF_AND_NAN) || has_flag(INF_AND_NAN_AS_NULL)) { + if (read_inf_or_nan(false, &cur, pre, flg, val)) goto arr_val_end; } goto fail_literal; } @@ -5001,11 +5001,11 @@ 
static_inline yyjson_doc *read_root_minify(u8 *hdr, while (char_is_space(*++cur)); goto arr_val_begin; } - if (has_flag(ALLOW_INF_AND_NAN) && + if ((has_flag(ALLOW_INF_AND_NAN) || has_flag(INF_AND_NAN_AS_NULL)) && (*cur == 'i' || *cur == 'I' || *cur == 'N')) { val_incr(); ctn_len++; - if (read_inf_or_nan(false, &cur, pre, val)) goto arr_val_end; + if (read_inf_or_nan(false, &cur, pre, flg, val)) goto arr_val_end; goto fail_character; } if (has_flag(ALLOW_COMMENTS)) { @@ -5138,8 +5138,8 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, val++; ctn_len++; if (likely(read_null(&cur, val))) goto obj_val_end; - if (has_flag(ALLOW_INF_AND_NAN)) { - if (read_nan(false, &cur, pre, val)) goto obj_val_end; + if (has_flag(ALLOW_INF_AND_NAN) || has_flag(INF_AND_NAN_AS_NULL)) { + if (read_inf_or_nan(false, &cur, pre, flg, val)) goto obj_val_end; } goto fail_literal; } @@ -5147,11 +5147,11 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, while (char_is_space(*++cur)); goto obj_val_begin; } - if (has_flag(ALLOW_INF_AND_NAN) && + if ((has_flag(ALLOW_INF_AND_NAN) || has_flag(INF_AND_NAN_AS_NULL)) && (*cur == 'i' || *cur == 'I' || *cur == 'N')) { val++; ctn_len++; - if (read_inf_or_nan(false, &cur, pre, val)) goto obj_val_end; + if (read_inf_or_nan(false, &cur, pre, flg, val)) goto obj_val_end; goto fail_character; } if (has_flag(ALLOW_COMMENTS)) { @@ -5289,7 +5289,7 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, yyjson_doc *doc; /* the JSON document, equals to val_hdr */ const char *msg; /* error message */ - bool raw; /* read number as raw */ + bool raw; /* read number/bignum as raw */ bool inv; /* allow invalid unicode */ u8 *raw_end; /* raw end for null-terminator */ u8 **pre; /* previous raw end pointer */ @@ -5388,8 +5388,8 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, val_incr(); ctn_len++; if (likely(read_null(&cur, val))) goto arr_val_end; - if (has_flag(ALLOW_INF_AND_NAN)) { - if (read_nan(false, &cur, pre, val)) goto arr_val_end; + if 
(has_flag(ALLOW_INF_AND_NAN) || has_flag(INF_AND_NAN_AS_NULL)) { + if (read_inf_or_nan(false, &cur, pre, flg, val)) goto arr_val_end; } goto fail_literal; } @@ -5404,11 +5404,11 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, while (char_is_space(*++cur)); goto arr_val_begin; } - if (has_flag(ALLOW_INF_AND_NAN) && + if ((has_flag(ALLOW_INF_AND_NAN) || has_flag(INF_AND_NAN_AS_NULL)) && (*cur == 'i' || *cur == 'I' || *cur == 'N')) { val_incr(); ctn_len++; - if (read_inf_or_nan(false, &cur, pre, val)) goto arr_val_end; + if (read_inf_or_nan(false, &cur, pre, flg, val)) goto arr_val_end; goto fail_character; } if (has_flag(ALLOW_COMMENTS)) { @@ -5562,8 +5562,8 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, val++; ctn_len++; if (likely(read_null(&cur, val))) goto obj_val_end; - if (has_flag(ALLOW_INF_AND_NAN)) { - if (read_nan(false, &cur, pre, val)) goto obj_val_end; + if (has_flag(ALLOW_INF_AND_NAN) || has_flag(INF_AND_NAN_AS_NULL)) { + if (read_inf_or_nan(false, &cur, pre, flg, val)) goto obj_val_end; } goto fail_literal; } @@ -5571,11 +5571,11 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, while (char_is_space(*++cur)); goto obj_val_begin; } - if (has_flag(ALLOW_INF_AND_NAN) && + if ((has_flag(ALLOW_INF_AND_NAN) || has_flag(INF_AND_NAN_AS_NULL)) && (*cur == 'i' || *cur == 'I' || *cur == 'N')) { val++; ctn_len++; - if (read_inf_or_nan(false, &cur, pre, val)) goto obj_val_end; + if (read_inf_or_nan(false, &cur, pre, flg, val)) goto obj_val_end; goto fail_character; } if (has_flag(ALLOW_COMMENTS)) { @@ -5912,7 +5912,7 @@ const char *yyjson_read_number(const char *dat, } while (false) u8 *hdr = constcast(u8 *)dat, *cur = hdr; - bool raw; /* read number as raw */ + bool raw; /* read number/bignum as raw */ u8 *raw_end; /* raw end for null-terminator */ u8 **pre; /* previous raw end pointer */ const char *msg; @@ -5949,7 +5949,7 @@ const char *yyjson_read_number(const char *dat, hdr[dat_len] = 0; #endif - raw = (flg & (YYJSON_READ_NUMBER_AS_RAW | 
YYJSON_READ_BIGNUM_AS_RAW)) != 0; + raw = (bool)(has_flag(NUMBER_AS_RAW) || has_flag(BIGNUM_AS_RAW)); raw_end = NULL; pre = raw ? &raw_end : NULL; @@ -7094,10 +7094,12 @@ static_inline u8 *write_string(u8 *cur, bool esc, bool inv, goto copy_utf8; } case CHAR_ENC_ESC_2: { - u16 u, v; + u16 u; #if !YYJSON_DISABLE_UTF8_VALIDATION - v = byte_load_2(src); - if (unlikely(!is_utf8_seq2(v))) goto err_esc; + u32 v4 = 0; + u16 v2 = byte_load_2(src); + byte_copy_2(&v4, &v2); + if (unlikely(!is_utf8_seq2(v4))) goto err_esc; #endif u = (u16)(((u16)(src[0] & 0x1F) << 6) | ((u16)(src[1] & 0x3F) << 0)); diff --git a/src/yyjson.h b/src/yyjson.h index 118a074..40a8e8c 100644 --- a/src/yyjson.h +++ b/src/yyjson.h @@ -731,59 +731,69 @@ typedef struct yyjson_mut_val yyjson_mut_val; typedef uint32_t yyjson_read_flag; /** Default option (RFC 8259 compliant): - - Read positive integer as uint64_t. - - Read negative integer as int64_t. - - Read floating-point number as double with round-to-nearest mode. - - Read integer which cannot fit in uint64_t or int64_t as double. - - Report error if double number is infinity. - - Report error if string contains invalid UTF-8 character or BOM. - - Report error on trailing commas, comments, inf and nan literals. */ + - Read positive integers as `uint64_t`. + - Read negative integers as `int64_t`. + - Read floating-point numbers as `double`. + - Read integers that cannot fit in `uint64_t` or `int64_t` as `double`. + - Report an error if a double number is infinity. + - Report an error if a string contains an invalid UTF-8 character or BOM. + - Report an error on trailing commas, comments, and Inf/NaN literals. */ static const yyjson_read_flag YYJSON_READ_NOFLAG = 0; /** Read the input data in-situ. This option allows the reader to modify and use input data to store string - values, which can increase reading speed slightly. - The caller should hold the input data before free the document. 
- The input data must be padded by at least `YYJSON_PADDING_SIZE` bytes. + values, which can slightly increase reading speed. + The caller should retain the input data until the document is freed. + The input data must be padded with at least `YYJSON_PADDING_SIZE` bytes. For example: `[1,2]` should be `[1,2]\0\0\0\0`, input length should be 5. */ static const yyjson_read_flag YYJSON_READ_INSITU = 1 << 0; -/** Stop when done instead of issuing an error if there's additional content - after a JSON document. This option may be used to parse small pieces of JSON - in larger data, such as `NDJSON`. */ +/** Stop when done instead of issuing an error if there is additional content + after a JSON document. This option may be used to read small pieces of JSON + within larger data, such as `NDJSON`. */ static const yyjson_read_flag YYJSON_READ_STOP_WHEN_DONE = 1 << 1; -/** Allow single trailing comma at the end of an object or array, - such as `[1,2,3,]`, `{"a":1,"b":2,}` (non-standard). */ +/** Allow a single trailing comma at the end of an object or array, + such as `[1,2,3,]`, `{"a":1,"b":2,}`. + @note This is non-standard JSON. */ static const yyjson_read_flag YYJSON_READ_ALLOW_TRAILING_COMMAS = 1 << 2; -/** Allow C-style single line and multiple line comments (non-standard). */ +/** Allow C-style single-line and multiple-line comments. + @note This is non-standard JSON. */ static const yyjson_read_flag YYJSON_READ_ALLOW_COMMENTS = 1 << 3; -/** Allow inf/nan number and literal, case-insensitive, - such as 1e999, NaN, inf, -Infinity (non-standard). */ +/** Allow Inf/NaN numbers and literals, case-insensitive, + such as 1e999, NaN, inf, -Infinity. + @note This is non-standard JSON. */ static const yyjson_read_flag YYJSON_READ_ALLOW_INF_AND_NAN = 1 << 4; -/** Read all numbers as raw strings (value with `YYJSON_TYPE_RAW` type), - inf/nan literal is also read as raw with `ALLOW_INF_AND_NAN` flag. */ +/** Read all numbers as raw strings (value with `YYJSON_TYPE_RAW` type). 
+ Inf/NaN literals are also read as raw strings with `ALLOW_INF_AND_NAN` or + `INF_AND_NAN_AS_NULL` flag. */ static const yyjson_read_flag YYJSON_READ_NUMBER_AS_RAW = 1 << 5; -/** Allow reading invalid unicode when parsing string values (non-standard). +/** Allow reading invalid unicode when parsing string values. Invalid characters will be allowed to appear in the string values, but invalid escape sequences will still be reported as errors. This flag does not affect the performance of correctly encoded strings. - - @warning Strings in JSON values may contain incorrect encoding when this - option is used, you need to handle these strings carefully to avoid security - risks. */ + @note This is non-standard JSON. Strings in JSON values may contain + incorrect encoding when this flag is used, so you need to handle these + strings carefully to avoid security risks. */ static const yyjson_read_flag YYJSON_READ_ALLOW_INVALID_UNICODE = 1 << 6; /** Read big numbers as raw strings. These big numbers include integers that cannot be represented by `int64_t` and `uint64_t`, and floating-point - numbers that cannot be represented by finite `double`. - The flag will be overridden by `YYJSON_READ_NUMBER_AS_RAW` flag. */ + numbers that cannot be represented by finite `double`. If `NUMBER_AS_RAW` + flag is also used, all numbers will be read as raw strings. */ static const yyjson_read_flag YYJSON_READ_BIGNUM_AS_RAW = 1 << 7; +/** Read Inf/NaN numbers and literals as null. + This flag is the same as `ALLOW_INF_AND_NAN`, but converts Inf/NaN to null + to ensure that all numbers are finite. If `NUMBER_AS_RAW` or `BIGNUM_AS_RAW` + flag is also used, these numbers will be read as raw strings. + @note This is non-standard JSON. */ +static const yyjson_read_flag YYJSON_READ_INF_AND_NAN_AS_NULL = 1 << 8; + /** Result code for JSON reader. 
*/ diff --git a/test/test_number.c b/test/test_number.c index 4cad093..6b5965a 100644 --- a/test/test_number.c +++ b/test/test_number.c @@ -25,11 +25,11 @@ *============================================================================*/ typedef enum { - NUM_TYPE_FAIL, - NUM_TYPE_SINT, - NUM_TYPE_UINT, - NUM_TYPE_REAL, - NUM_TYPE_INF_NAN_LITERAL, + NUM_TYPE_FAIL, // should fail with any flag + NUM_TYPE_SINT, // valid negative integer, can fit in `int64_t` + NUM_TYPE_UINT, // valid positive integer, can fit in `uint64_t` + NUM_TYPE_REAL, // valid finite real number, can fit in `double` + NUM_TYPE_INF_NAN_LITERAL, // Inf/NaN literal } num_type; /// Convert double to raw. @@ -581,7 +581,13 @@ static void test_nan_inf_read(const char *line, usize len, f64 num) { } yyjson_doc_free(doc); - // read raw + // read as null + doc = yyjson_read(line, len, YYJSON_READ_INF_AND_NAN_AS_NULL); + val = yyjson_doc_get_root(doc); + yy_assertf(yyjson_is_null(val), "nan or inf read fail: %s \n", line); + yyjson_doc_free(doc); + + // read as raw doc = yyjson_read(line, len, YYJSON_READ_ALLOW_INF_AND_NAN | YYJSON_READ_NUMBER_AS_RAW); val = yyjson_doc_get_root(doc); yy_assertf(yyjson_is_raw(val), @@ -591,6 +597,15 @@ static void test_nan_inf_read(const char *line, usize len, f64 num) { line, yyjson_get_raw(val)); yyjson_doc_free(doc); + // read as raw + doc = yyjson_read(line, len, YYJSON_READ_INF_AND_NAN_AS_NULL | YYJSON_READ_NUMBER_AS_RAW); + val = yyjson_doc_get_root(doc); + yy_assertf(yyjson_is_raw(val), + "num should be read as raw: %s\n", line); + yy_assertf(strcmp(line, yyjson_get_raw(val)) == 0, + "num read as raw not match:\nstr: %s\nreturn: %s\n", + line, yyjson_get_raw(val)); + yyjson_doc_free(doc); #endif } @@ -653,11 +668,16 @@ static void test_fail(const char *line, usize len) { yy_assertf(doc == NULL, "num should fail: %s\n", line); doc = yyjson_read(line, len, YYJSON_READ_ALLOW_INF_AND_NAN); yy_assertf(doc == NULL, "num should fail: %s\n", line); + doc = yyjson_read(line, 
len, YYJSON_READ_INF_AND_NAN_AS_NULL); + yy_assertf(doc == NULL, "num should fail: %s\n", line); doc = yyjson_read(line, len, YYJSON_READ_NUMBER_AS_RAW); yy_assertf(doc == NULL, "num should fail: %s\n", line); doc = yyjson_read(line, len, YYJSON_READ_NUMBER_AS_RAW | YYJSON_READ_ALLOW_INF_AND_NAN); yy_assertf(doc == NULL, "num should fail: %s\n", line); + doc = yyjson_read(line, len, YYJSON_READ_NUMBER_AS_RAW | + YYJSON_READ_INF_AND_NAN_AS_NULL); + yy_assertf(doc == NULL, "num should fail: %s\n", line); yyjson_val val; const char *ptr; @@ -665,11 +685,16 @@ static void test_fail(const char *line, usize len) { yy_assertf(ptr != &line[len], "num should fail: %s\n", line); ptr = yyjson_read_number(line, &val, YYJSON_READ_ALLOW_INF_AND_NAN, NULL, NULL); yy_assertf(ptr != &line[len], "num should fail: %s\n", line); + ptr = yyjson_read_number(line, &val, YYJSON_READ_INF_AND_NAN_AS_NULL, NULL, NULL); + yy_assertf(ptr != &line[len], "num should fail: %s\n", line); ptr = yyjson_read_number(line, &val, YYJSON_READ_NUMBER_AS_RAW, NULL, NULL); yy_assertf(ptr != &line[len], "num should fail: %s\n", line); ptr = yyjson_read_number(line, &val, YYJSON_READ_NUMBER_AS_RAW | YYJSON_READ_ALLOW_INF_AND_NAN, NULL, NULL); yy_assertf(ptr != &line[len], "num should fail: %s\n", line); + ptr = yyjson_read_number(line, &val, YYJSON_READ_NUMBER_AS_RAW | + YYJSON_READ_INF_AND_NAN_AS_NULL, NULL, NULL); + yy_assertf(ptr != &line[len], "num should fail: %s\n", line); #endif }